From 5d37e6bb5c57e501cc5b831d192a36ec4aa6459d Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Mon, 5 Aug 2024 09:12:36 +0100 Subject: [PATCH 01/33] accessor for smart-pointer version of FileVal's value --- src/Val.cc | 2 ++ src/Val.h | 2 ++ 2 files changed, 4 insertions(+) diff --git a/src/Val.cc b/src/Val.cc index bf86360b70..e3574116cc 100644 --- a/src/Val.cc +++ b/src/Val.cc @@ -1277,6 +1277,8 @@ FileVal::FileVal(FilePtr f) : Val(make_intrusive(base_type(TYPE_STRING assert(file_val->GetType()->Tag() == TYPE_STRING); } +FilePtr FileVal::AsFilePtr() const { return file_val; } + ValPtr FileVal::SizeVal() const { return make_intrusive(file_val->Size()); } void FileVal::ValDescribe(ODesc* d) const { file_val->Describe(d); } diff --git a/src/Val.h b/src/Val.h index a74f68f8d6..1267f602ec 100644 --- a/src/Val.h +++ b/src/Val.h @@ -609,6 +609,8 @@ class FileVal final : public Val { public: explicit FileVal(FilePtr f); + FilePtr AsFilePtr() const; + ValPtr SizeVal() const override; File* Get() const { return file_val.get(); } From 3962810e4b77670920835ded770f0feafcc04ba3 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Mon, 5 Aug 2024 09:14:55 +0100 Subject: [PATCH 02/33] ListVal method to clear the list to allow reusing w/o new construction --- src/Type.h | 3 +++ src/Val.h | 9 +++++++++ 2 files changed, 12 insertions(+) diff --git a/src/Type.h b/src/Type.h index 7261a445f4..da455ee6c3 100644 --- a/src/Type.h +++ b/src/Type.h @@ -341,6 +341,9 @@ public: void Append(TypePtr t); void AppendEvenIfNotPure(TypePtr t); + // Resets the list to be empty. + void Clear() { types.clear(); } + detail::TraversalCode Traverse(detail::TraversalCallback* cb) const override; protected: diff --git a/src/Val.h b/src/Val.h index 1267f602ec..f4e177d602 100644 --- a/src/Val.h +++ b/src/Val.h @@ -684,6 +684,15 @@ public: */ void Append(ValPtr v); + /** + * Empties the list. + * Also resets the list's associated type list. 
+ */ + void Clear() { + vals.clear(); + type->AsTypeList()->Clear(); + } + // Returns a Set representation of the list (which must be homogeneous). TableValPtr ToSetVal() const; From d2c6208421721a88bf8355bcf666375b684cf5a6 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Mon, 5 Aug 2024 09:17:13 +0100 Subject: [PATCH 03/33] allow profiling without updating of hash values --- src/script_opt/ProfileFunc.cc | 20 +++++++++++++++----- src/script_opt/ProfileFunc.h | 19 ++++++++++++------- src/script_opt/ScriptOpt.cc | 10 +++------- 3 files changed, 30 insertions(+), 19 deletions(-) diff --git a/src/script_opt/ProfileFunc.cc b/src/script_opt/ProfileFunc.cc index a62e436774..53f71be393 100644 --- a/src/script_opt/ProfileFunc.cc +++ b/src/script_opt/ProfileFunc.cc @@ -546,7 +546,9 @@ void ProfileFunc::CheckRecordConstructor(TypePtr t) { } } -ProfileFuncs::ProfileFuncs(std::vector& funcs, is_compilable_pred pred, bool _full_record_hashes) { +ProfileFuncs::ProfileFuncs(std::vector& funcs, is_compilable_pred pred, bool _compute_func_hashes, + bool _full_record_hashes) { + compute_func_hashes = _compute_func_hashes; full_record_hashes = _full_record_hashes; for ( auto& f : funcs ) { @@ -558,6 +560,11 @@ ProfileFuncs::ProfileFuncs(std::vector& funcs, is_compilable_pred pred // Track the profile even if we're not compiling the function, since // the AST optimizer will still need it to reason about function-call // side effects. + + // Propagate previous hash if requested. + if ( ! compute_func_hashes && f.Profile() ) + pf->SetHashVal(f.Profile()->HashVal()); + f.SetProfile(std::move(pf)); func_profs[f.Func()] = f.ProfilePtr(); } @@ -805,15 +812,18 @@ void ProfileFuncs::ComputeTypeHashes(const std::vector& types) { } void ProfileFuncs::ComputeBodyHashes(std::vector& funcs) { - for ( auto& f : funcs ) - if ( ! f.ShouldSkip() ) - ComputeProfileHash(f.ProfilePtr()); + if ( compute_func_hashes ) + for ( auto& f : funcs ) + if ( ! 
f.ShouldSkip() ) + ComputeProfileHash(f.ProfilePtr()); for ( auto& l : lambdas ) { auto pf = ExprProf(l); func_profs[l->PrimaryFunc().get()] = pf; lambda_primaries[l->Name()] = l->PrimaryFunc().get(); - ComputeProfileHash(pf); + + if ( compute_func_hashes ) + ComputeProfileHash(pf); } } diff --git a/src/script_opt/ProfileFunc.h b/src/script_opt/ProfileFunc.h index e75688adb6..2ce4dfe562 100644 --- a/src/script_opt/ProfileFunc.h +++ b/src/script_opt/ProfileFunc.h @@ -347,13 +347,15 @@ using is_compilable_pred = bool (*)(const ProfileFunc*, const char** reason); // Collectively profile an entire collection of functions. class ProfileFuncs { public: - // Updates entries in "funcs" to include profiles. If pred is - // non-nil, then it is called for each profile to see whether it's - // compilable, and, if not, the FuncInfo is marked as ShouldSkip(). - // "full_record_hashes" controls whether the hashes for extended - // records covers their final, full form, or should only their - // original fields. - ProfileFuncs(std::vector& funcs, is_compilable_pred pred, bool full_record_hashes); + // Updates entries in "funcs" to include profiles. If pred is non-nil, + // then it is called for each profile to see whether it's compilable, + // and, if not, the FuncInfo is marked as ShouldSkip(). + // "compute_func_hashes" governs whether we compute hashes for the + // FuncInfo entries, or keep their existing ones. "full_record_hashes" + // controls whether the hashes for extended records cover their final, + // full form, or only their original fields. + ProfileFuncs(std::vector& funcs, is_compilable_pred pred, bool compute_func_hashes, + bool full_record_hashes); // The following accessors provide a global profile across all of // the (non-skipped) functions in "funcs". See the comments for @@ -604,6 +606,9 @@ protected: // These can arise for example due to lambdas or record attributes. 
std::vector pending_exprs; + // Whether to compute new hashes for the FuncInfo entries. + bool compute_func_hashes; + // Whether the hashes for extended records should cover their final, // full form, or only their original fields. bool full_record_hashes; diff --git a/src/script_opt/ScriptOpt.cc b/src/script_opt/ScriptOpt.cc index c41e6cf429..99fd8d76df 100644 --- a/src/script_opt/ScriptOpt.cc +++ b/src/script_opt/ScriptOpt.cc @@ -391,7 +391,7 @@ static void use_CPP() { int num_used = 0; - auto pfs = std::make_unique(funcs, is_CPP_compilable, false); + auto pfs = std::make_unique(funcs, is_CPP_compilable, true, false); for ( auto& f : funcs ) { auto hash = f.Profile()->HashVal(); @@ -435,18 +435,16 @@ static void use_CPP() { reporter->FatalError("no C++ functions found to use"); } -static void generate_CPP() { +static void generate_CPP(std::shared_ptr pfs) { const auto gen_name = CPP_dir + "CPP-gen.cc"; const bool standalone = analysis_options.gen_standalone_CPP; const bool report = analysis_options.report_uncompilable; - auto pfs = std::make_shared(funcs, is_CPP_compilable, false); - CPPCompile cpp(funcs, pfs, gen_name, standalone, report); } -static void analyze_scripts_for_ZAM() { +static void analyze_scripts_for_ZAM(std::shared_ptr pfs) { if ( analysis_options.usage_issues > 0 && analysis_options.optimize_AST ) { fprintf(stderr, "warning: \"-O optimize-AST\" option is incompatible with -u option, " @@ -454,8 +452,6 @@ static void analyze_scripts_for_ZAM() { analysis_options.optimize_AST = false; } - auto pfs = std::make_shared(funcs, nullptr, true); - if ( analysis_options.profile_ZAM ) { #ifdef ENABLE_ZAM_PROFILE AST_blocks = std::make_unique(funcs); From 857df9f06399e5a7a3d291e65c09087d98ed3407 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Mon, 5 Aug 2024 09:17:46 +0100 Subject: [PATCH 04/33] support for more in-depth AST profiling --- src/script_opt/ProfileFunc.cc | 24 ++++++++++++++++++++++++ src/script_opt/ScriptOpt.cc | 14 ++++++++++---- 2 files 
changed, 34 insertions(+), 4 deletions(-) diff --git a/src/script_opt/ProfileFunc.cc b/src/script_opt/ProfileFunc.cc index 53f71be393..84f8cb16f7 100644 --- a/src/script_opt/ProfileFunc.cc +++ b/src/script_opt/ProfileFunc.cc @@ -487,10 +487,29 @@ TraversalCode ProfileFunc::PreExpr(const Expr* e) { TraversalCode ProfileFunc::PreID(const ID* id) { TrackID(id); + if ( id->IsGlobal() ) { + globals.insert(id); + all_globals.insert(id); + + const auto& t = id->GetType(); + TrackType(t); + + if ( t->Tag() == TYPE_FUNC ) + if ( t->AsFuncType()->Flavor() == FUNC_FLAVOR_EVENT ) + events.insert(id->Name()); + } + // There's no need for any further analysis of this ID. return TC_ABORTSTMT; } +TraversalCode ProfileFunc::PreType(const Type* t) { + TrackType(t); + + // There's no need for any further analysis of this type. + return TC_ABORTSTMT; +} + void ProfileFunc::TrackType(const Type* t) { if ( ! t ) return; @@ -514,6 +533,11 @@ void ProfileFunc::TrackID(const ID* id) { // Already tracked. return; + if ( id->IsGlobal() ) { + globals.insert(id); + all_globals.insert(id); + } + ordered_ids.push_back(id); } diff --git a/src/script_opt/ScriptOpt.cc b/src/script_opt/ScriptOpt.cc index 99fd8d76df..73399e6dc1 100644 --- a/src/script_opt/ScriptOpt.cc +++ b/src/script_opt/ScriptOpt.cc @@ -607,17 +607,23 @@ void analyze_scripts(bool no_unused_warnings) { if ( analysis_options.use_CPP ) use_CPP(); + std::shared_ptr pfs; + // Note, in the following it's not clear whether the final argument + // for absolute/relative record fields matters any more ... + if ( generating_CPP ) + pfs = std::make_shared(funcs, is_CPP_compilable, true, false); + else + pfs = std::make_shared(funcs, nullptr, true, true); + if ( generating_CPP ) { if ( analysis_options.gen_ZAM ) reporter->FatalError("-O ZAM and -O gen-C++ conflict"); - generate_CPP(); + generate_CPP(pfs); exit(0); } - // At this point we're done with C++ considerations, so instead - // are compiling to ZAM. 
- analyze_scripts_for_ZAM(); + analyze_scripts_for_ZAM(pfs); if ( reporter->Errors() > 0 ) reporter->FatalError("Optimized script execution aborted due to errors"); From e6fe20f140bc3c0db922f50d5d53e044da275044 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Mon, 5 Aug 2024 09:18:33 +0100 Subject: [PATCH 05/33] fix for -O report-C++ --- src/script_opt/ScriptOpt.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/src/script_opt/ScriptOpt.cc b/src/script_opt/ScriptOpt.cc index 73399e6dc1..4a3e712399 100644 --- a/src/script_opt/ScriptOpt.cc +++ b/src/script_opt/ScriptOpt.cc @@ -600,6 +600,7 @@ void analyze_scripts(bool no_unused_warnings) { } if ( analysis_options.report_CPP ) { + auto pfs = std::make_unique(funcs, is_CPP_compilable, true, false); report_CPP(); exit(0); } From cfb068a922ded8e1f8629111026929feb33c6282 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Mon, 5 Aug 2024 09:19:50 +0100 Subject: [PATCH 06/33] fix to correctly track whether a capture needs deep-copying --- src/Type.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Type.cc b/src/Type.cc index 5341476a3f..c00410c0ad 100644 --- a/src/Type.cc +++ b/src/Type.cc @@ -603,6 +603,9 @@ SetType::~SetType() = default; #pragma GCC diagnostic ignored "-Wdeprecated-declarations" FuncType::Capture::Capture(detail::IDPtr _id, bool _deep_copy) : id(std::move(_id)), deep_copy(_deep_copy) { is_managed = id ? ZVal::IsManagedType(id->GetType()) : false; + if ( ! is_managed ) + // For non-managed types, deep copying isn't applicable. 
+ deep_copy = false; } #pragma GCC diagnostic pop From e93db75f78dd7ce13e52d2d136b35bb1fd061e9f Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Mon, 5 Aug 2024 09:21:13 +0100 Subject: [PATCH 07/33] fixes for script optimization of coerce-to-any expressions --- src/Expr.h | 3 +++ src/Stmt.cc | 3 ++- src/script_opt/Expr.cc | 17 +++++++++++++++++ 3 files changed, 22 insertions(+), 1 deletion(-) diff --git a/src/Expr.h b/src/Expr.h index 73929a2114..ffead932af 100644 --- a/src/Expr.h +++ b/src/Expr.h @@ -1642,6 +1642,9 @@ class CoerceToAnyExpr : public UnaryExpr { public: CoerceToAnyExpr(ExprPtr op); + bool IsReduced(Reducer* c) const override; + ExprPtr Reduce(Reducer* c, StmtPtr& red_stmt) override; + protected: ValPtr Fold(Val* v) const override; diff --git a/src/Stmt.cc b/src/Stmt.cc index a1edb51ee0..01e892ead0 100644 --- a/src/Stmt.cc +++ b/src/Stmt.cc @@ -1849,7 +1849,8 @@ void WhenInfo::Build(StmtPtr ws) { auto else_branch = timeout_s ? timeout_s : empty; auto do_bodies = make_intrusive(two_test, s, else_branch); - auto dummy_return = make_intrusive(true_const); + auto any_true_const = make_intrusive(true_const); + auto dummy_return = make_intrusive(any_true_const); auto shebang = make_intrusive(do_test, do_bodies, dummy_return); diff --git a/src/script_opt/Expr.cc b/src/script_opt/Expr.cc index 353604f1a7..5e6500ae0d 100644 --- a/src/script_opt/Expr.cc +++ b/src/script_opt/Expr.cc @@ -3109,6 +3109,23 @@ CoerceToAnyExpr::CoerceToAnyExpr(ExprPtr arg_op) : UnaryExpr(EXPR_TO_ANY_COERCE, type = base_type(TYPE_ANY); } +bool CoerceToAnyExpr::IsReduced(Reducer* c) const { return HasReducedOps(c); } + +ExprPtr CoerceToAnyExpr::Reduce(Reducer* c, StmtPtr& red_stmt) { + if ( c->Optimizing() ) + op = c->UpdateExpr(op); + + red_stmt = nullptr; + + if ( ! 
op->IsSingleton(c) ) + op = op->ReduceToSingleton(c, red_stmt); + + if ( c->Optimizing() ) + return ThisPtr(); + else + return AssignToTemporary(c, red_stmt); +} + ValPtr CoerceToAnyExpr::Fold(Val* v) const { return {NewRef{}, v}; } ExprPtr CoerceToAnyExpr::Duplicate() { return SetSucc(new CoerceToAnyExpr(op->Duplicate())); } From b333d24e0e73b0d41d879edc8aa8d7c67d6ece96 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Mon, 5 Aug 2024 09:24:08 +0100 Subject: [PATCH 08/33] allow C++ script compiler access to type internals --- src/Type.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Type.h b/src/Type.h index da455ee6c3..12b0e68020 100644 --- a/src/Type.h +++ b/src/Type.h @@ -35,6 +35,7 @@ class CompositeHash; class Expr; class ListExpr; class ZAMCompiler; +class CPPRuntime; using ExprPtr = IntrusivePtr; using ListExprPtr = IntrusivePtr; @@ -757,6 +758,7 @@ private: class CreationInitsOptimizer; friend zeek::RecordVal; friend zeek::detail::ZAMCompiler; + friend zeek::detail::CPPRuntime; const auto& DeferredInits() const { return deferred_inits; } const auto& CreationInits() const { return creation_inits; } From 85a8f0739c6aa948723786e86c9e0df5fa441039 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Mon, 5 Aug 2024 09:24:47 +0100 Subject: [PATCH 09/33] run-time warnings for scripts compiled to C++ --- src/Reporter.cc | 8 ++++++++ src/Reporter.h | 3 +++ 2 files changed, 11 insertions(+) diff --git a/src/Reporter.cc b/src/Reporter.cc index 47950735c4..81da38db7b 100644 --- a/src/Reporter.cc +++ b/src/Reporter.cc @@ -203,6 +203,14 @@ void Reporter::CPPRuntimeError(const char* fmt, ...) { throw InterpreterException(); } +void Reporter::CPPRuntimeWarning(const char* fmt, ...) { + va_list ap; + va_start(ap, fmt); + FILE* out = EmitToStderr(warnings_to_stderr) ? stderr : nullptr; + DoLog("runtime warning in compiled code", reporter_error, out, nullptr, nullptr, true, true, "", fmt, ap); + va_end(ap); +} + void Reporter::InternalError(const char* fmt, ...) 
{ va_list ap; va_start(ap, fmt); diff --git a/src/Reporter.h b/src/Reporter.h index 2107f137ff..d67a68c5a8 100644 --- a/src/Reporter.h +++ b/src/Reporter.h @@ -115,6 +115,9 @@ public: // function will not return but raise an InterpreterException. [[noreturn]] void CPPRuntimeError(const char* fmt, ...) __attribute__((format(printf, 2, 3))); + // Similar, but for warnings. This function does return. + void CPPRuntimeWarning(const char* fmt, ...) __attribute__((format(printf, 2, 3))); + // Report a traffic weirdness, i.e., an unexpected protocol situation // that may lead to incorrectly processing a connection. void Weird(const char* name, const char* addl = "", From 37fcb231fafff335802ca2cfa3af02ba948d0ed4 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Mon, 5 Aug 2024 09:25:53 +0100 Subject: [PATCH 10/33] support for traversing ZAM code similar to AST traversal --- src/script_opt/ZAM/ZBody.cc | 12 +++++- src/script_opt/ZAM/ZInst.cc | 79 +++++++++++++++++++++++++++++++++++++ src/script_opt/ZAM/ZInst.h | 5 +++ 3 files changed, 95 insertions(+), 1 deletion(-) diff --git a/src/script_opt/ZAM/ZBody.cc b/src/script_opt/ZAM/ZBody.cc index 0eb283284a..80a1039c5d 100644 --- a/src/script_opt/ZAM/ZBody.cc +++ b/src/script_opt/ZAM/ZBody.cc @@ -591,12 +591,22 @@ TraversalCode ZBody::Traverse(TraversalCallback* cb) const { TraversalCode tc = cb->PreStmt(this); HANDLE_TC_STMT_PRE(tc); + for ( auto& gi : globals ) { + tc = gi.id->Traverse(cb); + HANDLE_TC_STMT_PRE(tc); + } + + for ( size_t i = 0; i < NumInsts(); ++i ) { + tc = insts[i].Traverse(cb); + HANDLE_TC_STMT_PRE(tc); + } + tc = cb->PostStmt(this); HANDLE_TC_STMT_POST(tc); } // Unary vector operation of v1 v2. 
-static void vec_exec(ZOp op, TypePtr t, VectorVal*& v1, const VectorVal* v2, const ZInst& z) { +static void vec_exec(ZOp op, TypePtr t, VectorVal*& v1, const VectorVal* v2, const ZInst& /* z */) { // We could speed this up further still by gen'ing up an instance // of the loop inside each switch case (in which case we might as // well move the whole kit-and-caboodle into the Exec method). But diff --git a/src/script_opt/ZAM/ZInst.cc b/src/script_opt/ZAM/ZInst.cc index c619e25984..f3a24812b9 100644 --- a/src/script_opt/ZAM/ZInst.cc +++ b/src/script_opt/ZAM/ZInst.cc @@ -5,6 +5,7 @@ #include "zeek/Desc.h" #include "zeek/Func.h" #include "zeek/Reporter.h" +#include "zeek/module_util.h" using std::string; @@ -298,6 +299,84 @@ string ZInst::ConstDump() const { return d.Description(); } +TraversalCode ZInst::Traverse(TraversalCallback* cb) const { + TraversalCode tc; + if ( t ) { + tc = t->Traverse(cb); + HANDLE_TC_STMT_PRE(tc); + if ( t2 ) { + tc = t2->Traverse(cb); + HANDLE_TC_STMT_PRE(tc); + } + } + + if ( aux ) { + tc = aux->Traverse(cb); + HANDLE_TC_STMT_POST(tc); + } + + return TC_CONTINUE; +} + +TraversalCode ZInstAux::Traverse(TraversalCallback* cb) const { + TraversalCode tc; + + if ( id_val ) { + tc = id_val->Traverse(cb); + HANDLE_TC_STMT_PRE(tc); + } + + // Don't traverse the "func" field, as if it's a recursive function + // we can wind up right back here. 
+ + if ( lambda ) { + tc = lambda->Traverse(cb); + HANDLE_TC_STMT_PRE(tc); + } + + if ( event_handler ) { + auto g = lookup_ID(event_handler->Name(), GLOBAL_MODULE_NAME, false, false, false); + ASSERT(g); + tc = g->Traverse(cb); + HANDLE_TC_STMT_PRE(tc); + } + + if ( attrs ) { + tc = attrs->Traverse(cb); + HANDLE_TC_STMT_PRE(tc); + } + + if ( value_var_type ) { + tc = value_var_type->Traverse(cb); + HANDLE_TC_STMT_PRE(tc); + } + + for ( auto& lvt : loop_var_types ) { + tc = lvt->Traverse(cb); + HANDLE_TC_STMT_PRE(tc); + } + + if ( elems ) { + for ( int i = 0; i < n; ++i ) { + auto& e_i = elems[i]; + + auto& c = e_i.Constant(); + if ( c ) { + tc = c->GetType()->Traverse(cb); + HANDLE_TC_STMT_PRE(tc); + } + + auto& t = e_i.GetType(); + if ( t ) { + tc = t->Traverse(cb); + HANDLE_TC_STMT_PRE(tc); + } + } + } + + return TC_CONTINUE; +} + void ZInstI::Dump(FILE* f, const FrameMap* frame_ids, const FrameReMap* remappings) const { int n = NumFrameSlots(); // fprintf(f, "v%d ", n); diff --git a/src/script_opt/ZAM/ZInst.h b/src/script_opt/ZAM/ZInst.h index a0d56e9c73..35ed4fc233 100644 --- a/src/script_opt/ZAM/ZInst.h +++ b/src/script_opt/ZAM/ZInst.h @@ -6,6 +6,7 @@ #include "zeek/Desc.h" #include "zeek/Func.h" +#include "zeek/TraverseTypes.h" #include "zeek/script_opt/ZAM/BuiltInSupport.h" #include "zeek/script_opt/ZAM/Support.h" #include "zeek/script_opt/ZAM/ZOp.h" @@ -109,6 +110,8 @@ public: // Returns a string describing the constant. std::string ConstDump() const; + TraversalCode Traverse(TraversalCallback* cb) const; + ZOp op = OP_NOP; ZAMOpType op_type = OP_X; @@ -440,6 +443,8 @@ public: // Same but for constants. void Add(int i, ValPtr c) { elems[i].SetConstant(c); } + TraversalCode Traverse(TraversalCallback* cb) const; + // Member variables. We could add accessors for manipulating // these (and make the variables private), but for convenience we // make them directly available. 
From 11e9135f800afc500fbea21003011b95a446784c Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Mon, 5 Aug 2024 09:26:35 +0100 Subject: [PATCH 11/33] fix for avoiding inadvertent interpreter errors in CallExpr::IsPure() --- src/Expr.cc | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/Expr.cc b/src/Expr.cc index abbb07a3d4..d87526b654 100644 --- a/src/Expr.cc +++ b/src/Expr.cc @@ -4070,11 +4070,15 @@ bool CallExpr::IsPure() const { if ( IsError() ) return true; - if ( ! func->IsPure() ) + if ( func->Tag() != EXPR_NAME ) + // Indirect call, can't resolve up front. return false; - auto func_val = func->Eval(nullptr); + auto func_id = func->AsNameExpr()->Id(); + if ( ! func_id->IsGlobal() ) + return false; + auto func_val = func_id->GetVal(); if ( ! func_val ) return false; From 1253af42f9d57c6ff379d33fd042ba99d496af2f Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Mon, 5 Aug 2024 09:27:33 +0100 Subject: [PATCH 12/33] mark functions skipped by ZAM compilation as such --- src/script_opt/ScriptOpt.cc | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/script_opt/ScriptOpt.cc b/src/script_opt/ScriptOpt.cc index 4a3e712399..47f9fddf24 100644 --- a/src/script_opt/ScriptOpt.cc +++ b/src/script_opt/ScriptOpt.cc @@ -507,12 +507,12 @@ static void analyze_scripts_for_ZAM(std::shared_ptr pfs) { if ( ! analysis_options.compile_all && ! is_lambda && inl && inl->WasFullyInlined(func.get()) && func_used_indirectly.count(func.get()) == 0 ) { - // No need to compile as it won't be called directly. - // We'd like to zero out the body to recover the - // memory, but a *few* such functions do get called, - // such as by the event engine reaching up, or - // BiFs looking for them, so we can't safely zero - // them. + // No need to compile as it won't be called directly. 
We'd + // like to zero out the body to recover the memory, but a *few* + // such functions do get called, such as by the event engine + // reaching up, or BiFs looking for them, so we can't safely + // zero them. + f.SetSkip(true); continue; } From 88740acffe39e0ef9d97f45328b169ece01030a5 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Mon, 5 Aug 2024 09:28:51 +0100 Subject: [PATCH 13/33] fix for nit in base/protocols/krb/main.zeek --- scripts/base/protocols/krb/main.zeek | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/scripts/base/protocols/krb/main.zeek b/scripts/base/protocols/krb/main.zeek index ef0f3e7b2f..0b3066f89a 100644 --- a/scripts/base/protocols/krb/main.zeek +++ b/scripts/base/protocols/krb/main.zeek @@ -190,8 +190,7 @@ event krb_as_response(c: connection, msg: KDC_Response) &priority=-5 event krb_ap_request(c: connection, ticket: KRB::Ticket, opts: KRB::AP_Options) &priority=5 { - if ( set_session(c) ) - return; + set_session(c); } event krb_tgs_request(c: connection, msg: KDC_Request) &priority=5 From 3cf3cc3c5e7e59ba0b1b8ac2ba5cd4c37d7544a7 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Mon, 5 Aug 2024 09:29:36 +0100 Subject: [PATCH 14/33] ZVal constructor for booleans --- src/ZVal.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/ZVal.h b/src/ZVal.h index ccba8b4174..d7695140c0 100644 --- a/src/ZVal.h +++ b/src/ZVal.h @@ -62,6 +62,7 @@ union ZVal { ZVal(const TypePtr& t); // Construct directly. 
+ ZVal(bool v) { int_val = v; } ZVal(zeek_int_t v) { int_val = v; } ZVal(zeek_uint_t v) { uint_val = v; } ZVal(double v) { double_val = v; } From 1d7e71b4994d8df7a75c618a30ed9a04e58e3449 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Mon, 5 Aug 2024 09:30:03 +0100 Subject: [PATCH 15/33] type-checking fix for vector-of-string operations --- src/Expr.cc | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/Expr.cc b/src/Expr.cc index d87526b654..9e77d3ec19 100644 --- a/src/Expr.cc +++ b/src/Expr.cc @@ -1992,8 +1992,10 @@ EqExpr::EqExpr(ExprTag arg_tag, ExprPtr arg_op1, ExprPtr arg_op2) } } - else if ( bt1 == TYPE_PATTERN && bt2 == TYPE_STRING ) - ; + else if ( (bt1 == TYPE_PATTERN && bt2 == TYPE_STRING) || (bt1 == TYPE_STRING && bt2 == TYPE_PATTERN) ) { + if ( op1->GetType()->Tag() == TYPE_VECTOR ) + ExprError("cannot compare string vectors with pattern vectors"); + } else ExprError("type clash in comparison"); From 5fc2c601b4aac157479d378457c4535697c084b3 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Mon, 5 Aug 2024 10:13:35 +0100 Subject: [PATCH 16/33] updates to ZAM operations / gen-zam regularization, other than the operations themselves --- src/Func.h | 6 +- src/script_opt/ZAM/AM-Opt.cc | 67 ++++++--- src/script_opt/ZAM/Branches.cc | 4 +- src/script_opt/ZAM/BuiltIn.cc | 87 ++++++----- src/script_opt/ZAM/BuiltInSupport.cc | 7 +- src/script_opt/ZAM/BuiltInSupport.h | 24 ++-- src/script_opt/ZAM/Compile.h | 4 +- src/script_opt/ZAM/Driver.cc | 3 + src/script_opt/ZAM/Expr.cc | 103 ++++++------- src/script_opt/ZAM/IterInfo.h | 74 +++++++--- src/script_opt/ZAM/Low-Level.cc | 10 +- src/script_opt/ZAM/Stmt.cc | 207 +++++++++++++++------------ src/script_opt/ZAM/Vars.cc | 11 +- src/script_opt/ZAM/ZInst.cc | 37 ++--- src/script_opt/ZAM/ZInst.h | 58 +++++--- src/script_opt/ZAM/ZOp.cc | 2 +- src/script_opt/ZAM/ZOp.h | 12 ++ 17 files changed, 422 insertions(+), 294 deletions(-) diff --git a/src/Func.h b/src/Func.h index 8b31976e69..f6808484d5 100644 
--- a/src/Func.h +++ b/src/Func.h @@ -291,6 +291,9 @@ protected: */ virtual void SetCaptures(Frame* f); + // Captures when using ZVal block instead of a Frame. + std::unique_ptr> captures_vec; + private: size_t frame_size = 0; @@ -304,9 +307,6 @@ private: OffsetMap* captures_offset_mapping = nullptr; - // Captures when using ZVal block instead of a Frame. - std::unique_ptr> captures_vec; - // The most recently added/updated body ... StmtPtr current_body; diff --git a/src/script_opt/ZAM/AM-Opt.cc b/src/script_opt/ZAM/AM-Opt.cc index 852c483c9b..68f8117f1d 100644 --- a/src/script_opt/ZAM/AM-Opt.cc +++ b/src/script_opt/ZAM/AM-Opt.cc @@ -303,7 +303,7 @@ bool ZAMCompiler::PruneUnused() { if ( assignmentless_op.count(inst->op) == 0 ) reporter->InternalError("inconsistency in re-flavoring instruction with side effects"); - inst->op_type = assignmentless_op_type[inst->op]; + inst->op_type = assignmentless_op_class[inst->op]; inst->op = assignmentless_op[inst->op]; inst->v1 = inst->v2; @@ -336,8 +336,8 @@ void ZAMCompiler::ComputeFrameLifetimes() { // Some special-casing. switch ( inst->op ) { - case OP_NEXT_TABLE_ITER_VV: - case OP_NEXT_TABLE_ITER_VAL_VAR_VVV: { + case OP_NEXT_TABLE_ITER_fb: + case OP_NEXT_TABLE_ITER_VAL_VAR_Vfb: { // These assign to an arbitrary long list of variables. auto& iter_vars = inst->aux->loop_vars; auto depth = inst->loop_depth; @@ -361,21 +361,21 @@ void ZAMCompiler::ComputeFrameLifetimes() { } // No need to check the additional "var" associated - // with OP_NEXT_TABLE_ITER_VAL_VAR_VVV as that's + // with OP_NEXT_TABLE_ITER_VAL_VAR_Vfb as that's // a slot-1 assignment. However, similar to other // loop variables, mark this as a usage. 
- if ( inst->op == OP_NEXT_TABLE_ITER_VAL_VAR_VVV ) + if ( inst->op == OP_NEXT_TABLE_ITER_VAL_VAR_Vfb ) ExtendLifetime(inst->v1, EndOfLoop(inst, depth)); } break; - case OP_NEXT_TABLE_ITER_NO_VARS_VV: break; + case OP_NEXT_TABLE_ITER_NO_VARS_fb: break; - case OP_NEXT_TABLE_ITER_VAL_VAR_NO_VARS_VVV: { + case OP_NEXT_TABLE_ITER_VAL_VAR_NO_VARS_Vfb: { auto depth = inst->loop_depth; ExtendLifetime(inst->v1, EndOfLoop(inst, depth)); } break; - case OP_NEXT_VECTOR_ITER_VAL_VAR_VVVV: { + case OP_NEXT_VECTOR_ITER_VAL_VAR_VVsb: { CheckSlotAssignment(inst->v2, inst); auto depth = inst->loop_depth; @@ -383,13 +383,13 @@ void ZAMCompiler::ComputeFrameLifetimes() { ExtendLifetime(inst->v2, EndOfLoop(inst, depth)); } break; - case OP_NEXT_VECTOR_BLANK_ITER_VAL_VAR_VVV: { + case OP_NEXT_VECTOR_BLANK_ITER_VAL_VAR_Vsb: { auto depth = inst->loop_depth; ExtendLifetime(inst->v1, EndOfLoop(inst, depth)); } break; - case OP_NEXT_VECTOR_ITER_VVV: - case OP_NEXT_STRING_ITER_VVV: + case OP_NEXT_VECTOR_ITER_Vsb: + case OP_NEXT_STRING_ITER_Vsb: // Sometimes loops are written that don't actually // use the iteration variable. However, we still // need to mark the variable as having usage @@ -401,12 +401,12 @@ void ZAMCompiler::ComputeFrameLifetimes() { ExtendLifetime(inst->v1, EndOfLoop(inst, inst->loop_depth)); break; - case OP_NEXT_VECTOR_BLANK_ITER_VV: - case OP_NEXT_STRING_BLANK_ITER_VV: break; + case OP_NEXT_VECTOR_BLANK_ITER_sb: + case OP_NEXT_STRING_BLANK_ITER_sb: break; - case OP_INIT_TABLE_LOOP_VV: - case OP_INIT_VECTOR_LOOP_VV: - case OP_INIT_STRING_LOOP_VV: { + case OP_INIT_TABLE_LOOP_Vf: + case OP_INIT_VECTOR_LOOP_Vs: + case OP_INIT_STRING_LOOP_Vs: { // For all of these, the scope of the aggregate being // looped over is the entire loop, even if it doesn't // directly appear in it, and not just the initializer. @@ -423,14 +423,30 @@ void ZAMCompiler::ComputeFrameLifetimes() { continue; } - case OP_STORE_GLOBAL_V: { + case OP_STORE_GLOBAL_g: { // Use of the global goes to here. 
auto slot = frame_layout1[globalsI[inst->v1].id.get()]; ExtendLifetime(slot, EndOfLoop(inst, 1)); break; } - case OP_LAMBDA_VV: { + case OP_DETERMINE_TYPE_MATCH_VV: { + auto aux = inst->aux; + int n = aux->n; + for ( int i = 0; i < n; ++i ) { + auto slot_i = aux->elems[i].Slot(); + if ( slot_i >= 0 ) { + CheckSlotAssignment(slot_i, inst); + // The variable gets used in the switch that + // immediately follows this instruction, hence + // "i + 1" in the following. + ExtendLifetime(slot_i, insts1[i + 1]); + } + } + break; + } + + case OP_LAMBDA_Vi: { auto aux = inst->aux; int n = aux->n; for ( int i = 0; i < n; ++i ) { @@ -486,8 +502,7 @@ void ZAMCompiler::ReMapFrame() { auto vars = inst_beginnings[inst]; for ( auto v : vars ) { - // Don't remap variables whose values aren't actually - // used. + // Don't remap variables whose values aren't actually used. int slot = frame_layout1[v]; if ( denizen_ending.count(slot) > 0 ) ReMapVar(v, slot, i); @@ -549,9 +564,15 @@ void ZAMCompiler::ReMapFrame() { // Handle special cases. switch ( inst->op ) { - case OP_NEXT_TABLE_ITER_VV: - case OP_NEXT_TABLE_ITER_VAL_VAR_VVV: { - // Rewrite iteration variables. + case OP_INIT_TABLE_LOOP_Vf: + case OP_NEXT_TABLE_ITER_fb: + case OP_NEXT_TABLE_ITER_VAL_VAR_Vfb: { + // Rewrite iteration variables. Strictly speaking we only + // need to do this for the INIT, not the NEXT, since the + // latter currently doesn't access the variables directly but + // instead uses pointers set up by the INIT. We do both types + // here, though, to keep things consistent and to help avoid + // surprises if the implementation changes in the future. 
auto& iter_vars = inst->aux->loop_vars; for ( auto& v : iter_vars ) { if ( v < 0 ) diff --git a/src/script_opt/ZAM/Branches.cc b/src/script_opt/ZAM/Branches.cc index a06347022d..717a4a0afd 100644 --- a/src/script_opt/ZAM/Branches.cc +++ b/src/script_opt/ZAM/Branches.cc @@ -25,13 +25,13 @@ ZAMStmt ZAMCompiler::GenGoTo(GoToSet& v) { } ZAMStmt ZAMCompiler::GoToStub() { - ZInstI z(OP_GOTO_V, 0); + ZInstI z(OP_GOTO_b, 0); z.op_type = OP_V_I1; return AddInst(z); } ZAMStmt ZAMCompiler::GoTo(const InstLabel l) { - ZInstI inst(OP_GOTO_V, 0); + ZInstI inst(OP_GOTO_b, 0); inst.target = l; inst.target_slot = 1; inst.op_type = OP_V_I1; diff --git a/src/script_opt/ZAM/BuiltIn.cc b/src/script_opt/ZAM/BuiltIn.cc index 6a6016e813..bdc61b62d8 100644 --- a/src/script_opt/ZAM/BuiltIn.cc +++ b/src/script_opt/ZAM/BuiltIn.cc @@ -27,8 +27,10 @@ SimpleZBI::SimpleZBI(std::string name, ZOp _const_op, ZOp _op, bool _ret_val_mat bool SimpleZBI::Build(ZAMCompiler* zam, const NameExpr* n, const ExprPList& args) const { ZInstI z; if ( nargs == 0 ) { - if ( n ) + if ( n ) { z = ZInstI(op, zam->Frame1Slot(n, OP1_WRITE)); + z.is_managed = ZVal::IsManagedType(n->GetType()); + } else z = ZInstI(op); } @@ -59,12 +61,9 @@ bool SimpleZBI::Build(ZAMCompiler* zam, const NameExpr* n, const ExprPList& args z.c = ZVal(args[0]->AsConstExpr()->ValuePtr(), t); } - z.t = t; + z.SetType(t); } - if ( n ) - z.is_managed = ZVal::IsManagedType(n->GetType()); - zam->AddInst(z); return true; @@ -104,7 +103,7 @@ bool CondZBI::BuildCond(ZAMCompiler* zam, const ExprPList& args, int& branch_v) auto a0_slot = zam->FrameSlot(a0->AsNameExpr()); z = ZInstI(cond_op, a0_slot, 0); z.op_type = OP_VV_I2; - z.t = a0->GetType(); + z.SetType(a0->GetType()); branch_v = 2; } @@ -129,7 +128,7 @@ bool OptAssignZBI::Build(ZAMCompiler* zam, const NameExpr* n, const ExprPList& a ASSERT(nargs == 1); auto a0 = zam->FrameSlot(args[0]->AsNameExpr()); z = ZInstI(op2, a0); - z.t = args[0]->GetType(); + z.SetType(args[0]->GetType()); } 
zam->AddInst(z); @@ -145,7 +144,7 @@ bool CatZBI::Build(ZAMCompiler* zam, const NameExpr* n, const ExprPList& args) c if ( args.empty() ) { // Weird, but easy enough to support. z = ZInstI(OP_CAT1_VC, nslot); - z.t = n->GetType(); + z.SetType(n->GetType()); z.c = ZVal(val_mgr->EmptyString()); } @@ -168,18 +167,18 @@ bool CatZBI::Build(ZAMCompiler* zam, const NameExpr* n, const ExprPList& args) c else if ( a0->GetType()->Tag() != TYPE_STRING ) { if ( a0->Tag() == EXPR_NAME ) { z = zam->GenInst(OP_CAT1FULL_VV, n, a0->AsNameExpr()); - z.t = a0->GetType(); + z.SetType(a0->GetType()); } else { z = ZInstI(OP_CAT1_VC, nslot); - z.t = n->GetType(); + z.SetType(n->GetType()); z.c = ZVal(ZAM_val_cat(a0->AsConstExpr()->ValuePtr())); } } else if ( a0->Tag() == EXPR_CONST ) { z = zam->GenInst(OP_CAT1_VC, n, a0->AsConstExpr()); - z.t = n->GetType(); + z.SetType(n->GetType()); } else @@ -388,12 +387,12 @@ bool MultiZBI::Build(ZAMCompiler* zam, const NameExpr* n, const ExprPList& args) z.is_managed = ZVal::IsManagedType(n->GetType()); if ( ! consts.empty() ) { - z.t = consts[0]->GetType(); - z.c = ZVal(consts[0], z.t); + z.SetType(consts[0]->GetType()); + z.c = ZVal(consts[0], z.GetType()); } - if ( type_arg >= 0 && ! z.t ) - z.t = args[type_arg]->GetType(); + if ( type_arg >= 0 && ! 
z.GetType() ) + z.SetType(args[type_arg]->GetType()); zam->AddInst(z); @@ -434,14 +433,14 @@ SimpleZBI sta_ZBI{"subnet_to_addr", OP_SUBNET_TO_ADDR_VV, 1}; SimpleZBI ttd_ZBI{"time_to_double", OP_TIME_TO_DOUBLE_VV, 1}; SimpleZBI tl_ZBI{"to_lower", OP_TO_LOWER_VV, 1}; -CondZBI ce_ZBI{"connection_exists", OP_CONN_EXISTS_VV, OP_CONN_EXISTS_COND_VV, 1}; -CondZBI iip_ZBI{"is_icmp_port", OP_IS_ICMP_PORT_VV, OP_IS_ICMP_PORT_COND_VV, 1}; -CondZBI itp_ZBI{"is_tcp_port", OP_IS_TCP_PORT_VV, OP_IS_TCP_PORT_COND_VV, 1}; -CondZBI iup_ZBI{"is_udp_port", OP_IS_UDP_PORT_VV, OP_IS_UDP_PORT_COND_VV, 1}; -CondZBI iv4_ZBI{"is_v4_addr", OP_IS_V4_ADDR_VV, OP_IS_V4_ADDR_COND_VV, 1}; -CondZBI iv6_ZBI{"is_v6_addr", OP_IS_V6_ADDR_VV, OP_IS_V6_ADDR_COND_VV, 1}; -CondZBI rlt_ZBI{"reading_live_traffic", OP_READING_LIVE_TRAFFIC_V, OP_READING_LIVE_TRAFFIC_COND_V, 0}; -CondZBI rt_ZBI{"reading_traces", OP_READING_TRACES_V, OP_READING_TRACES_COND_V, 0}; +CondZBI ce_ZBI{"connection_exists", OP_CONN_EXISTS_VV, OP_CONN_EXISTS_COND_Vb, 1}; +CondZBI iip_ZBI{"is_icmp_port", OP_IS_ICMP_PORT_VV, OP_IS_ICMP_PORT_COND_Vb, 1}; +CondZBI itp_ZBI{"is_tcp_port", OP_IS_TCP_PORT_VV, OP_IS_TCP_PORT_COND_Vb, 1}; +CondZBI iup_ZBI{"is_udp_port", OP_IS_UDP_PORT_VV, OP_IS_UDP_PORT_COND_Vb, 1}; +CondZBI iv4_ZBI{"is_v4_addr", OP_IS_V4_ADDR_VV, OP_IS_V4_ADDR_COND_Vb, 1}; +CondZBI iv6_ZBI{"is_v6_addr", OP_IS_V6_ADDR_VV, OP_IS_V6_ADDR_COND_Vb, 1}; +CondZBI rlt_ZBI{"reading_live_traffic", OP_READING_LIVE_TRAFFIC_V, OP_READING_LIVE_TRAFFIC_COND_b, 0}; +CondZBI rt_ZBI{"reading_traces", OP_READING_TRACES_V, OP_READING_TRACES_COND_b, 0}; // These have a different form to avoid invoking copy constructors. 
auto cat_ZBI = CatZBI(); @@ -468,48 +467,48 @@ OptAssignZBI rtc_ZBI{ "PacketAnalyzer::TEREDO::remove_teredo_connection", MultiZBI faa_ZBI{ "Files::__add_analyzer", {{{VVV}, {OP_FILES_ADD_ANALYZER_VVV, OP_VVV}}, - {{VCV}, {OP_FILES_ADD_ANALYZER_ViV, OP_VVC}}}, + {{VCV}, {OP_FILES_ADD_ANALYZER_VCV, OP_VVC}}}, {{{VVV}, {OP_FILES_ADD_ANALYZER_VVVV, OP_VVVV}}, - {{VCV}, {OP_FILES_ADD_ANALYZER_VViV, OP_VVVC}}}, + {{VCV}, {OP_FILES_ADD_ANALYZER_VVCV, OP_VVVC}}}, 1 }; MultiZBI fra_ZBI{ "Files::__remove_analyzer", {{{VVV}, {OP_FILES_REMOVE_ANALYZER_VVV, OP_VVV}}, - {{VCV}, {OP_FILES_REMOVE_ANALYZER_ViV, OP_VVC}}}, + {{VCV}, {OP_FILES_REMOVE_ANALYZER_VCV, OP_VVC}}}, {{{VVV}, {OP_FILES_REMOVE_ANALYZER_VVVV, OP_VVVV}}, - {{VCV}, {OP_FILES_REMOVE_ANALYZER_VViV, OP_VVVC}}}, + {{VCV}, {OP_FILES_REMOVE_ANALYZER_VVCV, OP_VVVC}}}, 1 }; MultiZBI fsrb_ZBI{ "Files::__set_reassembly_buffer", {{{VV}, {OP_FILES_SET_REASSEMBLY_BUFFER_VV, OP_VV}}, - {{VC}, {OP_FILES_SET_REASSEMBLY_BUFFER_VC, OP_VV_I2}}}, + {{VC}, {OP_FILES_SET_REASSEMBLY_BUFFER_Vi, OP_VV_I2}}}, {{{VV}, {OP_FILES_SET_REASSEMBLY_BUFFER_VVV, OP_VVV}}, - {{VC}, {OP_FILES_SET_REASSEMBLY_BUFFER_VVC, OP_VVV_I3}}} + {{VC}, {OP_FILES_SET_REASSEMBLY_BUFFER_VVi, OP_VVV_I3}}} }; MultiZBI lw_ZBI{ "Log::__write", {{{VV}, {OP_LOG_WRITE_VV, OP_VV}}, - {{CV}, {OP_LOG_WRITEC_V, OP_V}}}, + {{CV}, {OP_LOG_WRITE_CV, OP_V}}}, {{{VV}, {OP_LOG_WRITE_VVV, OP_VVV}}, - {{CV}, {OP_LOG_WRITEC_VV, OP_VV}}} + {{CV}, {OP_LOG_WRITEC_VCV, OP_VV}}} }; MultiZBI gccbt_ZBI{ "get_current_conn_bytes_threshold", true, {{{VV}, {OP_GET_BYTES_THRESH_VVV, OP_VVV}}, - {{VC}, {OP_GET_BYTES_THRESH_VVi, OP_VVC}}} + {{VC}, {OP_GET_BYTES_THRESH_VVC, OP_VVC}}} }; MultiZBI sccbt_ZBI{ "set_current_conn_bytes_threshold", {{{VVV}, {OP_SET_BYTES_THRESH_VVV, OP_VVV}}, - {{VVC}, {OP_SET_BYTES_THRESH_VVi, OP_VVC}}, - {{VCV}, {OP_SET_BYTES_THRESH_ViV, OP_VVC}}, - {{VCC}, {OP_SET_BYTES_THRESH_Vii, OP_VVC_I2}}}, + {{VVC}, {OP_SET_BYTES_THRESH_VVC, OP_VVC}}, + {{VCV}, 
{OP_SET_BYTES_THRESH_VCV, OP_VVC}}, + {{VCC}, {OP_SET_BYTES_THRESH_VCi, OP_VVC_I2}}}, {{{VVV}, {OP_SET_BYTES_THRESH_VVVV, OP_VVVV}}, - {{VVC}, {OP_SET_BYTES_THRESH_VVVi, OP_VVVC}}, - {{VCV}, {OP_SET_BYTES_THRESH_VViV, OP_VVVC}}, - {{VCC}, {OP_SET_BYTES_THRESH_VVii, OP_VVVC_I3}}} + {{VVC}, {OP_SET_BYTES_THRESH_VVVC, OP_VVVC}}, + {{VCV}, {OP_SET_BYTES_THRESH_VVCV, OP_VVVC}}, + {{VCC}, {OP_SET_BYTES_THRESH_VVCi, OP_VVVC_I3}}} }; MultiZBI sw_ZBI{ "starts_with", true, @@ -532,12 +531,12 @@ MultiZBI strstr_ZBI{ "strstr", true, MultiZBI sb_ZBI{ "sub_bytes", true, {{{VVV}, {OP_SUB_BYTES_VVVV, OP_VVVV}}, - {{VVC}, {OP_SUB_BYTES_VVVi, OP_VVVC}}, - {{VCV}, {OP_SUB_BYTES_VViV, OP_VVVC}}, - {{VCC}, {OP_SUB_BYTES_VVii, OP_VVVC_I3}}, - {{CVV}, {OP_SUB_BYTES_VVVC, OP_VVVC}}, - {{CVC}, {OP_SUB_BYTES_VViC, OP_VVVC_I3}}, - {{CCV}, {OP_SUB_BYTES_ViVC, OP_VVVC_I3}}} + {{VVC}, {OP_SUB_BYTES_VVVC, OP_VVVC}}, + {{VCV}, {OP_SUB_BYTES_VVCV, OP_VVVC}}, + {{VCC}, {OP_SUB_BYTES_VVCi, OP_VVVC_I3}}, + {{CVV}, {OP_SUB_BYTES_VCVV, OP_VVVC}}, + {{CVC}, {OP_SUB_BYTES_VCVi, OP_VVVC_I3}}, + {{CCV}, {OP_SUB_BYTES2_VCVi, OP_VVVC_I3}}} }; // clang-format on diff --git a/src/script_opt/ZAM/BuiltInSupport.cc b/src/script_opt/ZAM/BuiltInSupport.cc index 4bc833814d..50cd60c1d6 100644 --- a/src/script_opt/ZAM/BuiltInSupport.cc +++ b/src/script_opt/ZAM/BuiltInSupport.cc @@ -48,8 +48,7 @@ FixedCatArg::FixedCatArg(TypePtr _t) : t(std::move(_t)) { } } -void FixedCatArg::RenderInto(ZVal* zframe, int slot, char*& res) { - auto& z = zframe[slot]; +void FixedCatArg::RenderInto(const ZVal& z, char*& res) { int n; const char* text; std::string str; @@ -140,8 +139,8 @@ void FixedCatArg::RenderInto(ZVal* zframe, int slot, char*& res) { } } -size_t PatternCatArg::ComputeMaxSize(ZVal* zframe, int slot) { - text = zframe[slot].AsPattern()->AsPattern()->PatternText(); +size_t PatternCatArg::ComputeMaxSize(const ZVal& zv) { + text = zv.AsPattern()->AsPattern()->PatternText(); n = strlen(text); return n; } diff --git 
a/src/script_opt/ZAM/BuiltInSupport.h b/src/script_opt/ZAM/BuiltInSupport.h index 7169ea94d1..5ca4691f96 100644 --- a/src/script_opt/ZAM/BuiltInSupport.h +++ b/src/script_opt/ZAM/BuiltInSupport.h @@ -13,9 +13,9 @@ public: virtual ~CatArg() {} - size_t MaxSize(ZVal* zframe, int slot) { return max_size ? *max_size : ComputeMaxSize(zframe, slot); } + size_t MaxSize(const ZVal& zv) { return max_size ? *max_size : ComputeMaxSize(zv); } - virtual void RenderInto(ZVal* zframe, int slot, char*& res) { + virtual void RenderInto(const ZVal& zv, char*& res) { auto n = *max_size; memcpy(res, s->data(), n); res += n; @@ -25,7 +25,7 @@ protected: CatArg() {} CatArg(size_t _max_size) : max_size(_max_size) {} - virtual size_t ComputeMaxSize(ZVal* zframe, int slot) { return 0; } + virtual size_t ComputeMaxSize(const ZVal& zv) { return 0; } // Present if max size is known a priori. std::optional max_size; @@ -38,7 +38,7 @@ class FixedCatArg : public CatArg { public: FixedCatArg(TypePtr t); - void RenderInto(ZVal* zframe, int slot, char*& res) override; + void RenderInto(const ZVal& zv, char*& res) override; protected: TypePtr t; @@ -49,22 +49,22 @@ class StringCatArg : public CatArg { public: StringCatArg() : CatArg() {} - void RenderInto(ZVal* zframe, int slot, char*& res) override { - auto s = zframe[slot].AsString(); + void RenderInto(const ZVal& zv, char*& res) override { + auto s = zv.AsString(); auto n = s->Len(); memcpy(res, s->Bytes(), n); res += n; } protected: - size_t ComputeMaxSize(ZVal* zframe, int slot) override { return zframe[slot].AsString()->Len(); } + size_t ComputeMaxSize(const ZVal& zv) override { return zv.AsString()->Len(); } }; class PatternCatArg : public CatArg { public: PatternCatArg() : CatArg() {} - void RenderInto(ZVal* zframe, int slot, char*& res) override { + void RenderInto(const ZVal& zv, char*& res) override { *(res++) = '/'; strcpy(res, text); res += n; @@ -72,7 +72,7 @@ public: } protected: - size_t ComputeMaxSize(ZVal* zframe, int slot) 
override; + size_t ComputeMaxSize(const ZVal& zv) override; const char* text = nullptr; size_t n = 0; @@ -82,7 +82,7 @@ class DescCatArg : public CatArg { public: DescCatArg(TypePtr _t) : CatArg(), t(std::move(_t)) { d.SetStyle(RAW_STYLE); } - void RenderInto(ZVal* zframe, int slot, char*& res) override { + void RenderInto(const ZVal& zv, char*& res) override { auto n = d.Len(); memcpy(res, d.Bytes(), n); res += n; @@ -90,8 +90,8 @@ public: } protected: - size_t ComputeMaxSize(ZVal* zframe, int slot) override { - zframe[slot].ToVal(t)->Describe(&d); + size_t ComputeMaxSize(const ZVal& zv) override { + zv.ToVal(t)->Describe(&d); return d.Len(); } diff --git a/src/script_opt/ZAM/Compile.h b/src/script_opt/ZAM/Compile.h index 50d2c1ce1c..82b76d4ea3 100644 --- a/src/script_opt/ZAM/Compile.h +++ b/src/script_opt/ZAM/Compile.h @@ -229,8 +229,8 @@ private: const ZAMStmt CompileIndex(const NameExpr* n1, int n2_slot, const TypePtr& n2_type, const ListExpr* l, bool in_when); - const ZAMStmt BuildLambda(const NameExpr* n, LambdaExpr* le); - const ZAMStmt BuildLambda(int n_slot, LambdaExpr* le); + const ZAMStmt BuildLambda(const NameExpr* n, ExprPtr le); // marker + const ZAMStmt BuildLambda(int n_slot, ExprPtr le); // marker // Second argument is which instruction slot holds the branch target. const ZAMStmt GenCond(const Expr* e, int& branch_v); diff --git a/src/script_opt/ZAM/Driver.cc b/src/script_opt/ZAM/Driver.cc index b37e033d43..2c43932755 100644 --- a/src/script_opt/ZAM/Driver.cc +++ b/src/script_opt/ZAM/Driver.cc @@ -203,6 +203,7 @@ StmtPtr ZAMCompiler::CompileBody() { auto zb = make_intrusive(fname, this); zb->SetInsts(insts2); + zb->SetLocationInfo(body->GetLocationInfo()); // Could erase insts1 here to recover memory, but it's handy // for debugging. @@ -218,7 +219,9 @@ void ZAMCompiler::ResolveHookBreaks() { // Rewrite the breaks. 
for ( auto& b : breaks[0] ) { auto& i = insts1[b.stmt_num]; + auto aux = i->aux; *i = ZInstI(OP_HOOK_BREAK_X); + i->aux = aux; } } diff --git a/src/script_opt/ZAM/Expr.cc b/src/script_opt/ZAM/Expr.cc index 4670033811..4769c587a3 100644 --- a/src/script_opt/ZAM/Expr.cc +++ b/src/script_opt/ZAM/Expr.cc @@ -246,7 +246,7 @@ const ZAMStmt ZAMCompiler::CompileAssignExpr(const AssignExpr* e) { } if ( rhs->Tag() == EXPR_LAMBDA ) - return BuildLambda(lhs, rhs->AsLambdaExpr()); + return BuildLambda(lhs, op2); if ( rhs->Tag() == EXPR_COND && r1->GetType()->Tag() == TYPE_VECTOR ) return Bool_Vec_CondVVVV(lhs, r1->AsNameExpr(), r2->AsNameExpr(), r3->AsNameExpr()); @@ -466,20 +466,23 @@ const ZAMStmt ZAMCompiler::CompileFieldLHSAssignExpr(const FieldLHSAssignExpr* e auto field = e->Field(); if ( rhs->Tag() == EXPR_NAME ) - return Field_LHS_AssignFV(e, rhs->AsNameExpr()); + return Field_LHS_AssignFVi(e, rhs->AsNameExpr(), field); if ( rhs->Tag() == EXPR_CONST ) - return Field_LHS_AssignFC(e, rhs->AsConstExpr()); + return Field_LHS_AssignFCi(e, rhs->AsConstExpr(), field); auto r1 = rhs->GetOp1(); auto r2 = rhs->GetOp2(); if ( rhs->Tag() == EXPR_FIELD ) { auto rhs_f = rhs->AsFieldExpr(); - if ( r1->Tag() == EXPR_NAME ) - return Field_LHS_AssignFVi(e, r1->AsNameExpr(), rhs_f->Field()); - return Field_LHS_AssignFCi(e, r1->AsConstExpr(), rhs_f->Field()); + // Note, the LHS field comes after the RHS field rather than before, + // to maintain layout symmetry close to that for non-field RHS's. 
+ if ( r1->Tag() == EXPR_NAME ) + return Field_LHS_AssignFVii(e, r1->AsNameExpr(), rhs_f->Field(), field); + + return Field_LHS_AssignFCii(e, r1->AsConstExpr(), rhs_f->Field(), field); } if ( r1 && r1->IsConst() ) @@ -564,7 +567,7 @@ const ZAMStmt ZAMCompiler::CompileEvent(EventHandler* h, const ListExpr* l) { else { auto n0 = exprs[0]->AsNameExpr(); z.v1 = FrameSlot(n0); - z.t = n0->GetType(); + z.SetType(n0->GetType()); if ( n == 1 ) { z.op = OP_EVENT1_V; @@ -574,7 +577,7 @@ const ZAMStmt ZAMCompiler::CompileEvent(EventHandler* h, const ListExpr* l) { else { auto n1 = exprs[1]->AsNameExpr(); z.v2 = FrameSlot(n1); - z.t2 = n1->GetType(); + z.SetType2(n1->GetType()); if ( n == 2 ) { z.op = OP_EVENT2_VV; @@ -630,7 +633,7 @@ const ZAMStmt ZAMCompiler::CompileInExpr(const NameExpr* n1, const NameExpr* n2, if ( op3_t->AsTableType()->IsPatternIndex() && op2_t->Tag() == TYPE_STRING ) a = n2 ? OP_STR_IN_PAT_TBL_VVV : OP_STR_IN_PAT_TBL_VCV; else - a = n2 ? OP_VAL_IS_IN_TABLE_VVV : OP_CONST_IS_IN_TABLE_VCV; + a = n2 ? OP_VAL_IS_IN_TABLE_VVV : OP_CONST_IS_IN_TABLE_VVC; } else if ( op2->GetType()->Tag() == TYPE_PATTERN ) a = n2 ? (n3 ? OP_P_IN_S_VVV : OP_P_IN_S_VVC) : OP_P_IN_S_VCV; @@ -692,11 +695,11 @@ const ZAMStmt ZAMCompiler::CompileInExpr(const NameExpr* n1, const ListExpr* l, else { auto l_e0_c = l_e[0]->AsConstExpr(); - ZOp op = is_vec ? OP_CONST_IS_IN_VECTOR_VCV : OP_CONST_IS_IN_TABLE_VCV; + ZOp op = is_vec ? 
OP_CONST_IS_IN_VECTOR_VCV : OP_CONST_IS_IN_TABLE_VVC; z = GenInst(op, n1, l_e0_c, n2); } - z.t = l_e[0]->GetType(); + z.SetType(l_e[0]->GetType()); return AddInst(z); } @@ -717,21 +720,21 @@ const ZAMStmt ZAMCompiler::CompileInExpr(const NameExpr* n1, const ListExpr* l, if ( l_e0_n && l_e1_n ) { z = GenInst(OP_VAL2_IS_IN_TABLE_VVVV, n1, l_e0_n, l_e1_n, n2); - z.t2 = l_e0_n->GetType(); + z.SetType2(l_e0_n->GetType()); } else if ( l_e0_n ) { ASSERT(l_e1_c); z = GenInst(OP_VAL2_IS_IN_TABLE_VVVC, n1, l_e0_n, n2, l_e1_c); - z.t2 = l_e0_n->GetType(); + z.SetType2(l_e0_n->GetType()); } else if ( l_e1_n ) { ASSERT(l_e0_c); z = GenInst(OP_VAL2_IS_IN_TABLE_VVCV, n1, l_e1_n, n2, l_e0_c); - z.t2 = l_e1_n->GetType(); + z.SetType2(l_e1_n->GetType()); } else { @@ -743,7 +746,7 @@ const ZAMStmt ZAMCompiler::CompileInExpr(const NameExpr* n1, const ListExpr* l, auto slot = TempForConst(l_e0_c); z = ZInstI(OP_VAL2_IS_IN_TABLE_VVVC, FrameSlot(n1), slot, FrameSlot(n2), l_e1_c); z.op_type = OP_VVVC; - z.t2 = l_e0_c->GetType(); + z.SetType2(l_e0_c->GetType()); } return AddInst(z); @@ -817,7 +820,7 @@ const ZAMStmt ZAMCompiler::CompileIndex(const NameExpr* n1, int n2_slot, const T z = ZInstI(zop, Frame1Slot(n1, zop), n2_slot, n3_slot); } else { - auto zop = OP_INDEX_STRINGC_VVV; + auto zop = OP_INDEX_STRINGC_VVi; z = ZInstI(zop, Frame1Slot(n1, zop), n2_slot, c); z.op_type = OP_VVV_I3; } @@ -846,11 +849,11 @@ const ZAMStmt ZAMCompiler::CompileIndex(const NameExpr* n1, int n2_slot, const T ZOp zop; if ( in_when ) - zop = OP_WHEN_INDEX_VECC_VVV; + zop = OP_WHEN_INDEX_VECC_VVi; else if ( is_any ) - zop = OP_INDEX_ANY_VECC_VVV; + zop = OP_INDEX_ANY_VECC_VVi; else - zop = OP_INDEX_VECC_VVV; + zop = OP_INDEX_VECC_VVi; z = ZInstI(zop, Frame1Slot(n1, zop), n2_slot, c); z.op_type = OP_VVV_I3; @@ -932,18 +935,18 @@ const ZAMStmt ZAMCompiler::CompileIndex(const NameExpr* n1, int n2_slot, const T return AddInst(z); } -const ZAMStmt ZAMCompiler::BuildLambda(const NameExpr* n, LambdaExpr* le) { - return 
BuildLambda(Frame1Slot(n, OP1_WRITE), le); +const ZAMStmt ZAMCompiler::BuildLambda(const NameExpr* n, ExprPtr e) { + return BuildLambda(Frame1Slot(n, OP1_WRITE), std::move(e)); } -const ZAMStmt ZAMCompiler::BuildLambda(int n_slot, LambdaExpr* le) { - auto& captures = le->GetCaptures(); +const ZAMStmt ZAMCompiler::BuildLambda(int n_slot, ExprPtr e) { + auto lambda = cast_intrusive(e); + auto& captures = lambda->GetCaptures(); int ncaptures = captures ? captures->size() : 0; auto aux = new ZInstAux(ncaptures); - aux->primary_func = le->PrimaryFunc(); - aux->lambda_name = le->Name(); - aux->id_val = le->Ingredients()->GetID(); + aux->lambda = cast_intrusive(std::move(e)); + aux->id_val = lambda->Ingredients()->GetID(); for ( int i = 0; i < ncaptures; ++i ) { auto& id_i = (*captures)[i].Id(); @@ -954,7 +957,7 @@ const ZAMStmt ZAMCompiler::BuildLambda(int n_slot, LambdaExpr* le) { aux->Add(i, FrameSlot(id_i), id_i->GetType()); } - auto z = ZInstI(OP_LAMBDA_VV, n_slot, le->PrimaryFunc()->FrameSize()); + auto z = ZInstI(OP_LAMBDA_Vi, n_slot, lambda->PrimaryFunc()->FrameSize()); z.op_type = OP_VV_I2; z.aux = aux; @@ -1031,7 +1034,7 @@ const ZAMStmt ZAMCompiler::AssignVecElems(const Expr* e) { inst = Vector_Elem_AssignVVC(lhs, n2, c3); } - TopMainInst()->t = t3; + TopMainInst()->SetType(t3); return inst; } @@ -1048,7 +1051,7 @@ const ZAMStmt ZAMCompiler::AssignVecElems(const Expr* e) { else inst = Vector_Elem_AssignVVi(lhs, n3, index); - TopMainInst()->t = t3; + TopMainInst()->SetType(t3); return inst; } @@ -1068,7 +1071,7 @@ const ZAMStmt ZAMCompiler::AssignTableElem(const Expr* e) { z = GenInst(OP_TABLE_ELEM_ASSIGN_VC, op1, op3->AsConstExpr()); z.aux = InternalBuildVals(op2); - z.t = op3->GetType(); + z.SetType(op3->GetType()); if ( pfs->HasSideEffects(SideEffectsOp::WRITE, op1->GetType()) ) z.aux->can_change_non_locals = true; @@ -1159,7 +1162,7 @@ const ZAMStmt ZAMCompiler::DoCall(const CallExpr* c, const NameExpr* n) { } } - z.t = arg0->GetType(); + 
z.SetType(arg0->GetType()); } else { @@ -1184,10 +1187,12 @@ const ZAMStmt ZAMCompiler::DoCall(const CallExpr* c, const NameExpr* n) { default: if ( in_when ) { - if ( indirect ) - op = OP_WHENINDCALLN_VV; - else + if ( ! indirect ) op = OP_WHENCALLN_V; + else if ( func_id->IsGlobal() ) + op = OP_WHEN_ID_INDCALLN_V; + else + op = OP_WHENINDCALLN_VV; } else if ( indirect ) { @@ -1279,9 +1284,9 @@ const ZAMStmt ZAMCompiler::ConstructTable(const NameExpr* n, const Expr* e) { auto tt = cast_intrusive(n->GetType()); auto width = tt->GetIndices()->GetTypes().size(); - auto z = GenInst(OP_CONSTRUCT_TABLE_VV, n, width); + auto z = GenInst(OP_CONSTRUCT_TABLE_Vi, n, width); z.aux = InternalBuildVals(con, width + 1); - z.t = tt; + z.SetType(tt); ASSERT(e->Tag() == EXPR_TABLE_CONSTRUCTOR); z.aux->attrs = static_cast(e)->GetAttrs(); @@ -1291,7 +1296,7 @@ const ZAMStmt ZAMCompiler::ConstructTable(const NameExpr* n, const Expr* e) { if ( ! def_attr || def_attr->GetExpr()->Tag() != EXPR_LAMBDA ) return zstmt; - auto def_lambda = def_attr->GetExpr()->AsLambdaExpr(); + auto def_lambda = cast_intrusive(def_attr->GetExpr()); auto dl_t = def_lambda->GetType()->AsFuncType(); auto& captures = dl_t->GetCaptures(); @@ -1308,7 +1313,7 @@ const ZAMStmt ZAMCompiler::ConstructTable(const NameExpr* n, const Expr* e) { z = GenInst(OP_SET_TABLE_DEFAULT_LAMBDA_VV, n, slot); z.op_type = OP_VV; - z.t = def_lambda->GetType(); + z.SetType(def_lambda->GetType()); return AddInst(z); } @@ -1318,9 +1323,9 @@ const ZAMStmt ZAMCompiler::ConstructSet(const NameExpr* n, const Expr* e) { auto tt = n->GetType()->AsTableType(); auto width = tt->GetIndices()->GetTypes().size(); - auto z = GenInst(OP_CONSTRUCT_SET_VV, n, width); + auto z = GenInst(OP_CONSTRUCT_SET_Vi, n, width); z.aux = InternalBuildVals(con, width); - z.t = e->GetType(); + z.SetType(e->GetType()); ASSERT(e->Tag() == EXPR_SET_CONSTRUCTOR); z.aux->attrs = static_cast(e)->GetAttrs(); @@ -1391,13 +1396,13 @@ const ZAMStmt 
ZAMCompiler::ConstructRecord(const NameExpr* n, const Expr* e, boo if ( fi->empty() ) { if ( network_time_index >= 0 ) - op = OP_CONSTRUCT_KNOWN_RECORD_WITH_NT_VV; + op = OP_CONSTRUCT_KNOWN_RECORD_WITH_NT_Vi; else op = OP_CONSTRUCT_KNOWN_RECORD_V; } else { if ( network_time_index >= 0 ) - op = OP_CONSTRUCT_KNOWN_RECORD_WITH_INITS_AND_NT_VV; + op = OP_CONSTRUCT_KNOWN_RECORD_WITH_INITS_AND_NT_Vi; else op = OP_CONSTRUCT_KNOWN_RECORD_WITH_INITS_V; aux->field_inits = std::move(fi); @@ -1411,12 +1416,12 @@ const ZAMStmt ZAMCompiler::ConstructRecord(const NameExpr* n, const Expr* e, boo if ( is_from_rec ) { // Map non-from-rec operand to the from-rec equivalent. switch ( op ) { - case OP_CONSTRUCT_KNOWN_RECORD_WITH_NT_VV: op = OP_CONSTRUCT_KNOWN_RECORD_WITH_NT_FROM_VVV; break; + case OP_CONSTRUCT_KNOWN_RECORD_WITH_NT_Vi: op = OP_CONSTRUCT_KNOWN_RECORD_WITH_NT_FROM_VVi; break; case OP_CONSTRUCT_KNOWN_RECORD_V: op = OP_CONSTRUCT_KNOWN_RECORD_FROM_VV; break; - case OP_CONSTRUCT_KNOWN_RECORD_WITH_INITS_AND_NT_VV: - op = OP_CONSTRUCT_KNOWN_RECORD_WITH_INITS_AND_NT_FROM_VVV; + case OP_CONSTRUCT_KNOWN_RECORD_WITH_INITS_AND_NT_Vi: + op = OP_CONSTRUCT_KNOWN_RECORD_WITH_INITS_AND_NT_FROM_VVi; break; case OP_CONSTRUCT_KNOWN_RECORD_WITH_INITS_V: @@ -1448,7 +1453,7 @@ const ZAMStmt ZAMCompiler::ConstructRecord(const NameExpr* n, const Expr* e, boo z = network_time_index >= 0 ? GenInst(op, n, network_time_index) : GenInst(op, n); z.aux = aux; - z.t = rec_e->GetType(); + z.SetType(rec_e->GetType()); auto inst = AddInst(z); @@ -1488,7 +1493,7 @@ const ZAMStmt ZAMCompiler::ConstructRecord(const NameExpr* n, const Expr* e, boo // Need to add a separate instruction for concretizing the fields. 
z = GenInst(OP_CONCRETIZE_VECTOR_FIELDS_V, n); - z.t = rec_e->GetType(); + z.SetType(rec_e->GetType()); int nf = static_cast(vector_fields.size()); z.aux = new ZInstAux(nf); z.aux->elems_has_slots = false; // we're storing field offsets, not slots @@ -1503,7 +1508,7 @@ const ZAMStmt ZAMCompiler::ConstructVector(const NameExpr* n, const Expr* e) { auto z = GenInst(OP_CONSTRUCT_VECTOR_V, n); z.aux = InternalBuildVals(con); - z.t = e->GetType(); + z.SetType(e->GetType()); return AddInst(z); } @@ -1626,8 +1631,8 @@ const ZAMStmt ZAMCompiler::Is(const NameExpr* n, const Expr* e) { int op_slot = FrameSlot(op); ZInstI z(OP_IS_VV, Frame1Slot(n, OP_IS_VV), op_slot); - z.t2 = op->GetType(); z.SetType(is->TestType()); + z.SetType2(op->GetType()); return AddInst(z); } diff --git a/src/script_opt/ZAM/IterInfo.h b/src/script_opt/ZAM/IterInfo.h index be81e00e4b..061c680edc 100644 --- a/src/script_opt/ZAM/IterInfo.h +++ b/src/script_opt/ZAM/IterInfo.h @@ -15,21 +15,54 @@ namespace zeek::detail { class TableIterInfo { public: - // No constructor needed, as all of our member variables are - // instead instantiated via BeginLoop(). This allows us to - // reuse TableIterInfo objects to lower the overhead associated - // with executing ZBody::Exec for non-recursive functions. + // Empty constructor for a simple version that initializes all the + // member variables via BeginLoop(). Helpful for supporting recursive + // functions that include table iterations. + TableIterInfo() {} + + // Version that populates the fixed fields up front, with the + // dynamic ones being done with SetLoopVars(). + TableIterInfo(const std::vector* _loop_var_types, const std::vector* _lvt_is_managed, + TypePtr _value_var_type) { + SetIterInfo(_loop_var_types, _lvt_is_managed, std::move(_value_var_type)); + } + + // Sets the fixed fields. 
+ void SetIterInfo(const std::vector* _loop_var_types, const std::vector* _lvt_is_managed, + TypePtr _value_var_type) { + loop_var_types = _loop_var_types; + lvt_is_managed = _lvt_is_managed; + value_var_type = std::move(_value_var_type); + } // We do, however, want to make sure that when we go out of scope, // if we have any pending iterators we clear them. ~TableIterInfo() { Clear(); } - // Start looping over the elements of the given table. "_aux" + // Start looping over the elements of the given table. "aux" // provides information about the index variables, their types, // and the type of the value variable (if any). - void BeginLoop(TableValPtr _tv, ZInstAux* _aux) { - tv = _tv; - aux = _aux; + void BeginLoop(TableValPtr _tv, ZVal* frame, ZInstAux* aux) { + tv = std::move(_tv); + + for ( auto lv : aux->loop_vars ) + if ( lv < 0 ) + loop_vars.push_back(nullptr); + else + loop_vars.push_back(&frame[lv]); + + SetIterInfo(&aux->types, &aux->is_managed, aux->value_var_type); + + PrimeIter(); + } + + void BeginLoop(TableValPtr _tv, std::vector _loop_vars) { + tv = std::move(_tv); + loop_vars = std::move(_loop_vars); + PrimeIter(); + } + + void PrimeIter() { auto tvd = tv->AsTable(); tbl_iter = tvd->begin(); tbl_end = tvd->end(); @@ -43,18 +76,17 @@ public: // Performs the next iteration (assuming IsDoneIterating() returned // false), assigning to the index variables. - void NextIter(ZVal* frame) { + void NextIter() { auto ind_lv = tv->RecreateIndex(*(*tbl_iter)->GetHashKey()); for ( int i = 0; i < ind_lv->Length(); ++i ) { - ValPtr ind_lv_p = ind_lv->Idx(i); - auto lv = aux->loop_vars[i]; - if ( lv < 0 ) + auto lv = loop_vars[i]; + if ( ! 
lv ) continue; - auto& var = frame[lv]; - if ( aux->is_managed[i] ) - ZVal::DeleteManagedType(var); - auto& t = aux->types[i]; - var = ZVal(ind_lv_p, t); + + ValPtr ind_lv_p = ind_lv->Idx(i); + if ( (*lvt_is_managed)[i] ) + ZVal::DeleteManagedType(*lv); + *lv = ZVal(ind_lv_p, (*loop_var_types)[i]); } IterFinished(); @@ -63,7 +95,7 @@ public: // For the current iteration, returns the corresponding value. ZVal IterValue() { auto tev = (*tbl_iter)->value; - return ZVal(tev->GetVal(), aux->value_var_type); + return ZVal(tev->GetVal(), value_var_type); } // Called upon finishing the iteration. @@ -78,8 +110,10 @@ public: private: TableValPtr tv = nullptr; - // Associated auxiliary information. - ZInstAux* aux = nullptr; + std::vector loop_vars; + const std::vector* loop_var_types; + const std::vector* lvt_is_managed; + TypePtr value_var_type; std::optional> tbl_iter; std::optional> tbl_end; diff --git a/src/script_opt/ZAM/Low-Level.cc b/src/script_opt/ZAM/Low-Level.cc index 52981cb5ee..25e132339c 100644 --- a/src/script_opt/ZAM/Low-Level.cc +++ b/src/script_opt/ZAM/Low-Level.cc @@ -127,9 +127,9 @@ const ZAMStmt ZAMCompiler::AddInst(const ZInstI& inst, bool suppress_non_local) auto gs = pending_global_store; pending_global_store = -1; - auto store_inst = ZInstI(OP_STORE_GLOBAL_V, gs); + auto store_inst = ZInstI(OP_STORE_GLOBAL_g, gs); store_inst.op_type = OP_V_I1; - store_inst.t = globalsI[gs].id->GetType(); + store_inst.SetType(globalsI[gs].id->GetType()); return AddInst(store_inst); } @@ -138,15 +138,15 @@ const ZAMStmt ZAMCompiler::AddInst(const ZInstI& inst, bool suppress_non_local) auto cs = pending_capture_store; pending_capture_store = -1; - auto& cv = *func->GetType()->AsFuncType()->GetCaptures(); + auto& cv = *func->GetType()->GetCaptures(); auto& c_id = cv[cs].Id(); ZOp op; if ( ZVal::IsManagedType(c_id->GetType()) ) - op = OP_STORE_MANAGED_CAPTURE_VV; + op = OP_STORE_MANAGED_CAPTURE_Vi; else - op = OP_STORE_CAPTURE_VV; + op = OP_STORE_CAPTURE_Vi; auto 
store_inst = ZInstI(op, RawSlot(c_id.get()), cs); store_inst.op_type = OP_VV_I2; diff --git a/src/script_opt/ZAM/Stmt.cc b/src/script_opt/ZAM/Stmt.cc index a8372ba332..f5964d5679 100644 --- a/src/script_opt/ZAM/Stmt.cc +++ b/src/script_opt/ZAM/Stmt.cc @@ -132,7 +132,7 @@ const ZAMStmt ZAMCompiler::IfElse(const Expr* e, const Stmt* s1, const Stmt* s2) if ( e->Tag() == EXPR_NAME ) { auto n = e->AsNameExpr(); - ZOp op = (s1 && s2) ? OP_IF_ELSE_VV : (s1 ? OP_IF_VV : OP_IF_NOT_VV); + ZOp op = (s1 && s2) ? OP_IF_ELSE_Vb : (s1 ? OP_IF_Vb : OP_IF_NOT_Vb); ZInstI cond(op, FrameSlot(n), 0); cond_stmt = AddInst(cond); @@ -160,66 +160,67 @@ const ZAMStmt ZAMCompiler::IfElse(const Expr* e, const Stmt* s1, const Stmt* s2) // Only the else clause is non-empty. auto s2_end = CompileStmt(s2); + AddCFT(insts1.back(), CFT_BLOCK_END); // For complex conditionals, we need to invert their sense since // we're switching to "if ( ! cond ) s2". auto z = insts1[cond_stmt.stmt_num]; switch ( z->op ) { - case OP_IF_ELSE_VV: - case OP_IF_VV: - case OP_IF_NOT_VV: + case OP_IF_ELSE_Vb: + case OP_IF_Vb: + case OP_IF_NOT_Vb: // These are generated correctly above, no need // to fix up. 
break; - case OP_HAS_FIELD_COND_VVV: z->op = OP_NOT_HAS_FIELD_COND_VVV; break; - case OP_NOT_HAS_FIELD_COND_VVV: z->op = OP_HAS_FIELD_COND_VVV; break; + case OP_HAS_FIELD_COND_Vib: z->op = OP_NOT_HAS_FIELD_COND_Vib; break; + case OP_NOT_HAS_FIELD_COND_Vib: z->op = OP_HAS_FIELD_COND_Vib; break; - case OP_CONN_EXISTS_COND_VV: z->op = OP_NOT_CONN_EXISTS_COND_VV; break; - case OP_NOT_CONN_EXISTS_COND_VV: z->op = OP_CONN_EXISTS_COND_VV; break; + case OP_CONN_EXISTS_COND_Vb: z->op = OP_NOT_CONN_EXISTS_COND_Vb; break; + case OP_NOT_CONN_EXISTS_COND_Vb: z->op = OP_CONN_EXISTS_COND_Vb; break; - case OP_IS_ICMP_PORT_COND_VV: z->op = OP_NOT_IS_ICMP_PORT_COND_VV; break; - case OP_NOT_IS_ICMP_PORT_COND_VV: z->op = OP_IS_ICMP_PORT_COND_VV; break; + case OP_IS_ICMP_PORT_COND_Vb: z->op = OP_NOT_IS_ICMP_PORT_COND_Vb; break; + case OP_NOT_IS_ICMP_PORT_COND_Vb: z->op = OP_IS_ICMP_PORT_COND_Vb; break; - case OP_IS_TCP_PORT_COND_VV: z->op = OP_NOT_IS_TCP_PORT_COND_VV; break; - case OP_NOT_IS_TCP_PORT_COND_VV: z->op = OP_IS_TCP_PORT_COND_VV; break; + case OP_IS_TCP_PORT_COND_Vb: z->op = OP_NOT_IS_TCP_PORT_COND_Vb; break; + case OP_NOT_IS_TCP_PORT_COND_Vb: z->op = OP_IS_TCP_PORT_COND_Vb; break; - case OP_IS_UDP_PORT_COND_VV: z->op = OP_NOT_IS_UDP_PORT_COND_VV; break; - case OP_NOT_IS_UDP_PORT_COND_VV: z->op = OP_IS_UDP_PORT_COND_VV; break; + case OP_IS_UDP_PORT_COND_Vb: z->op = OP_NOT_IS_UDP_PORT_COND_Vb; break; + case OP_NOT_IS_UDP_PORT_COND_Vb: z->op = OP_IS_UDP_PORT_COND_Vb; break; - case OP_IS_V4_ADDR_COND_VV: z->op = OP_NOT_IS_V4_ADDR_COND_VV; break; - case OP_NOT_IS_V4_ADDR_COND_VV: z->op = OP_IS_V4_ADDR_COND_VV; break; + case OP_IS_V4_ADDR_COND_Vb: z->op = OP_NOT_IS_V4_ADDR_COND_Vb; break; + case OP_NOT_IS_V4_ADDR_COND_Vb: z->op = OP_IS_V4_ADDR_COND_Vb; break; - case OP_IS_V6_ADDR_COND_VV: z->op = OP_NOT_IS_V6_ADDR_COND_VV; break; - case OP_NOT_IS_V6_ADDR_COND_VV: z->op = OP_IS_V6_ADDR_COND_VV; break; + case OP_IS_V6_ADDR_COND_Vb: z->op = OP_NOT_IS_V6_ADDR_COND_Vb; break; + case 
OP_NOT_IS_V6_ADDR_COND_Vb: z->op = OP_IS_V6_ADDR_COND_Vb; break; - case OP_READING_LIVE_TRAFFIC_COND_V: z->op = OP_NOT_READING_LIVE_TRAFFIC_COND_V; break; - case OP_NOT_READING_LIVE_TRAFFIC_COND_V: z->op = OP_READING_LIVE_TRAFFIC_COND_V; break; + case OP_READING_LIVE_TRAFFIC_COND_b: z->op = OP_NOT_READING_LIVE_TRAFFIC_COND_b; break; + case OP_NOT_READING_LIVE_TRAFFIC_COND_b: z->op = OP_READING_LIVE_TRAFFIC_COND_b; break; - case OP_READING_TRACES_COND_V: z->op = OP_NOT_READING_TRACES_COND_V; break; - case OP_NOT_READING_TRACES_COND_V: z->op = OP_READING_TRACES_COND_V; break; + case OP_READING_TRACES_COND_b: z->op = OP_NOT_READING_TRACES_COND_b; break; + case OP_NOT_READING_TRACES_COND_b: z->op = OP_READING_TRACES_COND_b; break; - case OP_TABLE_HAS_ELEMENTS_COND_VV: z->op = OP_NOT_TABLE_HAS_ELEMENTS_COND_VV; break; - case OP_NOT_TABLE_HAS_ELEMENTS_COND_VV: z->op = OP_TABLE_HAS_ELEMENTS_COND_VV; break; + case OP_TABLE_HAS_ELEMENTS_COND_Vb: z->op = OP_NOT_TABLE_HAS_ELEMENTS_COND_Vb; break; + case OP_NOT_TABLE_HAS_ELEMENTS_COND_Vb: z->op = OP_TABLE_HAS_ELEMENTS_COND_Vb; break; - case OP_VECTOR_HAS_ELEMENTS_COND_VV: z->op = OP_NOT_VECTOR_HAS_ELEMENTS_COND_VV; break; - case OP_NOT_VECTOR_HAS_ELEMENTS_COND_VV: z->op = OP_VECTOR_HAS_ELEMENTS_COND_VV; break; + case OP_VECTOR_HAS_ELEMENTS_COND_Vb: z->op = OP_NOT_VECTOR_HAS_ELEMENTS_COND_Vb; break; + case OP_NOT_VECTOR_HAS_ELEMENTS_COND_Vb: z->op = OP_VECTOR_HAS_ELEMENTS_COND_Vb; break; - case OP_VAL_IS_IN_TABLE_COND_VVV: z->op = OP_VAL_IS_NOT_IN_TABLE_COND_VVV; break; - case OP_VAL_IS_NOT_IN_TABLE_COND_VVV: z->op = OP_VAL_IS_IN_TABLE_COND_VVV; break; + case OP_VAL_IS_IN_TABLE_COND_VVb: z->op = OP_NOT_VAL_IS_IN_TABLE_COND_VVb; break; + case OP_NOT_VAL_IS_IN_TABLE_COND_VVb: z->op = OP_VAL_IS_IN_TABLE_COND_VVb; break; - case OP_CONST_IS_IN_TABLE_COND_VVC: z->op = OP_CONST_IS_NOT_IN_TABLE_COND_VVC; break; - case OP_CONST_IS_NOT_IN_TABLE_COND_VVC: z->op = OP_CONST_IS_IN_TABLE_COND_VVC; break; + case OP_CONST_IS_IN_TABLE_COND_VCb: 
z->op = OP_NOT_CONST_IS_IN_TABLE_COND_VCb; break; + case OP_NOT_CONST_IS_IN_TABLE_COND_VCb: z->op = OP_CONST_IS_IN_TABLE_COND_VCb; break; - case OP_VAL2_IS_IN_TABLE_COND_VVVV: z->op = OP_VAL2_IS_NOT_IN_TABLE_COND_VVVV; break; - case OP_VAL2_IS_NOT_IN_TABLE_COND_VVVV: z->op = OP_VAL2_IS_IN_TABLE_COND_VVVV; break; + case OP_VAL2_IS_IN_TABLE_COND_VVVb: z->op = OP_VAL2_IS_NOT_IN_TABLE_COND_VVVb; break; + case OP_VAL2_IS_NOT_IN_TABLE_COND_VVVb: z->op = OP_VAL2_IS_IN_TABLE_COND_VVVb; break; - case OP_VAL2_IS_IN_TABLE_COND_VVVC: z->op = OP_VAL2_IS_NOT_IN_TABLE_COND_VVVC; break; - case OP_VAL2_IS_NOT_IN_TABLE_COND_VVVC: z->op = OP_VAL2_IS_IN_TABLE_COND_VVVC; break; + case OP_VAL2_IS_IN_TABLE_COND_VVbC: z->op = OP_VAL2_IS_NOT_IN_TABLE_COND_VVbC; break; + case OP_VAL2_IS_NOT_IN_TABLE_COND_VVbC: z->op = OP_VAL2_IS_IN_TABLE_COND_VVbC; break; - case OP_VAL2_IS_IN_TABLE_COND_VVCV: z->op = OP_VAL2_IS_NOT_IN_TABLE_COND_VVCV; break; - case OP_VAL2_IS_NOT_IN_TABLE_COND_VVCV: z->op = OP_VAL2_IS_IN_TABLE_COND_VVCV; break; + case OP_VAL2_IS_IN_TABLE_COND_VVCb: z->op = OP_VAL2_IS_NOT_IN_TABLE_COND_VVCb; break; + case OP_VAL2_IS_NOT_IN_TABLE_COND_VVCb: z->op = OP_VAL2_IS_IN_TABLE_COND_VVCb; break; default: reporter->InternalError("inconsistency in ZAMCompiler::IfElse"); } @@ -234,7 +235,7 @@ const ZAMStmt ZAMCompiler::GenCond(const Expr* e, int& branch_v) { if ( e->Tag() == EXPR_HAS_FIELD ) { auto hf = e->AsHasFieldExpr(); - auto z = GenInst(OP_HAS_FIELD_COND_VVV, op1->AsNameExpr(), hf->Field()); + auto z = GenInst(OP_HAS_FIELD_COND_Vib, op1->AsNameExpr(), hf->Field()); z.op_type = OP_VVV_I2_I3; branch_v = 3; return AddInst(z); @@ -251,15 +252,15 @@ const ZAMStmt ZAMCompiler::GenCond(const Expr* e, int& branch_v) { } if ( op1->Tag() == EXPR_NAME ) { - auto z = GenInst(OP_VAL_IS_IN_TABLE_COND_VVV, op1->AsNameExpr(), op2, 0); - z.t = op1->GetType(); + auto z = GenInst(OP_VAL_IS_IN_TABLE_COND_VVb, op1->AsNameExpr(), op2, 0); + z.SetType(op1->GetType()); branch_v = 3; return AddInst(z); } if 
( op1->Tag() == EXPR_CONST ) { - auto z = GenInst(OP_CONST_IS_IN_TABLE_COND_VVC, op2, op1->AsConstExpr(), 0); - z.t = op1->GetType(); + auto z = GenInst(OP_CONST_IS_IN_TABLE_COND_VCb, op2, op1->AsConstExpr(), 0); + z.SetType(op1->GetType()); branch_v = 2; return AddInst(z); } @@ -286,30 +287,30 @@ const ZAMStmt ZAMCompiler::GenCond(const Expr* e, int& branch_v) { ZInstI z; if ( name0 && name1 ) { - z = GenInst(OP_VAL2_IS_IN_TABLE_COND_VVVV, n0, n1, op2, 0); + z = GenInst(OP_VAL2_IS_IN_TABLE_COND_VVVb, n0, n1, op2, 0); branch_v = 4; - z.t2 = n0->GetType(); + z.SetType2(n0->GetType()); } else if ( name0 ) { - z = GenInst(OP_VAL2_IS_IN_TABLE_COND_VVVC, n0, op2, c1, 0); + z = GenInst(OP_VAL2_IS_IN_TABLE_COND_VVbC, n0, op2, c1, 0); branch_v = 3; - z.t2 = n0->GetType(); + z.SetType2(n0->GetType()); } else if ( name1 ) { - z = GenInst(OP_VAL2_IS_IN_TABLE_COND_VVCV, n1, op2, c0, 0); + z = GenInst(OP_VAL2_IS_IN_TABLE_COND_VVCb, n1, op2, c0, 0); branch_v = 3; - z.t2 = n1->GetType(); + z.SetType2(n1->GetType()); } else { // Both are constants, assign first to temporary. auto slot = TempForConst(c0); - z = ZInstI(OP_VAL2_IS_IN_TABLE_COND_VVVC, slot, FrameSlot(op2), 0, c1); + z = ZInstI(OP_VAL2_IS_IN_TABLE_COND_VVbC, slot, FrameSlot(op2), 0, c1); z.op_type = OP_VVVC_I3; branch_v = 3; - z.t2 = c0->GetType(); + z.SetType2(c0->GetType()); } return AddInst(z); @@ -328,9 +329,9 @@ const ZAMStmt ZAMCompiler::GenCond(const Expr* e, int& branch_v) { ZOp op; if ( aggr->GetType()->Tag() == TYPE_TABLE ) - op = OP_TABLE_HAS_ELEMENTS_COND_VV; + op = OP_TABLE_HAS_ELEMENTS_COND_Vb; else - op = OP_VECTOR_HAS_ELEMENTS_COND_VV; + op = OP_VECTOR_HAS_ELEMENTS_COND_Vb; branch_v = 2; return AddInst(GenInst(op, aggr, +0)); @@ -409,37 +410,42 @@ const ZAMStmt ZAMCompiler::ValueSwitch(const SwitchStmt* sw, const NameExpr* v, // Figure out which jump table we're using. auto t = v ? 
v->GetType() : c->GetType(); + + return GenSwitch(sw, slot, t->InternalType()); +} + +const ZAMStmt ZAMCompiler::GenSwitch(const SwitchStmt* sw, int slot, InternalTypeTag it) { int tbl = 0; ZOp op; - switch ( t->InternalType() ) { + switch ( it ) { case TYPE_INTERNAL_INT: - op = OP_SWITCHI_VVV; + op = OP_SWITCHI_Vii; tbl = int_casesI.size(); break; case TYPE_INTERNAL_UNSIGNED: - op = OP_SWITCHU_VVV; + op = OP_SWITCHU_Vii; tbl = uint_casesI.size(); break; case TYPE_INTERNAL_DOUBLE: - op = OP_SWITCHD_VVV; + op = OP_SWITCHD_Vii; tbl = double_casesI.size(); break; case TYPE_INTERNAL_STRING: - op = OP_SWITCHS_VVV; + op = OP_SWITCHS_Vii; tbl = str_casesI.size(); break; case TYPE_INTERNAL_ADDR: - op = OP_SWITCHA_VVV; + op = OP_SWITCHA_Vii; tbl = str_casesI.size(); break; case TYPE_INTERNAL_SUBNET: - op = OP_SWITCHN_VVV; + op = OP_SWITCHN_Vii; tbl = str_casesI.size(); break; @@ -723,29 +729,39 @@ const ZAMStmt ZAMCompiler::LoopOverTable(const ForStmt* f, const NameExpr* val) auto iter_slot = table_iters.size(); table_iters.emplace_back(); - auto z = ZInstI(OP_INIT_TABLE_LOOP_VV, FrameSlot(val), iter_slot); - z.op_type = OP_VV_I2; - z.SetType(value_var ? value_var->GetType() : nullptr); - z.aux = aux; + auto zi = ZInstI(OP_INIT_TABLE_LOOP_Vf, FrameSlot(val), iter_slot); + zi.op_type = OP_VV_I2; + if ( value_var ) + zi.SetType(value_var->GetType()); + zi.aux = aux; - auto init_end = AddInst(z); + (void)AddInst(zi); + + ZInstI zn; auto iter_head = StartingBlock(); if ( value_var ) { - ZOp op = no_loop_vars ? OP_NEXT_TABLE_ITER_VAL_VAR_NO_VARS_VVV : OP_NEXT_TABLE_ITER_VAL_VAR_VVV; - z = ZInstI(op, FrameSlot(value_var), iter_slot, 0); - z.CheckIfManaged(value_var->GetType()); - z.op_type = OP_VVV_I2_I3; + ZOp op = no_loop_vars ? OP_NEXT_TABLE_ITER_VAL_VAR_NO_VARS_Vfb : OP_NEXT_TABLE_ITER_VAL_VAR_Vfb; + zn = ZInstI(op, FrameSlot(value_var), iter_slot, 0); + zn.CheckIfManaged(value_var->GetType()); + zn.op_type = OP_VVV_I2_I3; } else { - ZOp op = no_loop_vars ? 
OP_NEXT_TABLE_ITER_NO_VARS_VV : OP_NEXT_TABLE_ITER_VV; - z = ZInstI(op, iter_slot, 0); - z.op_type = OP_VV_I1_I2; + ZOp op = no_loop_vars ? OP_NEXT_TABLE_ITER_NO_VARS_fb : OP_NEXT_TABLE_ITER_fb; + zn = ZInstI(op, iter_slot, 0); + zn.op_type = OP_VV_I1_I2; } - z.aux = aux; // so ZOpt.cc can get to it + // Need a separate instance of aux so the CFT info doesn't get shared with + // the loop init. We populate it with the loop_vars (only) because the + // optimizer needs access to those for (1) tracking their lifetime, and + // (2) remapping them (not strictly needed, see the comment in ReMapFrame()). + zn.aux = new ZInstAux(0); + zn.aux->loop_vars = aux->loop_vars; + AddCFT(&zn, CFT_LOOP); + AddCFT(&zn, CFT_LOOP_COND); - return FinishLoop(iter_head, z, body, iter_slot, true); + return FinishLoop(iter_head, zn, body, iter_slot, true); } const ZAMStmt ZAMCompiler::LoopOverVector(const ForStmt* f, const NameExpr* val) { @@ -755,7 +771,7 @@ const ZAMStmt ZAMCompiler::LoopOverVector(const ForStmt* f, const NameExpr* val) int iter_slot = num_step_iters++; - auto z = ZInstI(OP_INIT_VECTOR_LOOP_VV, FrameSlot(val), iter_slot); + auto z = ZInstI(OP_INIT_VECTOR_LOOP_Vs, FrameSlot(val), iter_slot); z.op_type = OP_VV_I2; auto init_end = AddInst(z); @@ -765,29 +781,31 @@ const ZAMStmt ZAMCompiler::LoopOverVector(const ForStmt* f, const NameExpr* val) if ( value_var ) { if ( slot >= 0 ) { - z = ZInstI(OP_NEXT_VECTOR_ITER_VAL_VAR_VVVV, slot, FrameSlot(value_var), iter_slot, 0); + z = ZInstI(OP_NEXT_VECTOR_ITER_VAL_VAR_VVsb, slot, FrameSlot(value_var), iter_slot, 0); z.op_type = OP_VVVV_I3_I4; } else { - z = ZInstI(OP_NEXT_VECTOR_BLANK_ITER_VAL_VAR_VVV, FrameSlot(value_var), iter_slot, 0); + z = ZInstI(OP_NEXT_VECTOR_BLANK_ITER_VAL_VAR_Vsb, FrameSlot(value_var), iter_slot, 0); z.op_type = OP_VVV_I2_I3; } - z.t = value_var->GetType(); - z.is_managed = ZVal::IsManagedType(z.t); + z.SetType(value_var->GetType()); } else { if ( slot >= 0 ) { - z = ZInstI(OP_NEXT_VECTOR_ITER_VVV, slot, 
iter_slot, 0); + z = ZInstI(OP_NEXT_VECTOR_ITER_Vsb, slot, iter_slot, 0); z.op_type = OP_VVV_I2_I3; } else { - z = ZInstI(OP_NEXT_VECTOR_BLANK_ITER_VV, iter_slot, 0); + z = ZInstI(OP_NEXT_VECTOR_BLANK_ITER_sb, iter_slot, 0); z.op_type = OP_VV_I1_I2; } } + AddCFT(&z, CFT_LOOP); + AddCFT(&z, CFT_LOOP_COND); + return FinishLoop(iter_head, z, f->LoopBody(), iter_slot, false); } @@ -802,12 +820,12 @@ const ZAMStmt ZAMCompiler::LoopOverString(const ForStmt* f, const Expr* e) { ZInstI z; if ( n ) { - z = ZInstI(OP_INIT_STRING_LOOP_VV, FrameSlot(n), iter_slot); + z = ZInstI(OP_INIT_STRING_LOOP_Vs, FrameSlot(n), iter_slot); z.op_type = OP_VV_I2; } else { ASSERT(c); - z = ZInstI(OP_INIT_STRING_LOOP_VC, iter_slot, c); + z = ZInstI(OP_INIT_STRING_LOOP_Cs, iter_slot, c); z.op_type = OP_VC_I1; } @@ -815,15 +833,18 @@ const ZAMStmt ZAMCompiler::LoopOverString(const ForStmt* f, const Expr* e) { auto iter_head = StartingBlock(); if ( loop_var->IsBlank() ) { - z = ZInstI(OP_NEXT_STRING_BLANK_ITER_VV, iter_slot, 0); + z = ZInstI(OP_NEXT_STRING_BLANK_ITER_sb, iter_slot, 0); z.op_type = OP_VV_I1_I2; } else { - z = ZInstI(OP_NEXT_STRING_ITER_VVV, FrameSlot(loop_var), iter_slot, 0); + z = ZInstI(OP_NEXT_STRING_ITER_Vsb, FrameSlot(loop_var), iter_slot, 0); z.op_type = OP_VVV_I2_I3; z.is_managed = true; } + AddCFT(&z, CFT_LOOP); + AddCFT(&z, CFT_LOOP_COND); + return FinishLoop(iter_head, z, f->LoopBody(), iter_slot, false); } @@ -832,7 +853,11 @@ const ZAMStmt ZAMCompiler::Loop(const Stmt* body) { PushBreaks(); auto head = StartingBlock(); - (void)CompileStmt(body); + auto b = CompileStmt(body); + + AddCFT(insts1[head.stmt_num], CFT_LOOP); + AddCFT(insts1[b.stmt_num], CFT_BLOCK_END); + auto tail = GoTo(GoToTarget(head)); ResolveNexts(GoToTarget(head)); @@ -845,11 +870,12 @@ const ZAMStmt ZAMCompiler::FinishLoop(const ZAMStmt iter_head, ZInstI& iter_stmt bool is_table) { auto loop_iter = AddInst(iter_stmt); auto body_end = CompileStmt(body); + AddCFT(insts1[body_end.stmt_num], 
CFT_BLOCK_END); // We only need cleanup for looping over tables, but for now we // need some sort of placeholder instruction (until the optimizer // can elide it) to resolve loop exits. - ZOp op = is_table ? OP_END_TABLE_LOOP_V : OP_NOP; + ZOp op = is_table ? OP_END_TABLE_LOOP_f : OP_NOP; auto loop_end = GoTo(GoToTarget(iter_head)); auto z = ZInstI(op, iter_slot); @@ -875,6 +901,12 @@ const ZAMStmt ZAMCompiler::CompileReturn(const ReturnStmt* r) { if ( retvars.empty() ) { // a "true" return if ( e ) { + if ( pf->ProfiledFunc()->Flavor() == FUNC_FLAVOR_HOOK ) { + ASSERT(e->GetType()->Tag() == TYPE_BOOL); + auto true_c = make_intrusive(val_mgr->True()); + return ReturnC(true_c.get()); + } + if ( e->Tag() == EXPR_NAME ) return ReturnV(e->AsNameExpr()); else @@ -970,7 +1002,7 @@ const ZAMStmt ZAMCompiler::CompileWhen(const WhenStmt* ws) { auto timeout = wi->TimeoutExpr(); auto lambda = NewSlot(true); - (void)BuildLambda(lambda, wi->Lambda().get()); + (void)BuildLambda(lambda, wi->Lambda()); std::vector local_aggr_slots; for ( auto& l : wi->WhenExprLocals() ) @@ -1006,8 +1038,7 @@ const ZAMStmt ZAMCompiler::CompileWhen(const WhenStmt* ws) { if ( ws->IsReturn() ) { (void)AddInst(z); - z = ZInstI(OP_RETURN_C); - z.c = ZVal(); + z = ZInstI(OP_WHEN_RETURN_X); } return AddInst(z); diff --git a/src/script_opt/ZAM/Vars.cc b/src/script_opt/ZAM/Vars.cc index 3994c12f2d..722da4e158 100644 --- a/src/script_opt/ZAM/Vars.cc +++ b/src/script_opt/ZAM/Vars.cc @@ -40,7 +40,7 @@ void ZAMCompiler::LoadParam(const ID* id) { ZOp op; - op = AssignmentFlavor(OP_LOAD_VAL_VV, id->GetType()->Tag()); + op = AssignmentFlavor(OP_LOAD_VAL_Vi, id->GetType()->Tag()); int slot = AddToFrame(id); @@ -57,9 +57,9 @@ const ZAMStmt ZAMCompiler::LoadGlobal(const ID* id) { if ( id->IsType() ) // Need a special load for these, as they don't fit // with the usual template. 
- op = OP_LOAD_GLOBAL_TYPE_VV; + op = OP_LOAD_GLOBAL_TYPE_Vg; else - op = AssignmentFlavor(OP_LOAD_GLOBAL_VV, id->GetType()->Tag()); + op = AssignmentFlavor(OP_LOAD_GLOBAL_Vg, id->GetType()->Tag()); auto slot = RawSlot(id); @@ -78,13 +78,14 @@ const ZAMStmt ZAMCompiler::LoadCapture(const ID* id) { ZOp op; if ( ZVal::IsManagedType(id->GetType()) ) - op = OP_LOAD_MANAGED_CAPTURE_VV; + op = OP_LOAD_MANAGED_CAPTURE_Vi; else - op = OP_LOAD_CAPTURE_VV; + op = OP_LOAD_CAPTURE_Vi; auto slot = RawSlot(id); ZInstI z(op, slot, CaptureOffset(id)); + z.SetType(id->GetType()); z.op_type = OP_VV_I2; return AddInst(z, true); diff --git a/src/script_opt/ZAM/ZInst.cc b/src/script_opt/ZAM/ZInst.cc index f3a24812b9..1f0539a50b 100644 --- a/src/script_opt/ZAM/ZInst.cc +++ b/src/script_opt/ZAM/ZInst.cc @@ -230,16 +230,16 @@ ValPtr ZInst::ConstVal() const { bool ZInst::IsLoopIterationAdvancement() const { switch ( op ) { - case OP_NEXT_TABLE_ITER_VV: - case OP_NEXT_TABLE_ITER_NO_VARS_VV: - case OP_NEXT_TABLE_ITER_VAL_VAR_VVV: - case OP_NEXT_TABLE_ITER_VAL_VAR_NO_VARS_VVV: - case OP_NEXT_VECTOR_ITER_VVV: - case OP_NEXT_VECTOR_BLANK_ITER_VV: - case OP_NEXT_VECTOR_ITER_VAL_VAR_VVVV: - case OP_NEXT_VECTOR_BLANK_ITER_VAL_VAR_VVV: - case OP_NEXT_STRING_ITER_VVV: - case OP_NEXT_STRING_BLANK_ITER_VV: return true; + case OP_NEXT_TABLE_ITER_fb: + case OP_NEXT_TABLE_ITER_NO_VARS_fb: + case OP_NEXT_TABLE_ITER_VAL_VAR_Vfb: + case OP_NEXT_TABLE_ITER_VAL_VAR_NO_VARS_Vfb: + case OP_NEXT_VECTOR_ITER_Vsb: + case OP_NEXT_VECTOR_BLANK_ITER_sb: + case OP_NEXT_VECTOR_ITER_VAL_VAR_VVsb: + case OP_NEXT_VECTOR_BLANK_ITER_VAL_VAR_Vsb: + case OP_NEXT_STRING_ITER_Vsb: + case OP_NEXT_STRING_BLANK_ITER_sb: return true; default: return false; } @@ -282,7 +282,7 @@ bool ZInst::AssignsToSlot1() const { bool ZInst::AssignsToSlot(int slot) const { switch ( op ) { - case OP_NEXT_VECTOR_ITER_VAL_VAR_VVVV: return slot == 1 || slot == 2; + case OP_NEXT_VECTOR_ITER_VAL_VAR_VVsb: return slot == 1 || slot == 2; default: return 
slot == 1 && AssignsToSlot1(); } @@ -351,7 +351,7 @@ TraversalCode ZInstAux::Traverse(TraversalCallback* cb) const { HANDLE_TC_STMT_PRE(tc); } - for ( auto& lvt : loop_var_types ) { + for ( auto& lvt : types ) { tc = lvt->Traverse(cb); HANDLE_TC_STMT_PRE(tc); } @@ -437,8 +437,9 @@ string ZInstI::VName(int n, const FrameMap* frame_ids, const FrameReMap* remappi bool ZInstI::DoesNotContinue() const { switch ( op ) { - case OP_GOTO_V: + case OP_GOTO_b: case OP_HOOK_BREAK_X: + case OP_WHEN_RETURN_X: case OP_RETURN_C: case OP_RETURN_V: case OP_RETURN_X: return true; @@ -476,7 +477,7 @@ bool ZInstI::IsDirectAssignment() const { bool ZInstI::HasCaptures() const { switch ( op ) { - case OP_LAMBDA_VV: + case OP_LAMBDA_Vi: case OP_WHEN_V: case OP_WHEN_TIMEOUT_VV: case OP_WHEN_TIMEOUT_VC: return true; @@ -636,7 +637,7 @@ void ZInstI::UpdateSlots(std::vector& slot_mapping) { } bool ZInstI::IsGlobalLoad() const { - if ( op == OP_LOAD_GLOBAL_TYPE_VV ) + if ( op == OP_LOAD_GLOBAL_TYPE_Vg ) // These don't have flavors. return true; @@ -645,7 +646,7 @@ bool ZInstI::IsGlobalLoad() const { if ( global_ops.empty() ) { // Initialize the set. 
for ( int t = 0; t < NUM_TYPES; ++t ) { TypeTag tag = TypeTag(t); - ZOp global_op_flavor = AssignmentFlavor(OP_LOAD_GLOBAL_VV, tag, false); + ZOp global_op_flavor = AssignmentFlavor(OP_LOAD_GLOBAL_Vg, tag, false); if ( global_op_flavor != OP_NOP ) global_ops.insert(global_op_flavor); @@ -655,11 +656,11 @@ bool ZInstI::IsGlobalLoad() const { return global_ops.count(op) > 0; } -bool ZInstI::IsCaptureLoad() const { return op == OP_LOAD_CAPTURE_VV || op == OP_LOAD_MANAGED_CAPTURE_VV; } +bool ZInstI::IsCaptureLoad() const { return op == OP_LOAD_CAPTURE_Vi || op == OP_LOAD_MANAGED_CAPTURE_Vi; } void ZInstI::InitConst(const ConstExpr* ce) { auto v = ce->ValuePtr(); - t = ce->GetType(); + SetType(ce->GetType()); c = ZVal(v, t); if ( ZAM_error ) diff --git a/src/script_opt/ZAM/ZInst.h b/src/script_opt/ZAM/ZInst.h index 35ed4fc233..3cf5828f3d 100644 --- a/src/script_opt/ZAM/ZInst.h +++ b/src/script_opt/ZAM/ZInst.h @@ -127,9 +127,19 @@ public: // Meta-data associated with the execution. +protected: + // These are protected to ensure that setting 't' is done via SetType(), + // so we can keep is_managed consistent with it. We don't need that + // for 't2' but keep them together for consistency. + // Type, usually for interpreting the constant. - TypePtr t = nullptr; - TypePtr t2 = nullptr; // just a few ops need two types + TypePtr t; + + TypePtr t2; // just a few ops need two types + +public: + const TypePtr& GetType() const { return t; } + const TypePtr& GetType2() const { return t2; } // Auxiliary information. We could in principle use this to // consolidate a bunch of the above, though at the cost of @@ -143,7 +153,7 @@ public: // Whether v1 represents a frame slot type for which we // explicitly manage the memory. - bool is_managed = false; + std::optional is_managed; }; // A intermediary ZAM instruction, one that includes information/methods @@ -217,7 +227,7 @@ public: // True if this instruction always branches elsewhere. 
Different // from DoesNotContinue() in that returns & hook breaks do not // continue, but they are not branches. - bool IsUnconditionalBranch() const { return op == OP_GOTO_V; } + bool IsUnconditionalBranch() const { return op == OP_GOTO_b; } // True if this instruction is of the form "v1 = v2". bool IsDirectAssignment() const; @@ -257,19 +267,19 @@ public: bool IsLoad() const { return op_type == OP_VV_FRAME || IsNonLocalLoad(); } // True if the instruction corresponds to storing a global. - bool IsGlobalStore() const { return op == OP_STORE_GLOBAL_V; } + bool IsGlobalStore() const { return op == OP_STORE_GLOBAL_g; } - void CheckIfManaged(const TypePtr& t) { - if ( ZVal::IsManagedType(t) ) - is_managed = true; - } + void CheckIfManaged(const TypePtr& t) { is_managed = ZVal::IsManagedType(t); } void SetType(TypePtr _t) { t = std::move(_t); + ASSERT(t); if ( t ) CheckIfManaged(t); } + void SetType2(TypePtr _t) { t2 = std::move(_t); } + // Whether the instruction should be included in final code // generation. bool live = true; @@ -340,6 +350,21 @@ public: return zv; } + // The same, but for read-only access for which memory-management is + // not required. + const ZVal& ToDirectZVal(const ZVal* frame) const { + if ( c ) + return zc; + if ( i >= 0 ) + return frame[i]; + + // Currently the way we use AuxElem's we shouldn't get here, but + // just in case we do, return something sound rather than mis-indexing + // the frame. + static ZVal null_zval; + return null_zval; + } + int Slot() const { return i; } int IntVal() const { return i; } const ValPtr& Constant() const { return c; } @@ -453,11 +478,8 @@ public: AuxElem* elems = nullptr; bool elems_has_slots = true; - // Ingredients associated with lambdas ... - ScriptFuncPtr primary_func; - - // ... and its name. - std::string lambda_name; + // Info for constructing lambdas. + LambdaExprPtr lambda; // For "when" statements. 
std::shared_ptr wi; @@ -466,11 +488,11 @@ public: std::unique_ptr* cat_args = nullptr; // Used for accessing function names. - IDPtr id_val = nullptr; + IDPtr id_val; // Interpreter call expression associated with this instruction, // for error reporting and stack backtraces. - CallExprPtr call_expr = nullptr; + CallExprPtr call_expr; // Used for direct calls. Func* func = nullptr; @@ -555,8 +577,8 @@ extern std::unordered_map> assignment_flav // value is superfluous. extern std::unordered_map assignmentless_op; -// Maps flavorful assignments to what op-type their non-assignment +// Maps flavorful assignments to what operand class their non-assignment // counterpart uses. -extern std::unordered_map assignmentless_op_type; +extern std::unordered_map assignmentless_op_class; } // namespace zeek::detail diff --git a/src/script_opt/ZAM/ZOp.cc b/src/script_opt/ZAM/ZOp.cc index 161276f2cc..d91ad8b713 100644 --- a/src/script_opt/ZAM/ZOp.cc +++ b/src/script_opt/ZAM/ZOp.cc @@ -57,7 +57,7 @@ bool op_side_effects[] = { std::unordered_map> assignment_flavor; std::unordered_map assignmentless_op; -std::unordered_map assignmentless_op_type; +std::unordered_map assignmentless_op_class; ZOp AssignmentFlavor(ZOp orig, TypeTag tag, bool strict) { static bool did_init = false; diff --git a/src/script_opt/ZAM/ZOp.h b/src/script_opt/ZAM/ZOp.h index 12cb0c4118..72ae94ddd3 100644 --- a/src/script_opt/ZAM/ZOp.h +++ b/src/script_opt/ZAM/ZOp.h @@ -4,6 +4,8 @@ #pragma once +#include + namespace zeek::detail { // Opcodes associated with ZAM instructions. @@ -59,6 +61,16 @@ enum ZAMOp1Flavor { OP1_INTERNAL, // we're doing some internal manipulation of the slot }; +// Used to describe ZAM instructions for validation. +struct ZAMInstDesc { + std::string op_class; // associated class + std::string op_types; // operand types + std::string op_eval; // evaluation +}; + +// Provides access to the validation description of each operation. 
+extern std::unordered_map zam_inst_desc; + // Maps an operand to its flavor. extern ZAMOp1Flavor op1_flavor[]; From e94764982d02406faa35240cc03f308eee2f9de9 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Mon, 5 Aug 2024 17:57:43 +0100 Subject: [PATCH 17/33] factoring of ZAM operation specifications into separate files --- auxil/gen-zam | 2 +- src/CMakeLists.txt | 18 +- src/script_opt/ZAM/OPs/README.txt | 286 ++ src/script_opt/ZAM/OPs/ZAM.op | 3368 -------------------- src/script_opt/ZAM/OPs/ZBI.op | 627 ++++ src/script_opt/ZAM/OPs/aggr-assignments.op | 89 + src/script_opt/ZAM/OPs/binary-exprs.op | 104 + src/script_opt/ZAM/OPs/calls.op | 180 ++ src/script_opt/ZAM/OPs/coercions.op | 151 + src/script_opt/ZAM/OPs/constructors.op | 251 ++ src/script_opt/ZAM/OPs/indexing.op | 212 ++ src/script_opt/ZAM/OPs/internal.op | 124 + src/script_opt/ZAM/OPs/iterations.op | 124 + src/script_opt/ZAM/OPs/macros.op | 74 + src/script_opt/ZAM/OPs/non-uniform.op | 267 ++ src/script_opt/ZAM/OPs/rel-exprs.op | 55 + src/script_opt/ZAM/OPs/script-idioms.op | 57 + src/script_opt/ZAM/OPs/stmts.op | 339 ++ src/script_opt/ZAM/OPs/unary-exprs.op | 181 ++ 19 files changed, 3139 insertions(+), 3370 deletions(-) create mode 100644 src/script_opt/ZAM/OPs/README.txt delete mode 100644 src/script_opt/ZAM/OPs/ZAM.op create mode 100644 src/script_opt/ZAM/OPs/ZBI.op create mode 100644 src/script_opt/ZAM/OPs/aggr-assignments.op create mode 100644 src/script_opt/ZAM/OPs/binary-exprs.op create mode 100644 src/script_opt/ZAM/OPs/calls.op create mode 100644 src/script_opt/ZAM/OPs/coercions.op create mode 100644 src/script_opt/ZAM/OPs/constructors.op create mode 100644 src/script_opt/ZAM/OPs/indexing.op create mode 100644 src/script_opt/ZAM/OPs/internal.op create mode 100644 src/script_opt/ZAM/OPs/iterations.op create mode 100644 src/script_opt/ZAM/OPs/macros.op create mode 100644 src/script_opt/ZAM/OPs/non-uniform.op create mode 100644 src/script_opt/ZAM/OPs/rel-exprs.op create mode 100644 
src/script_opt/ZAM/OPs/script-idioms.op create mode 100644 src/script_opt/ZAM/OPs/stmts.op create mode 100644 src/script_opt/ZAM/OPs/unary-exprs.op diff --git a/auxil/gen-zam b/auxil/gen-zam index 610cf8527d..cfc0c7b9de 160000 --- a/auxil/gen-zam +++ b/auxil/gen-zam @@ -1 +1 @@ -Subproject commit 610cf8527dad7033b971595a1d556c2c95294f2b +Subproject commit cfc0c7b9de63f44419c2a57040ae6b7081a66a33 diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index c7ae4f183c..5a623745f6 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -162,7 +162,23 @@ list(APPEND BINPAC_OUTPUTS "${BINPAC_OUTPUT_CC}") include(Gen-ZAM) set(GEN_ZAM_SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/script_opt/ZAM/OPs) -set(GEN_ZAM_SRC ${GEN_ZAM_SRC_DIR}/ZAM.op) +set(ZAM_OP_SRCS + ${GEN_ZAM_SRC_DIR}/aggr-assignments.op + ${GEN_ZAM_SRC_DIR}/binary-exprs.op + ${GEN_ZAM_SRC_DIR}/calls.op + ${GEN_ZAM_SRC_DIR}/coercions.op + ${GEN_ZAM_SRC_DIR}/constructors.op + ${GEN_ZAM_SRC_DIR}/indexing.op + ${GEN_ZAM_SRC_DIR}/internal.op + ${GEN_ZAM_SRC_DIR}/iterations.op + ${GEN_ZAM_SRC_DIR}/macros.op + ${GEN_ZAM_SRC_DIR}/non-uniform.op + ${GEN_ZAM_SRC_DIR}/rel-exprs.op + ${GEN_ZAM_SRC_DIR}/script-idioms.op + ${GEN_ZAM_SRC_DIR}/stmts.op + ${GEN_ZAM_SRC_DIR}/unary-exprs.op + ${GEN_ZAM_SRC_DIR}/ZBI.op) +set(GEN_ZAM_SRC ${ZAM_OP_SRCS}) gen_zam_target(${GEN_ZAM_SRC_DIR}) diff --git a/src/script_opt/ZAM/OPs/README.txt b/src/script_opt/ZAM/OPs/README.txt new file mode 100644 index 0000000000..c8174b5c2c --- /dev/null +++ b/src/script_opt/ZAM/OPs/README.txt @@ -0,0 +1,286 @@ +# See the file "COPYING" in the main distribution directory for copyright. + +# This directory contains templates used to generate virtual functions, opcodes, +# and evaluation code for compiled code. Each template describes a ZAM +# "operation", which generally corresponds to a set of concrete ZAM +# "instructions". (See ZInst.h for the layout of ZAM instructions.) 
Often +# a single ZAM operation gives rise to a family of instructions that differ +# in either the nature of the instruction's operands (typically, whether +# they are variables residing on the ZAM execution frame, or constants) +# and/or the Zeek type of the operands (e.g., "count" or "double" or "addr"). +# +# The Gen-ZAM utility processes these templates to generate numerous C++ inclusion +# files that are then compiled into Zeek. These files span the range of (1) +# hooks that enable run-time generation of ZAM code to execute ASTs (which +# have first been transformed to "reduced" form), (2) specifications of the +# properties of the different instructions, (3) code to evaluate (execute) +# each instruction, and (4) macros (C++ #define's) to aid in writing that +# code. See Gen-ZAM.h for a list of the different inclusion files. +# +# Operation templates are declarative, other than the imperative C++ snippets +# they include for instruction evaluation/execution. You specify a template +# using lines of text for which, for the most part, the first word on the +# line designates an "attribute" associated with the template, and the +# remainder of the line provides specifiers/arguments for that attribute. +# A blank line (or end of file) ends the template. By convention, for +# templates that include C++ evaluation snippets, those are specified as the +# last attribute. Comments begin with '#' at the start of the line (no +# leading whitespace allowed), and can be intermingled with a template's +# attributes. +# +# Each ZAM instruction includes up to 4 integer values and one constant +# (specified as a ZVal). Often, the integer values are interpreted as offsets +# ("slots") into the ZAM execution "frame", though sometimes they have other +# meanings, such as the offset of a particular field in a record, or an index +# into the ZAM code for a branch instruction.
Most instructions compute +# some sort of result (expressed as a ZVal) that is stored into the frame +# slot specified by the instruction's first integer value. We refer to this +# target as the "assignment slot", and to the other 3 integer values as +# "operands". Thus, for example, an instruction with two operands uses the +# first 3 integer values, the first as the assignment slot and the other two +# for computing the result to put in that slot. +# +# Instruction templates have one or more "type"s associated with them (as +# discussed below) specifying the types of operands (variables corresponding +# to slots, or constants) associated with the instruction. In the evaluation +# code for an instruction, these are referred to with $-parameters, such as +# $1 for the first operand. The special parameter $$ refers to the *assignment +# target* of the instruction, if applicable. These parameters always come +# first when specifying an instruction's type. For example, a type of "VVC" +# specifies an instruction with two variables and one constant associated +# with it. If the instruction assigns a value, then in the evaluation these +# will be specified as $$, $1 and $2, respectively. If it does not (usually +# reflected by the template having the "op1-read" attribute) then they +# are specified as $1, $2 and $3, respectively. See "eval" below. +# +# The first attribute of each template states the type of operation specified +# in the template, along with the name of the operation. The possible types +# are: +# +# op an operation that generally corresponds to a single ZAM +# instruction, and is fully specified +# +# expr-op an operation corresponding to an AST expression node +# (some sort of Expr object). Gen-ZAM generates code for +# automatically converting Expr objects to ZAM instructions. +# The name of the operation must match that used in the AST +# tag, so for example for "expr-op Foo" there must be a +# corresponding "EXPR_FOO" tag.
+# +# unary-expr-op an expr-op for a unary Expr object +# binary-expr-op an expr-op for a binary Expr object +# rel-expr-op an expr-op for a (binary) Expr object that +# represents a relational operation +# +# assign-op directly assigning either a ZVal or a record field +# to either a frame slot or a record field +# +# unary-op an operation with one operand that requires special +# treatment that doesn't fit with how unary-expr-op's +# are expressed +# +# direct-unary-op an operation with one operand that corresponds to +# a specific ZAMCompiler method for generating its +# instruction +# +# internal-op similar to "op", but for ZAM instructions only used +# internally, and thus not having any AST counterpart +# internal-assignment-op the same, for operations that assign ZVals +# produced by loading interpreter variables +# or calling functions +# +# After specifying the type of operation, you list additional attributes to +# fill out the template, ending by convention with the C++ evaluation snippet +# (if appropriate). The most significant (and complex) of these are: +# +# class specifies how to interpret the operation in terms of ZAM +# instruction slots (and constant). The specification is +# in terms of single-letter mnemonics for the different +# possible classes: +# +# F special value designating a record field being +# assigned to +# H event handler +# L list of values +# O opaque value (here, "opaque" refers to ZAM +# internals, not OpaqueVal) +# R record +# V variable (frame slot) +# X used to indicate an empty specifier +# b branch target +# f iteration information associated with table "for" loop +# g access to a global +# i integer constant, often a record field offset +# s iteration information associated with stepping +# through a vector or string +# +# The full specification consists of concatenating mnemonics +# with the order left-to-right corresponding to each of the +# instruction's 4 integer values (stopping with the last one +# used). 
If the operation includes a constant, then it is +# listed at the point reflecting where the constant is used as +# an operand. For example, a class of "VVCV" means that the +# first integer is used as a frame variable (i.e., the usual +# "assignment slot"), the second integer (first "operand") is +# also a frame variable, the second operand is the instruction's +# constant, and the third operand is the instruction's third +# integer value, with the fourth integer value not being used. +# +# classes specifies a number of "class" values to instantiate over. +# Cannot be combined with "class", nor used for expressions. +# +# op-type for some form of expr-op, specifies to which Zeek scripting +# types the expression applies: +# +# A addr +# D double +# F file +# I int +# N subnet +# P pattern +# R record +# S string +# T table +# U count +# V vector +# +# along with two special types: 'X' indicates that Gen-ZAM +# should not iterate over any possible values, and '*' +# indicates that Gen-ZAM should additionally iterate over +# all of the possible values not explicitly listed (used in +# conjunction with eval-type - see below) +# +# op-types similar to op-type, but lists a type for each operand +# (including assignment target), so for example "A N A" +# would correspond to a 3-operand instruction for which +# the first operand (or assignment target) is an "addr", +# the second a "subnet", and the third another "addr". +# +# Note that these types collectively apply to each instance of +# an operation, whereas listing multiple "op-type" types +# iterates through those one-at-a-time in turn (and generally +# the point is that each type applies to *all* operands, +# rather than a per-operand list). Given that, the two are +# incompatible. +# +# For operands corresponding to 'i' or any of the internal types, +# such as 'b', 'f', 'g', and 's', the corresponding type to +# list is 'I', used for integer access.
+# +# eval specifies a block of C++ code used to evaluate the +# execution of the instruction. The block begins with the +# remainder of the "eval" line and continues until either a +# blank line or a line that starts with non-whitespace. +# +# Blocks can include special '$' parameters that Gen-ZAM +# automatically expands. "$1" refers to an operation's first +# operand, "$2" to its second, etc. "$$" refers to the +# operation's assignment target. +# +# For simple expr-op's you can express the block as simply +# the C++ expression to compute. For example, for multiplication +# (named "Times"), the "eval" block is simply "$1 * $2", +# rather than "$$ = $1 * $2"; Gen-ZAM knows to expand it +# accordingly. +# +# Finally, to help with avoiding duplicate code, you can +# define macros that expand to code snippets you want to use +# in multiple places. You specify these using a "macro" +# keyword followed by the name of the macro and an evaluation +# block. Macros behave identically to C++ #define's, except +# you don't use "\" to continue them across line breaks, but +# instead just indent the lines you want included, ending +# (as with "eval" blocks) with an empty line or a line that +# starts with non-whitespace. +# +# We list the remaining types of attributes alphabetically. Note that some +# only apply to certain types of operations.
+# +# assign-val for an assignment operation, the name of the +# C++ variable that holds the value to assign +# +# custom-method a ZAMCompiler method that Gen-ZAM should use for +# this operation, rather than generating one +# +# eval-mixed an expression "eval" block that applies to two +# different op-type's +# +# eval-type evaluation code associated with one specific op-type +# +# explicit-result-type the operation's evaluation yields a ZVal +# rather than a low-level C++ type +# +# field-op the operation is a direct assignment to a record field +# +# includes-field-op the operation should include a version +# that assigns to a record field as well as a +# version for assigning to a frame variable +# +# indirect-call the operation represents an indirect call (through +# a global variable, rather than directly). Only +# meaningful if num-call-args is also specified. +# +# indirect-local-call same, but via a local variable rather than +# global +# +# method-post C++ code to add to the end of the method that +# dynamically generates ZAM code +# +# no-const do not generate a version of the unary-expr-op +# where the operand is a constant +# +# no-eval this operation does not have an "eval" block +# (because it will be translated instead into internal +# operations) +# +# num-call-args indicates that the operation is a function call, +# and specifies how many arguments the call takes. +# A specification of 'n' means "build a ZAM instruction +# for calling with an arbitrary number of arguments". +# +# op1-internal states that the operation's treatment of the +# instruction's first integer value is for internal +# purposes; the value does not correspond to a frame +# variable +# +# op1-read the operation treats the instruction's first integer +# value as a frame variable, but only reads the value. +# (The default is that the frame variable is written +# to but not read.) 
+# +# op1-read-write the operation treats the instruction's first integer +# value as a frame variable, and both reads and +# writes the value. +# +# precheck a test conducted before evaluating an expression; +# evaluation is skipped if the test is true. Must be used +# in conjunction with precheck-action. +# +# precheck-action code to execute if a precheck is true, instead +# of evaluating the expression. Must be used in +# conjunction with precheck. +# +# set-type the instruction's primary type comes from either the +# assignment target ("$$"), the first operand ("$1"), +# or the second operand ("$2") +# +# set-type2 the same as set-type but for the instruction's +# secondary type +# +# side-effects the operation has side-effects, so even if its +# assignment target winds up being "dead" (the value is +# no longer used), the operation should still occur. +# Optionally, this attribute can include two arguments +# specifying the ZAM opcode to use if the assignment +# is dead, and the internal "type" of that opcode. +# +# For example, "side-effects OP_CALL1_V OP_V" means +# "this operation has side-effects; if eliminating +# its assignment, change the ZAM op-code to OP_CALL1_V, +# which has an internal type of OP_V". +# +# vector generate a version of the operation that takes +# vectors as operands +# +# Finally, a note concerning comments: due to internal use of C++ #define +# macros, comments in C++ code should use /* ... */ rather than // delimiters. diff --git a/src/script_opt/ZAM/OPs/ZAM.op b/src/script_opt/ZAM/OPs/ZAM.op deleted file mode 100644 index 6b18b4156e..0000000000 --- a/src/script_opt/ZAM/OPs/ZAM.op +++ /dev/null @@ -1,3368 +0,0 @@ -# See the file "COPYING" in the main distribution directory for copyright. - -# This file contains templates used to generate virtual functions, opcodes, -# and evaluation code for compiled code. Each template describes a ZAM -# "operation", which generally corresponds to a set of concrete ZAM -# "instructions".
(See ZInst.h for the layout of ZAM instructions.) Often -# a single ZAM operation gives rise to a family of instructions that differ -# in either the nature of the instruction's operands (typically, whether -# they are variables residing on the ZAM execution frame, or constants) -# and/or the Zeek type of the operands (e.g., "count" or "double" or "addr"). -# -# The Gen-ZAM utility processes this file to generate numerous C++ inclusion -# files that are then compiled into Zeek. These files span the range of (1) -# hooks that enable run-time generation of ZAM code to execute ASTs (which -# have first been transformed to "reduced" form), (2) specifications of the -# properties of the different instructions, (3) code to evaluate (execute) -# each instruction, and (4) macros (C++ #define's) to aid in writing that -# code. See Gen-ZAM.h for a list of the different inclusion files. -# -# Operation templates are declarative, other than the imperative C++ snippets -# they include for instruction evaluation/execution. You specify a template -# using lines of text for which, for the most part, the first word on the -# line designates an "attribute" associated with the template, and the -# remainder of the line provides specifiers/arguments for that attribute. -# A blank line (or end of file) ends the template. By convention, for -# templates that include C++ evaluation snippets, those are specified as the -# last attribute. Comments begin with '#' at the start of the line (no -# leading whitespace allowed), and can be intermingled with a template's -# attributes. -# -# Each ZAM instruction includes up to 4 integer values and one constant -# (specified as a ZVal). Often, the integer values are interpreted as offsets -# ("slots") into the ZAM execution "frame", though sometimes they have other -# meanings, such as the offset of a particular field in a record, or an index -# into the ZAM code for a branch instruction. 
Most instructions compute -# some sort of result (expressed as a ZVal) that is stored into the frame -# slot specified by the instruction's first integer value. We refer to this -# target as the "assignment slot", and to the other 3 integer values as -# "operands". Thus, for example, an instruction with two operands used the -# first 3 integer values, the first as the assignment slot and the other two -# for computing the result to put in that slot. -# -# The first attribute of each template states the type of operation specified -# in the template, along with the name of the operation. The possible types -# are: -# -# op an operation that generally corresponds to a single ZAM -# instruction, and is fully specified -# -# expr-op an operation corresponding to an AST expression node -# (some sort of Expr object). Gen-ZAM generates code for -# automatically converting Expr objects to ZAM instructions. -# The name of the operation must match that used in the AST -# tag, so for example for "expr-op Foo" there must be a -# corresponding "EXPR_FOO" tag. 
-# -# unary-expr-op an expr-op for a unary Expr object -# binary-expr-op an expr-op for a binary Expr object -# rel-expr-op an expr-op for a (binary) Expr object that -# represents a relational operation -# -# assign-op directly assigning either a ZVal or a record field -# to either a frame slot or a record field -# -# unary-op an operation with one operand that requires special -# treatment that doesn't fit with how unary-expr-op's -# are expressed -# -# direct-unary-op an operation with one operand that corresponds to -# a specific ZAMCompiler method for generating its -# instruction -# -# internal-op similar to "op", but for ZAM instructions only used -# internally, and thus not having any AST counterpart -# internal-binary-op the same, for operations that take two operands -# internal-assignment-op the same, for operations that assign ZVals -# produced by loading interpreter variables -# or calling functions -# -# After specifying the type of operation, you list additional attributes to -# fill out the template, ending by convention with the C++ evaluation snippet -# (if appropriate). The most significant (and complex) of these are: -# -# type specifies how to interpret the operation in terms of ZAM -# instruction slots (and constant). The specification is -# in terms of single-letter mnemonics for the different -# possible types: -# -# F special value designating a record field being -# assigned to -# H event handler -# L list of values -# O opaque value (here, "opaque" refers to ZAM -# internals, not OpaqueVal) -# R record -# V variable (frame slot) -# X used to indicate an empty specifier -# i integer constant, often a record field offset -# -# The full specification consists of concatenating mnemonics -# with the order left-to-right corresponding to each of the -# instruction's 4 integer values (stopping with the last one -# used). If the operation includes a constant, then it is -# listed at the point reflecting where the constant is used as -# an operand. 
For example, a type of "VVCV" means that the -# first integer is used as a frame variable (i.e., the usual -# "assignment slot"), the second integer (first "operand") is -# also a frame variable, the second operand is the instruction's -# constant, and the third operand is the instruction's third -# integer value, with the fourth integer value not being used. -# -# op-type for some form of expr-op, specifies to which Zeek scripting -# types the expression applies: -# -# A addr -# D double -# F file -# I int -# N subnet -# P pattern -# S string -# T table -# U count -# V vector -# -# along with two special types: 'X' indicates that Gen-ZAM -# should not iterate over any possible values, and '*' -# indicates that Gen-ZAM should additionally iterate over -# all of possible values not explicitly listed (used in -# conjunction with eval-type - see below) -# -# eval specifies a block of C++ code used to evaluation the -# execution of the instruction. The block begins with the -# remainder of the "eval" line and continues until either a -# blank line or a line that starts with non-whitespace. -# -# Blocks can include special '$' parameters that Gen-ZAM -# automatically expands. "$1" refers to an operation's first -# operand, "$2" to its second, etc. "$$" refers to the -# operation's assignment target. -# -# For simple expr-op's you can express the block as simply -# the C++ expression to compute. For example, for multiplication -# (named "Times"), the "eval" block is simply "$1 * $2", -# rather than "$$ = $1 * $2"; Gen-ZAM knows to expand it -# accordingly. -# -# Finally, to help with avoiding duplicate code, you can -# define macros that expand to code snippets you want to use -# in multiple places. You specify these using a "macro" -# keyword followed by the name of the macro and an evaluation -# block. 
Macros behave identically to C++ #define's, except -# you don't use "\" to continue them across line breaks, but -# instead just indent the lines you want included, ending -# (as with "eval" blocks) with an empty line or a line that -# starts with non-whitespace. -# -# We list the remaining types of attributes alphabetically. Note that some -# only apply to certain types of operations. -# -# assign-val for an assignment operation, the name of the -# C++ variable that holds the value to assign -# -# custom-method a ZAMCompiler method that Gen-ZAM should use for -# this operation, rather than generating one -# -# eval-mixed an expression "eval" block that applies to two -# different op-type's -# -# eval-pre code to add to the beginning of the "eval" block. -# This can be required for operations where Gen-ZAM -# generates elements of the C++ (such as for expr-op's). -# -# eval-type evaluation code associated with one specific op-type -# -# explicit-result-type the operation's evaluation yields a ZVal -# rather than a low-level C++ type -# -# field-op the operation is a direct assignment to a record field -# -# includes-field-op the operation should include a version -# that assigns to a record field as well as a -# version for assigning to a frame variable -# -# indirect-call the operation represents an indirect call (through -# a global variable, rather than directly). Only -# meaningful if num-call-args is also specified. -# -# indirect-local-call same, but via a local variable rather than -# global -# -# method-post C++ code to add to the end of the method that -# dynamically generates ZAM code -# -# no-const do not generate a version of the unary-expr-op -# where the operand is a constant -# -# no-eval this operation does not have an "eval" block -# (because it will be translated instead into internal -# operations) -# -# num-call-args indicates that the operation is a function call, -# and specifies how many arguments the call takes. 
-# A specification of 'n' means "build a ZAM instruction -# for calling with an arbitrary number of arguments". -# -# op-accessor tells Gen-ZAM what ZVal accessor to use to get to -# the underlying values of the operand(s) -# -# op1-accessor the same as op-accessor except only for the first -# operand -# -# op1-internal states that the operation's treatment of the -# instruction's first integer value is for internal -# purposes; the value does not correspond to a frame -# variable -# -# op1-read the operation treats the instruction's first integer -# value as a frame variable, but only reads the value. -# (The default is that the frame variable is written -# to but not read.) -# -# op1-read-write the operation treats the instruction's first integer -# value as a frame variable, and both reads and -# writes the value. -# -# op2-accessor the same as op-accessor except only for the second -# operand -# -# set-type the instruction's primary type comes from either the -# assignment target ("$$"), the first operand ("$1"), -# or the second operand ("$2") -# -# set-type2 the same as set-type but for the instruction's -# secondary type -# -# side-effects the operation has side-effects, so even if its -# assignment target winds up being "dead" (the value is -# no longer used), the operation should still occur. -# Optionally, this attribute can include two arguments -# specifying the ZAM opcode to use if the assignment -# is dead, and the internal "type" of that opcode. -# -# For example, "side-effects OP_CALL1_V OP_V" means -# "this operation has side-effects; if eliminating -# its assignment, change the ZAM op-code to OP_CALL1_V, -# which has an internal type of OP_V". -# -# vector generate a version of the operation that takes -# vectors as operands - - -# The following abstracts the process of creating a frame-assignable value. -macro BuildVal(v, t) ZVal(v, t) - -# Returns a memory-managed-if-necessary copy of an existing value. 
-macro CopyVal(v) (ZVal::IsManagedType(z.t) ? BuildVal((v).ToVal(z.t), z.t) : (v)) - -# Managed assignments to frame[s.v1]. -macro AssignV1T(v, t) { - if ( z.is_managed ) - { - /* It's important to hold a reference to v here prior - to the deletion in case frame[z.v1] points to v. */ - auto v2 = v; - ZVal::DeleteManagedType(frame[z.v1]); - frame[z.v1] = v2; - } - else - frame[z.v1] = v; - } -# Convenience macro for when the value of the assigned type comes from -# the instruction. -macro AssignV1(v) AssignV1T(v, z.t) - -macro BRANCH(target_slot) { DO_ZAM_PROFILE; pc = z.target_slot; continue; } - -########## Unary Ops ########## - -# Direct assignment of an existing value. -assign-op Assign -type V - -# The same, but where the assignment target (LHS) is a record field. -assign-op Field-LHS-Assign -op1-read -type F - -unary-expr-op Clone -no-const -op-type X -set-type $$ -set-type2 $1 -eval auto v = frame[z.v2].ToVal(z.t2)->Clone(); - AssignV1(BuildVal(v, z.t)) - -unary-expr-op Size -no-const -op-type I U D A N S T V * -explicit-result-type -set-type $$ -set-type2 $1 -eval-type I $$ = ZVal(zeek_int_t($1 < 0 ? -$1 : $1)); -eval-type U $$ = ZVal($1); -eval-type D $$ = ZVal($1 < 0 ? -$1 : $1); -eval-type A $$ = ZVal(zeek_uint_t($1->AsAddr().GetFamily() == IPv4 ? 32 : 128)); -eval-type N $$ = ZVal(pow(2.0, double(128 - $1->AsSubNet().LengthIPv6()))); -eval-type S $$ = ZVal(zeek_uint_t($1->Len())); -eval-type T $$ = ZVal(zeek_uint_t($1->Size())); -eval-type V $$ = ZVal(zeek_uint_t($1->Size())); -eval auto v = frame[z.v2].ToVal(z.t2)->SizeVal(); - $$ = BuildVal(v, z.t); - -unary-expr-op Not -op-type I -eval ! 
$1 - -unary-expr-op Complement -op-type U -eval ~ $1 - -unary-expr-op Positive -op-type I U D -vector -eval $1 - -unary-expr-op Negate -op-type I U D -vector -eval -$1 - -op IncrI -op1-read-write -type V -eval ++frame[z.v1].int_val; - -op IncrU -op1-read-write -type V -eval ++frame[z.v1].uint_val; - -op DecrI -op1-read-write -type V -eval --frame[z.v1].int_val; - -op DecrU -op1-read-write -type V -eval auto& u = frame[z.v1].uint_val; - if ( u == 0 ) - ZAM_run_time_warning(z.loc, "count underflow"); - --u; - -unary-op AppendTo -# Note, even though it feels like appending both reads and modifies -# its first operand, for our purposes it just reads it (to get the -# aggregate), and then modifies its *content* but not the operand's -# value itself. -op1-read -set-type $1 -eval auto vv = frame[z.v1].vector_val; - if ( vv->Size() == 0 ) - // Use the slightly more expensive Assign(), since it - // knows how to deal with empty vectors that do not yet - // have concrete types. - vv->Assign(0, $1.ToVal(z.t)); - else - { - vv->RawVec().push_back(CopyVal($1)); - vv->Modified(); - } - -# For vectors-of-any, we always go through the Assign() interface because -# it's needed for tracking the potentially differing types. 
-unary-op AppendToAnyVec -op1-read -set-type $1 -eval auto vv = frame[z.v1].vector_val; - vv->Assign(vv->Size(), $1.ToVal(z.t)); - -internal-op AddPatternToField -type VVi -op1-read -eval EvalAddPatternToField(frame[z.v2], v3) - -macro EvalAddPatternToField(v, f) - auto fpat = frame[z.v1].record_val->GetField(z.f)->AsPatternVal(); - if ( fpat ) - { - v.re_val->AddTo(fpat, false); - frame[z.v1].record_val->Modified(); - } - else - ZAM_run_time_error(z.loc, util::fmt("field value missing: $%s", frame[z.v1].record_val->GetType()->AsRecordType()->FieldName(z.f))); - -internal-op AddPatternToField -type VCi -op1-read -eval EvalAddPatternToField(z.c, v2) - -unary-op ExtendPattern -op1-read -eval $1.re_val->AddTo(frame[z.v1].re_val, false); - -unary-op AddVecToVec -op1-read -eval if ( ! $1.vector_val->AddTo(frame[z.v1].vector_val, false) ) - ZAM_run_time_error(z.loc, "incompatible vector element assignment"); - -unary-op AddTableToTable -op1-read -eval auto t = frame[z.v1].table_val; - auto v = $1.table_val; - if ( v->Size() > 0 ) - { - v->AddTo(t, false); - t->Modified(); - } - -unary-op RemoveTableFromTable -op1-read -eval auto t = frame[z.v1].table_val; - auto v = $1.table_val; - if ( v->Size() > 0 ) - { - v->RemoveFrom(t); - t->Modified(); - } - -unary-expr-op Cast -op-type X -set-type $$ -set-type2 $1 -eval EvalCast(frame[z.v2].ToVal(z.t2)) - -macro EvalCast(rhs) - std::string error; - auto res = cast_value(rhs, z.t, error); - if ( res ) - AssignV1(BuildVal(res, z.t)) - else - ZAM_run_time_error(z.loc, error.c_str()); - -# Cast an "any" type to the given type. Only needed for type-based switch -# statements. 
-internal-op Cast-Any -type VV -eval ValPtr rhs = {NewRef{}, frame[z.v2].any_val}; - EvalCast(rhs) - -direct-unary-op Is Is - -internal-op Is -type VV -eval auto rhs = frame[z.v2].ToVal(z.t2).get(); - frame[z.v1].int_val = can_cast_value_to_type(rhs, z.t.get()); - -########## Binary Ops ########## - -binary-expr-op Add -op-type I U D S -vector -eval $1 + $2 -eval-type S vector strings; - strings.push_back($1->AsString()); - strings.push_back($2->AsString()); - auto res = new StringVal(concatenate(strings)); - $$ = res; - -binary-expr-op Sub -op-type I U D T -vector -eval $1 - $2 -# -eval-type T auto v = $1->Clone(); - auto s = v.release()->AsTableVal(); - $2->RemoveFrom(s); - $$ = s; - -binary-expr-op Times -op-type I U D -vector -eval $1 * $2 - -binary-expr-op Divide -op-type I U D -vector -# -eval-pre if ( $2 == 0 ) - { - ZAM_run_time_error(z.loc, "division by zero"); - break; - } -eval $1 / $2 - -binary-expr-op Mask -op-type I -vector -### Note that this first "eval" is a dummy - we'll never generate code -### that uses it because "Mask" expressions don't have LHS operands of -### type "int". We could omit this if we modified Gen-ZAM to understand -### that an op-type of 'X' for a binary-expr-op means "skip the usual case -### of two operands of the same type". 
-eval $1 / $2 -eval-mixed A I auto mask = static_cast($2); - auto a = $1->AsAddr(); - if ( a.GetFamily() == IPv4 && mask > 32 ) - ZAM_run_time_error(z.loc, util::fmt("bad IPv4 subnet prefix length: %" PRIu32, mask)); - if ( a.GetFamily() == IPv6 && mask > 128 ) - ZAM_run_time_error(z.loc, util::fmt("bad IPv6 subnet prefix length: %" PRIu32, mask)); - auto v = make_intrusive(a, mask); - Unref(frame[z.v1].subnet_val); - frame[z.v1].subnet_val = v.release(); - -binary-expr-op Mod -op-type I U -vector -eval-pre if ( $2 == 0 ) - { - ZAM_run_time_error(z.loc, "modulo by zero"); - break; - } -eval $1 % $2 - -binary-expr-op And-And -op-type I -vector -eval zeek_int_t($1 && $2) - -binary-expr-op Or-Or -op-type I -vector -eval zeek_int_t($1 || $2) - -binary-expr-op And -op-type U P T -vector -eval $1 & $2 -# -eval-type P $$ = new PatternVal(RE_Matcher_conjunction($1->AsPattern(), $2->AsPattern())); -# -eval-type T $$ = $1->Intersection(*$2).release(); - -binary-expr-op Or -op-type U P T -vector -eval $1 | $2 -# -eval-type P $$ = new PatternVal(RE_Matcher_disjunction($1->AsPattern(), $2->AsPattern())); -# -eval-type T auto v = $1->Clone(); - auto s = v.release()->AsTableVal(); - (void) $2->AddTo(s, false, false); - $$ = s; - -binary-expr-op Xor -op-type U -vector -eval $1 ^ $2 - -binary-expr-op Lshift -op-type I U -vector -eval-type I if ( $1 < 0 ) - ZAM_run_time_error(z.loc, "left shifting a negative number is undefined"); - $$ = $1 << $2; -eval $1 << $2 - -binary-expr-op Rshift -op-type I U -vector -eval $1 >> $2 - -########## Relationals ########## - -rel-expr-op LT -op-type I U D S T A -vector -eval $1 < $2 -eval-type S Bstr_cmp($1->AsString(), $2->AsString()) < 0 -eval-type T $1->IsSubsetOf(*$2) && $1->Size() < $2->Size() -eval-type A $1->AsAddr() < $2->AsAddr() - -rel-expr-op LE -op-type I U D S T A -vector -eval $1 <= $2 -eval-type S Bstr_cmp($1->AsString(), $2->AsString()) <= 0 -eval-type T $1->IsSubsetOf(*$2) -eval-type A $1->AsAddr() < $2->AsAddr() || $1->AsAddr() 
== $2->AsAddr() - -rel-expr-op EQ -op-type I U D S T A N F -vector -eval $1 == $2 -eval-type S Bstr_cmp($1->AsString(), $2->AsString()) == 0 -eval-type T $1->EqualTo(*$2) -eval-type A $1->AsAddr() == $2->AsAddr() -eval-type N $1->AsSubNet() == $2->AsSubNet() -eval-type F util::streq($1->Name(), $2->Name()) -eval-mixed P S $1->MatchExactly($2->AsString()) - -rel-expr-op NE -op-type I U D S T A N F -vector -eval $1 != $2 -eval-type S Bstr_cmp($1->AsString(), $2->AsString()) != 0 -eval-type T ! $1->EqualTo(*$2) -eval-type A $1->AsAddr() != $2->AsAddr() -eval-type N $1->AsSubNet() != $2->AsSubNet() -eval-type F ! util::streq($1->Name(), $2->Name()) -eval-mixed P S ! $1->MatchExactly($2->AsString()) - -# Note, canonicalization means that GE and GT shouldn't occur -# for Sets (type T). -rel-expr-op GE -op-type I U D S A -vector -eval $1 >= $2 -eval-type S Bstr_cmp($1->AsString(), $2->AsString()) >= 0 -eval-type A ! ($1->AsAddr() < $2->AsAddr()) - -rel-expr-op GT -op-type I U D S A -vector -eval $1 > $2 -eval-type S Bstr_cmp($1->AsString(), $2->AsString()) > 0 -eval-type A ! ($1->AsAddr() < $2->AsAddr()) && $1->AsAddr() != $2->AsAddr() - -########## Nonuniform Expressions ########## - -assign-op Field -type R -field-op -assign-val v -eval auto r = frame[z.v2].record_val; - auto& rv = r->RawOptField(z.v3); - if ( ! rv ) - { - auto def = r->GetType()->FieldDefault(z.v3); - if ( def ) - rv = ZVal(def, z.t); - else - { - ZAM_run_time_error(z.loc, util::fmt("field value missing: $%s", r->GetType()->AsRecordType()->FieldName(z.v3))); - break; - } - } - auto v = *rv; - -expr-op Has-Field -type VRi -includes-field-op -eval frame[z.v1].int_val = frame[z.v2].record_val->HasField(z.v3); - -internal-op Has-Field-Cond -op1-read -type VVV -eval if ( ! 
frame[z.v1].record_val->HasField(z.v2) ) - BRANCH(v3) - -internal-op Not-Has-Field-Cond -op1-read -type VVV -eval if ( frame[z.v1].record_val->HasField(z.v2) ) - BRANCH(v3) - -internal-op Table-Has-Elements -type VV -eval frame[z.v1].int_val = frame[z.v2].table_val->Size() > 0; - -internal-op Table-Has-Elements-Cond -op1-read -type VV -eval if ( frame[z.v1].table_val->Size() == 0 ) - BRANCH(v2) - -internal-op Not-Table-Has-Elements-Cond -op1-read -type VV -eval if ( frame[z.v1].table_val->Size() > 0 ) - BRANCH(v2) - -internal-op Vector-Has-Elements -type VV -eval frame[z.v1].int_val = frame[z.v2].vector_val->Size() > 0; - -internal-op Vector-Has-Elements-Cond -op1-read -type VV -eval if ( frame[z.v1].vector_val->Size() == 0 ) - BRANCH(v2) - -internal-op Not-Vector-Has-Elements-Cond -op1-read -type VV -eval if ( frame[z.v1].vector_val->Size() > 0 ) - BRANCH(v2) - -expr-op In -type VVV -custom-method return CompileInExpr(n1, n2, n3); -no-eval - -expr-op In -type VCV -custom-method return CompileInExpr(n1, c, n2); -no-eval - -expr-op In -type VVC -custom-method return CompileInExpr(n1, n2, c); -no-eval - -macro EvalPInS(op1, op2) - frame[z.v1].int_val = op1.re_val->MatchAnywhere(op2.string_val->AsString()) != 0; - -internal-op P-In-S -type VVV -eval EvalPInS(frame[z.v2], frame[z.v3]) - -internal-op P-In-S -type VCV -eval EvalPInS(z.c, frame[z.v2]) - -internal-op P-In-S -type VVC -eval EvalPInS(frame[z.v2], z.c) - -macro EvalStrInPatTbl(op1, op2) - frame[z.v1].int_val = op2.table_val->MatchPattern({NewRef{}, op1.string_val}); - -internal-op Str-In-Pat-Tbl -type VVV -eval EvalStrInPatTbl(frame[z.v2], frame[z.v3]) - -internal-op Str-In-Pat-Tbl -type VCV -eval EvalStrInPatTbl(z.c, frame[z.v2]) - -internal-binary-op S-In-S -op-accessor string_val -op-type I -eval auto sc = reinterpret_cast(op1->CheckString()); - auto cmp = util::strstr_n(op2->Len(), op2->Bytes(), op1->Len(), sc); - $$ = cmp != -1; - -internal-binary-op A-In-S -op1-accessor addr_val -op2-accessor subnet_val 
-op-type I -eval $$ = op2->Contains(op1->AsAddr()); - - -# Handled differently because of the unusual middle argument. -op L-In-T -type VLV -custom-method return CompileInExpr(n1, l, n2); -no-eval - -op L-In-T -type VLC -custom-method return CompileInExpr(n, l, c); -no-eval - -op L-In-Vec -type VLV -custom-method return CompileInExpr(n1, l, n2); -no-eval - -op L-In-Vec -type VLC -custom-method return CompileInExpr(n, l, c); -no-eval - - -internal-op Val-Is-In-Table -type VVV -# No set-type as these are internal ops. -eval auto op1 = frame[z.v2].ToVal(z.t); - frame[z.v1].int_val = frame[z.v3].table_val->Find(op1) != nullptr; - -internal-op Val-Is-In-Table-Cond -op1-read -type VVV -eval auto op1 = frame[z.v1].ToVal(z.t); - if ( ! frame[z.v2].table_val->Find(op1) ) - BRANCH(v3) - -internal-op Val-Is-Not-In-Table-Cond -op1-read -type VVV -eval auto op1 = frame[z.v1].ToVal(z.t); - if ( frame[z.v2].table_val->Find(op1) ) - BRANCH(v3) - -# Variants for indexing two values, one of which might be a constant. -# We set the instructions's *second* type to be that of the first variable -# index. We get the type of the second variable (if any) by digging it -# out of the table's type. For a constant in either position, we use -# the main instruction type, as always. - -macro EvalVal2InTableCore(op1, op2) - auto lvp = zeek::make_intrusive(TYPE_ANY); - lvp->Append(op1); - lvp->Append(op2); - -macro EvalVal2InTableAssignCore(slot) - frame[z.v1].int_val = frame[z.slot].table_val->Find(std::move(lvp)) != nullptr; - -macro EvalVal2InTablePre(op1, op2, op3) - auto& tt_ind = frame[z.op3].table_val->GetType()->AsTableType()->GetIndexTypes(); - EvalVal2InTableCore(frame[z.op1].ToVal(z.t2), frame[z.op2].ToVal(tt_ind[1])) - -internal-op Val2-Is-In-Table -type VVVV -eval EvalVal2InTablePre(v2,v3,v4) - EvalVal2InTableAssignCore(v4) - -internal-op Val2-Is-In-Table-Cond -op1-read -type VVVV -eval EvalVal2InTablePre(v1,v2,v3) - EvalVal2InTableCond(v3, lvp, v4, !) 
- -macro EvalVal2InTableCond(cond, op, target, negate) - if ( negate frame[z.cond].table_val->Find(op) ) - BRANCH(target) - -internal-op Val2-Is-Not-In-Table-Cond -op1-read -type VVVV -eval EvalVal2InTablePre(v1,v2,v3) - EvalVal2InTableCond(v3, lvp, v4,) - if ( frame[z.v3].table_val->Find(lvp) ) - BRANCH(v4) - -internal-op Val2-Is-In-Table -type VVVC -eval EvalVal2InTableCore(frame[z.v2].ToVal(z.t2), z.c.ToVal(z.t)) - EvalVal2InTableAssignCore(v3) - -internal-op Val2-Is-In-Table-Cond -op1-read -type VVVC -eval EvalVal2InTableCore(frame[z.v1].ToVal(z.t2), z.c.ToVal(z.t)) - EvalVal2InTableCond(v2, lvp, v3, !) - -internal-op Val2-Is-Not-In-Table-Cond -op1-read -type VVVC -eval EvalVal2InTableCore(frame[z.v1].ToVal(z.t2), z.c.ToVal(z.t)) - EvalVal2InTableCond(v2, lvp, v3, ) - -internal-op Val2-Is-In-Table -type VVCV -eval EvalVal2InTableCore(z.c.ToVal(z.t), frame[z.v2].ToVal(z.t2)) - EvalVal2InTableAssignCore(v3) - -internal-op Val2-Is-In-Table-Cond -op1-read -type VVCV -eval EvalVal2InTableCore(z.c.ToVal(z.t), frame[z.v1].ToVal(z.t2)) - EvalVal2InTableCond(v2, lvp, v3, !) - -internal-op Val2-Is-Not-In-Table-Cond -op1-read -type VVCV -eval EvalVal2InTableCore(z.c.ToVal(z.t), frame[z.v1].ToVal(z.t2)) - EvalVal2InTableCond(v2, lvp, v3, ) - - -internal-op Const-Is-In-Table -type VCV -eval auto op1 = z.c.ToVal(z.t); - frame[z.v1].int_val = frame[z.v2].table_val->Find(op1) != nullptr; - -internal-op Const-Is-In-Table-Cond -op1-read -type VVC -eval auto op1 = z.c.ToVal(z.t); - if ( ! 
frame[z.v1].table_val->Find(op1) ) - BRANCH(v2) - -internal-op Const-Is-Not-In-Table-Cond -op1-read -type VVC -eval auto op1 = z.c.ToVal(z.t); - if ( frame[z.v1].table_val->Find(op1) ) - BRANCH(v2) - -internal-op List-Is-In-Table -type VV -eval auto op1 = z.aux->ToListVal(frame); - frame[z.v1].int_val = frame[z.v2].table_val->Find(std::move(op1)) != nullptr; - -internal-op List-Is-In-Table -type VC -eval auto op1 = z.aux->ToListVal(frame); - frame[z.v1].int_val = z.c.table_val->Find(std::move(op1)) != nullptr; - -internal-op Val-Is-In-Vector -type VVV -eval auto& vec = frame[z.v3].vector_val; - auto ind = frame[z.v2].int_val; - frame[z.v1].int_val = vec->Has(ind); - -internal-op Const-Is-In-Vector -type VCV -eval auto& vec = frame[z.v2].vector_val; - auto ind = z.c.int_val; - frame[z.v1].int_val = vec->Has(ind); - -expr-op Cond -type VVVV -set-type $2 -eval AssignV1(frame[z.v2].int_val ? CopyVal(frame[z.v3]) : CopyVal(frame[z.v4])) - -expr-op Cond -type VVVC -set-type $2 -eval AssignV1(frame[z.v2].int_val ? CopyVal(frame[z.v3]) : CopyVal(z.c)) - -expr-op Cond -type VVCV -set-type $2 -eval AssignV1(frame[z.v2].int_val ? CopyVal(z.c) : CopyVal(frame[z.v3])) - -op Bool-Vec-Cond -type VVVV -set-type $2 -eval auto& vsel = frame[z.v2].vector_val->RawVec(); - auto& v1 = frame[z.v3].vector_val->RawVec(); - auto& v2 = frame[z.v4].vector_val->RawVec(); - auto n = v1.size(); - auto res = new vector>(n); - for ( auto i = 0U; i < n; ++i ) - if ( vsel[i] ) - (*res)[i] = vsel[i]->int_val ? v1[i] : v2[i]; - auto& full_res = frame[z.v1].vector_val; - Unref(full_res); - full_res = new VectorVal(cast_intrusive(z.t), res); - -# Our instruction format doesn't accommodate two constants, so for -# the singular case of a V ? C1 : C2 conditional, we split it into -# two operations, V ? C1 and !V ? C2. -op CondC1 -type VVC -set-type $$ -eval if ( frame[z.v2].int_val ) - AssignV1(CopyVal(z.c)) - -op CondC2 -set-type $$ -type VVC -eval if ( ! 
frame[z.v2].int_val ) - AssignV1(CopyVal(z.c)) - -########## Index Expressions ########## - -op IndexVecBoolSelect -type VVV -set-type $$ -eval EvalIndexVecBoolSelect(frame[z.v2], frame[z.v3]) - -macro EvalIndexVecBoolSelect(op1, op2) - if ( op1.vector_val->Size() != op2.vector_val->Size() ) - { - ZAM_run_time_error(z.loc, "size mismatch, boolean index and vector"); - break; - } - auto vt = cast_intrusive(z.t); - auto v2 = op1.vector_val; - auto v3 = op2.vector_val; - auto v = vector_bool_select(std::move(vt), v2, v3); - Unref(frame[z.v1].vector_val); - frame[z.v1].vector_val = v.release(); - -op IndexVecBoolSelect -type VCV -set-type $$ -eval EvalIndexVecBoolSelect(z.c, frame[z.v2]) - -op IndexVecIntSelect -type VVV -set-type $$ -eval EvalIndexVecIntSelect(frame[z.v2], frame[z.v3]) - -macro EvalIndexVecIntSelect(op1, op2) - auto vt = cast_intrusive(z.t); - auto v2 = op1.vector_val; - auto v3 = op2.vector_val; - auto v = vector_int_select(std::move(vt), v2, v3); - Unref(frame[z.v1].vector_val); - frame[z.v1].vector_val = v.release(); - -op IndexVecIntSelect -type VCV -set-type $$ -eval EvalIndexVecIntSelect(z.c, frame[z.v2]) - -op Index -type VVL -custom-method return CompileIndex(n1, n2, l, false); - -op Index -type VCL -custom-method return CompileIndex(n, c, l, false); - -op WhenIndex -type VVL -custom-method return CompileIndex(n1, n2, l, true); - -op WhenIndex -type VCL -custom-method return CompileIndex(n, c, l, true); - -internal-op Index-Vec -type VVV -eval EvalIndexVec(frame[z.v3].uint_val) - -macro EvalIndexVec(index) - auto& vv = frame[z.v2].vector_val->RawVec(); - const auto& vec = vv; - zeek_int_t ind = index; - if ( ind < 0 ) - ind += vv.size(); - if ( ind < 0 || ind >= int(vv.size()) ) - ZAM_run_time_error(z.loc, "no such index"); - AssignV1(CopyVal(*vec[ind])) - -internal-op Index-VecC -type VVV -eval EvalIndexVec(z.v3) - -internal-op Index-Any-Vec -type VVV -eval EvalIndexAnyVec(frame[z.v3].uint_val) - -macro EvalIndexAnyVec(index) - auto vv = 
frame[z.v2].vector_val; - zeek_int_t ind = index; - if ( ind < 0 ) - ind += vv->Size(); - if ( ind < 0 || ind >= int(vv->Size()) ) - ZAM_run_time_error(z.loc, "no such index"); - AssignV1(ZVal(vv->ValAt(ind).release())) - -internal-op Index-Any-VecC -type VVV -eval EvalIndexAnyVec(z.v3) - -macro WhenIndexResCheck() - auto& res = frame[z.v1].vector_val; - if ( res && IndexExprWhen::evaluating > 0 ) - IndexExprWhen::results.push_back({NewRef{}, res}); - -internal-op When-Index-Vec -type VVV -eval EvalIndexAnyVec(frame[z.v3].uint_val) - WhenIndexResCheck() - -internal-op When-Index-VecC -type VVV -eval EvalIndexAnyVec(z.v3) - WhenIndexResCheck() - -macro EvalVecSlice() - auto vec = frame[z.v2].vector_val; - auto lv = z.aux->ToListVal(frame); - auto v = index_slice(vec, lv.get()); - Unref(frame[z.v1].vector_val); - frame[z.v1].vector_val = v.release(); - -internal-op Index-Vec-Slice -type VV -eval EvalVecSlice() - -internal-op When-Index-Vec-Slice -type VV -eval EvalVecSlice() - WhenIndexResCheck() - -internal-op Table-Index -type VV -eval EvalTableIndex(z.aux->ToListVal(frame)) - AssignV1(BuildVal(v, z.t)) - -macro EvalTablePatStr(index) - auto& lhs = frame[z.v1]; - auto vec = ZVal(frame[z.v2].table_val->LookupPattern({NewRef{}, index.string_val})); - ZVal::DeleteManagedType(lhs); - lhs = vec; - -internal-op Table-PatStr-Index -type VVV -eval EvalTablePatStr(frame[z.v3]) - -internal-op Table-PatStr-Index -type VVC -eval EvalTablePatStr(z.c) - -internal-op When-Table-Index -type VV -eval EvalTableIndex(z.aux->ToListVal(frame)) - if ( IndexExprWhen::evaluating > 0 ) - IndexExprWhen::results.emplace_back(v); - AssignV1(BuildVal(v, z.t)) - -macro EvalTableIndex(index) - auto v = frame[z.v2].table_val->FindOrDefault(index); - if ( ! 
v ) - { - ZAM_run_time_error(z.loc, "no such index"); - break; - } - -internal-op When-PatStr-Index -type VV -eval auto args = z.aux->ToListVal(frame); - auto arg0 = args->Idx(0); - auto v = frame[z.v2].table_val->LookupPattern({NewRef{}, arg0->AsStringVal()}); - if ( IndexExprWhen::evaluating > 0 ) - IndexExprWhen::results.emplace_back(v); - AssignV1(BuildVal(v, z.t)) - -internal-assignment-op Table-Index1 -type VVV -assign-val v -eval EvalTableIndex(frame[z.v3].ToVal(z.t)) -# No AssignV1 needed, as this is an assignment-op - -internal-assignment-op Table-Index1 -type VVC -assign-val v -eval EvalTableIndex(z.c.ToVal(z.t)) - -# This version is for a variable v3. -internal-op Index-String -type VVV -eval EvalIndexString(frame[z.v3].int_val) - -macro EvalIndexString(index) - auto str = frame[z.v2].string_val->AsString(); - auto len = str->Len(); - auto idx = index; - if ( idx < 0 ) - idx += len; - auto v = str->GetSubstring(idx, 1); - Unref(frame[z.v1].string_val); - frame[z.v1].string_val = new StringVal(v ? v : new String("")); - -# This version is for a constant v3. -internal-op Index-StringC -type VVV -eval EvalIndexString(z.v3) - -internal-op Index-String-Slice -type VV -eval auto str = frame[z.v2].string_val->AsString(); - auto lv = z.aux->ToListVal(frame); - auto slice = index_string(str, lv.get()); - Unref(frame[z.v1].string_val); - frame[z.v1].string_val = new StringVal(slice->ToStdString()); - -op AnyIndex -type VVi -set-type $$ -eval auto lv = frame[z.v2].any_val->AsListVal(); - if ( z.v3 < 0 || z.v3 >= lv->Length() ) - reporter->InternalError("bad \"any\" element index"); - ValPtr elem = lv->Idx(z.v3); - if ( CheckAnyType(elem->GetType(), z.t, z.loc) ) - AssignV1(BuildVal(elem, z.t)) - else - ZAM_error = true; - - -########## Constructors ########## - -# Table construction requires atypical evaluation of list elements -# using information from their expression specifics. 
-direct-unary-op Table-Constructor ConstructTable - -macro ConstructTableOrSetPre() - auto tt = cast_intrusive(z.t); - auto new_t = new TableVal(tt, z.aux->attrs); - auto aux = z.aux; - auto n = aux->n; - auto ind_width = z.v2; - -macro ConstructTableOrSetPost() - auto& t = frame[z.v1].table_val; - Unref(t); - t = new_t; - -internal-op Construct-Table -type VV -eval ConstructTableOrSetPre() - for ( auto i = 0; i < n; ++i ) - { - auto indices = aux->ToIndices(frame, i, ind_width); - auto v = aux->ToVal(frame, i + ind_width); - new_t->Assign(indices, v); - i += ind_width; - } - ConstructTableOrSetPost() - -# When tables are constructed, if their &default is a lambda with captures -# then we need to explicitly set up the default. -internal-op Set-Table-Default-Lambda -type VV -op1-read -eval auto& tbl = frame[z.v1].table_val; - auto lambda = frame[z.v2].ToVal(z.t); - tbl->InitDefaultVal(std::move(lambda)); - -direct-unary-op Set-Constructor ConstructSet - -internal-op Construct-Set -type VV -eval ConstructTableOrSetPre() - for ( auto i = 0; i < n; i += ind_width ) - { - auto indices = aux->ToIndices(frame, i, ind_width); - new_t->Assign(indices, nullptr); - } - ConstructTableOrSetPost() - -direct-unary-op Record-Constructor ConstructRecord - -direct-unary-op Rec-Construct-With-Rec ConstructRecordFromRecord - -macro ConstructRecordPost() - auto& r = frame[z.v1].record_val; - Unref(r); - r = new RecordVal(cast_intrusive(z.t), std::move(init_vals)); - -op Construct-Direct-Record -type V -eval auto init_vals = z.aux->ToZValVec(frame); - ConstructRecordPost() - -op Construct-Known-Record -type V -eval auto init_vals = z.aux->ToZValVecWithMap(frame); - ConstructRecordPost() - -macro AssignFromRec() - /* The following is defined below, for use by Rec-Assign-Fields */ - SetUpRecFieldOps(lhs_map) - auto is_managed = aux->is_managed; - for ( size_t i = 0U; i < n; ++i ) - { - auto rhs_i = rhs->RawField(rhs_map[i]); - if ( is_managed[i] ) - zeek::Ref(rhs_i.ManagedVal()); - 
init_vals[lhs_map[i]] = rhs_i; - } - -op Construct-Known-Record-From -type VV -eval auto init_vals = z.aux->ToZValVecWithMap(frame); - AssignFromRec() - ConstructRecordPost() - -macro DoNetworkTimeInit(slot) - init_vals[slot] = ZVal(run_state::network_time); - -op Construct-Known-Record-With-NT -type VV -eval auto init_vals = z.aux->ToZValVecWithMap(frame); - DoNetworkTimeInit(z.v2) - ConstructRecordPost() - -op Construct-Known-Record-With-NT-From -type VVV -eval auto init_vals = z.aux->ToZValVecWithMap(frame); - DoNetworkTimeInit(z.v3) - AssignFromRec() - ConstructRecordPost() - -macro GenInits() - auto init_vals = z.aux->ToZValVecWithMap(frame); - for ( auto& fi : *z.aux->field_inits ) - init_vals[fi.first] = fi.second->Generate(); - -op Construct-Known-Record-With-Inits -type V -eval GenInits() - ConstructRecordPost() - -op Construct-Known-Record-With-Inits-From -type VV -eval GenInits() - AssignFromRec() - ConstructRecordPost() - -op Construct-Known-Record-With-Inits-And-NT -type VV -eval GenInits() - DoNetworkTimeInit(z.v2) - ConstructRecordPost() - -op Construct-Known-Record-With-Inits-And-NT-From -type VVV -eval GenInits() - DoNetworkTimeInit(z.v3) - AssignFromRec() - ConstructRecordPost() - -macro SetUpRecFieldOps(which_lhs_map) - auto lhs = frame[z.v1].record_val; - auto rhs = frame[z.v2].record_val; - auto aux = z.aux; - auto& lhs_map = aux->which_lhs_map; - auto& rhs_map = aux->rhs_map; - auto n = rhs_map.size(); - -op Rec-Assign-Fields -op1-read -type VV -eval SetUpRecFieldOps(map) - for ( size_t i = 0U; i < n; ++i ) - lhs->RawOptField(lhs_map[i]) = rhs->RawField(rhs_map[i]); - -macro DoManagedRecAssign() - auto is_managed = aux->is_managed; - for ( size_t i = 0U; i < n; ++i ) - if ( is_managed[i] ) - { - auto& lhs_i = lhs->RawOptField(lhs_map[i]); - auto rhs_i = rhs->RawField(rhs_map[i]); - zeek::Ref(rhs_i.ManagedVal()); - if ( lhs_i ) - ZVal::DeleteManagedType(*lhs_i); - lhs_i = rhs_i; - } - else - lhs->RawOptField(lhs_map[i]) = 
rhs->RawField(rhs_map[i]); -op Rec-Assign-Fields-Managed -op1-read -type VV -eval SetUpRecFieldOps(map) - DoManagedRecAssign() - -op Rec-Assign-Fields-All-Managed -op1-read -type VV -eval SetUpRecFieldOps(map) - for ( size_t i = 0U; i < n; ++i ) - { - auto& lhs_i = lhs->RawOptField(lhs_map[i]); - auto rhs_i = rhs->RawField(rhs_map[i]); - zeek::Ref(rhs_i.ManagedVal()); - if ( lhs_i ) - ZVal::DeleteManagedType(*lhs_i); - lhs_i = rhs_i; - } - -op Rec-Add-Int-Fields -op1-read -type VV -eval SetUpRecFieldOps(map) - for ( size_t i = 0U; i < n; ++i ) - lhs->RawField(lhs_map[i]).int_val += rhs->RawField(rhs_map[i]).int_val; - -op Rec-Add-Double-Fields -op1-read -type VV -eval SetUpRecFieldOps(map) - for ( size_t i = 0U; i < n; ++i ) - lhs->RawField(lhs_map[i]).double_val += rhs->RawField(rhs_map[i]).double_val; - -op Rec-Add-Fields -op1-read -type VV -eval SetUpRecFieldOps(map) - auto& types = aux->types; - for ( size_t i = 0U; i < n; ++i ) - { - auto& lhs_i = lhs->RawField(lhs_map[i]); - auto rhs_i = rhs->RawField(rhs_map[i]); - auto tag = types[i]->Tag(); - if ( tag == TYPE_INT ) - lhs_i.int_val += rhs_i.int_val; - else if ( tag == TYPE_COUNT ) - lhs_i.uint_val += rhs_i.uint_val; - else - lhs_i.double_val += rhs_i.double_val; - } - -# Special instruction for concretizing vectors that are fields in a -# newly-constructed record. "aux" holds which fields in the record to -# inspect. 
-op Concretize-Vector-Fields -op1-read -type V -eval auto rt = cast_intrusive(z.t); - auto r = frame[z.v1].record_val; - auto aux = z.aux; - auto n = aux->n; - for ( auto i = 0; i < n; ++i ) - { - auto ind = aux->elems[i].IntVal(); - auto v_i = r->GetField(ind); - ASSERT(v_i); - if ( v_i->GetType()->IsUnspecifiedVector() ) - { - const auto& t_i = rt->GetFieldType(ind); - v_i->AsVectorVal()->Concretize(t_i->Yield()); - } - } - -direct-unary-op Vector-Constructor ConstructVector - -internal-op Construct-Vector -type V -eval auto new_vv = new VectorVal(cast_intrusive(z.t)); - auto aux = z.aux; - auto n = aux->n; - for ( auto i = 0; i < n; ++i ) - new_vv->Assign(i, aux->ToVal(frame, i)); - auto& vv = frame[z.v1].vector_val; - Unref(vv); - vv = new_vv; - -########## Coercions ########## - -direct-unary-op Arith-Coerce ArithCoerce - -internal-op Coerce-UI -type VV -eval auto v = frame[z.v2].int_val; - if ( v < 0 ) - { - ZAM_run_time_error(z.loc, "underflow converting int to count"); - break; - } - frame[z.v1].uint_val = zeek_uint_t(v); - -internal-op Coerce-UD -type VV -eval auto v = frame[z.v2].double_val; - if ( v < 0.0 ) - { - ZAM_run_time_error(z.loc, "underflow converting double to count"); - break; - } - if ( v > static_cast(UINT64_MAX) ) - { - ZAM_run_time_error(z.loc, "overflow converting double to count"); - break; - } - frame[z.v1].uint_val = zeek_uint_t(v); - -internal-op Coerce-IU -type VV -eval auto v = frame[z.v2].uint_val; - if ( v > INT64_MAX ) - { - ZAM_run_time_error(z.loc, "overflow converting count to int"); - break; - } - frame[z.v1].int_val = zeek_int_t(v); - -internal-op Coerce-ID -type VV -eval auto v = frame[z.v2].double_val; - if ( v < static_cast(INT64_MIN) ) - { - ZAM_run_time_error(z.loc, "underflow converting double to int"); - break; - } - if ( v > static_cast(INT64_MAX) ) - { - ZAM_run_time_error(z.loc, "overflow converting double to int"); - break; - } - frame[z.v1].int_val = zeek_int_t(v); - -internal-op Coerce-DI -type VV -eval 
frame[z.v1].double_val = double(frame[z.v2].int_val); - -internal-op Coerce-DU -type VV -eval frame[z.v1].double_val = double(frame[z.v2].uint_val); - - -macro EvalCoerceVec(coercer) - auto old_v1 = frame[z.v1].vector_val; - frame[z.v1].vector_val = coercer(frame[z.v2].vector_val, z); - Unref(old_v1); // delayed to allow for same value on both sides - -internal-op Coerce-UI-Vec -type VV -eval EvalCoerceVec(vec_coerce_UI) - -internal-op Coerce-UD-Vec -type VV -eval EvalCoerceVec(vec_coerce_UD) - -internal-op Coerce-IU-Vec -type VV -eval EvalCoerceVec(vec_coerce_IU) - -internal-op Coerce-ID-Vec -type VV -eval EvalCoerceVec(vec_coerce_ID) - -internal-op Coerce-DI-Vec -type VV -eval EvalCoerceVec(vec_coerce_DI) - -internal-op Coerce-DU-Vec -type VV -eval EvalCoerceVec(vec_coerce_DU) - - -direct-unary-op Record-Coerce RecordCoerce - -internal-op Record-Coerce -type VV -eval auto rt = cast_intrusive(z.t); - auto v = frame[z.v2].record_val; - auto to_r = coerce_to_record(std::move(rt), v, z.aux->map); - Unref(frame[z.v1].record_val); - frame[z.v1].record_val = to_r.release(); - -direct-unary-op Table-Coerce TableCoerce - -internal-op Table-Coerce -type VV -eval auto tv = frame[z.v2].table_val; - if ( tv->Size() > 0 ) - { - ZAM_run_time_error(z.loc, "coercion of non-empty table/set"); - break; - } - auto tt = cast_intrusive(z.t); - AttributesPtr attrs = tv->GetAttrs(); - auto t = make_intrusive(tt, attrs); - Unref(frame[z.v1].table_val); - frame[z.v1].table_val = t.release(); - -direct-unary-op Vector-Coerce VectorCoerce - -internal-op Vector-Coerce -type VV -eval if ( frame[z.v2].vector_val->Size() > 0 ) - { - ZAM_run_time_error(z.loc, "coercion of non-empty vector"); - break; - } - auto vv = new VectorVal(cast_intrusive(z.t)); - Unref(frame[z.v1].vector_val); - frame[z.v1].vector_val = vv; - -unary-expr-op To-Any-Coerce -op-type X -set-type $1 -eval AssignV1(ZVal(frame[z.v2].ToVal(z.t), ZAM::any_base_type)) - -unary-expr-op From-Any-Coerce -op-type X -set-type $$ -eval 
auto v = frame[z.v2].any_val; - AssignV1(ZVal({NewRef{}, v}, z.t)) - -unary-expr-op From-Any-Vec-Coerce -op-type X -set-type $$ -eval auto vv = frame[z.v2].vector_val; - if ( ! vv->Concretize(z.t->Yield()) ) - { - ZAM_run_time_error(z.loc, "incompatible vector-of-any"); - break; - } - zeek::Ref(vv); - AssignV1(ZVal(vv)) - - -########## Aggregate Assignments ########## - -macro VectorElemAssignPre() - auto ind = frame[z.v2].uint_val; - auto vv = frame[z.v1].vector_val; - -macro EvalVectorElemAssign(val_setup, assign_op) - VectorElemAssignPre() - val_setup - if ( ! assign_op ) - ZAM_run_time_error(z.loc, "value used but not set"); - -op Vector-Elem-Assign -op1-read -set-type $1 -type VVV -eval EvalVectorElemAssign(, copy_vec_elem(vv, ind, frame[z.v3], z.t)) - -op Any-Vector-Elem-Assign -op1-read -set-type $1 -type VVV -eval EvalVectorElemAssign(, vv->Assign(ind, frame[z.v3].ToVal(z.t))) - -op Vector-Elem-Assign-Any -op1-read -type VVV -eval EvalVectorElemAssign(auto any_v = frame[z.v3].any_val;, vv->Assign(ind, {NewRef{}, any_v})) - -op Vector-Elem-Assign -op1-read -set-type $2 -type VVC -eval VectorElemAssignPre() - (void) copy_vec_elem(vv, ind, z.c, z.t); - -op Any-Vector-Elem-Assign -op1-read -set-type $1 -type VVC -eval VectorElemAssignPre() - if ( ! vv->Assign(ind, z.c.ToVal(z.t)) ) - ZAM_run_time_error(z.loc, "vector index assignment failed for invalid type"); - -# These versions are used when the constant is the index, not the new value. -op Vector-Elem-Assign -op1-read -set-type $1 -type VVi -eval auto vv = frame[z.v1].vector_val; - if ( ! copy_vec_elem(vv, z.v3, frame[z.v2], z.t) ) - ZAM_run_time_error(z.loc, "value used but not set"); - -op Any-Vector-Elem-Assign -op1-read -set-type $1 -type VVi -eval auto vv = frame[z.v1].vector_val; - if ( ! 
vv->Assign(z.v3, frame[z.v2].ToVal(z.t)) ) - ZAM_run_time_error(z.loc, "value used but not set"); - -op Vector-Elem-Assign-Any -op1-read -type VVi -eval auto vv = frame[z.v1].vector_val; - auto any_v = frame[z.v2].any_val; - vv->Assign(z.v3, {NewRef{}, any_v}); - -internal-op Vector-Slice-Assign -op1-read -type VV -eval ValPtr vec = {NewRef{}, frame[z.v1].vector_val}; - auto slice = z.aux->ToListVal(frame); - ValPtr vals = {NewRef{}, frame[z.v2].vector_val}; - bool iterators_invalidated; - auto error = assign_to_index(std::move(vec), std::move(slice), std::move(vals), iterators_invalidated); - if ( error ) - ZAM_run_time_error(z.loc, error); - if ( iterators_invalidated ) - ZAM_run_time_warning(z.loc, "possible loop/iterator invalidation"); - - -internal-op Table-Elem-Assign -op1-read -type VV -eval EvalTableElemAssign(frame[z.v2]) - -macro EvalTableElemAssign(value) - auto indices = z.aux->ToListVal(frame); - auto val = value.ToVal(z.t); - bool iterators_invalidated = false; - frame[z.v1].table_val->Assign(std::move(indices), std::move(val), true, &iterators_invalidated); - if ( iterators_invalidated ) - ZAM_run_time_warning(z.loc, "possible loop/iterator invalidation"); - -internal-op Table-Elem-Assign -op1-read -type VC -eval EvalTableElemAssign(z.c) - - -########## Function Calls ########## - -# A call with no arguments and no return value. -internal-op Call0 -op1-read -type X -side-effects -num-call-args 0 - -# A call with no arguments and a return value. -internal-assignment-op Call0 -type V -side-effects OP_CALL0_X OP_X -assign-val v -num-call-args 0 - -# Calls with 1 argument and no return value. -internal-op Call1 -op1-read -type V -side-effects -num-call-args 1 - -internal-op Call1 -op1-read -type C -side-effects -num-call-args 1 - -# Same but with a return value. 
-internal-assignment-op Call1 -type VV -side-effects OP_CALL1_V OP_V -assign-val v -num-call-args 1 - -internal-assignment-op Call1 -type VC -side-effects OP_CALL1_C OP_C -assign-val v -num-call-args 1 - -# Calls with 2-5 arguments and no return value. -internal-op Call2 -type X -side-effects -num-call-args 2 - -# Same with a return value. -internal-assignment-op Call2 -type V -side-effects OP_CALL2_X OP_X -assign-val v -num-call-args 2 - -internal-op Call3 -type X -side-effects -num-call-args 3 - -# Same with a return value. -internal-assignment-op Call3 -type V -side-effects OP_CALL3_X OP_X -assign-val v -num-call-args 3 - -internal-op Call4 -type X -side-effects -num-call-args 4 - -# Same with a return value. -internal-assignment-op Call4 -type V -side-effects OP_CALL4_X OP_X -assign-val v -num-call-args 4 - -internal-op Call5 -type X -side-effects -num-call-args 5 - -# Same with a return value. -internal-assignment-op Call5 -type V -side-effects OP_CALL5_X OP_X -assign-val v -num-call-args 5 - -# ... and with an arbitrary number of arguments. - -internal-op CallN -type X -side-effects -num-call-args n - -# Same with a return value. -internal-assignment-op CallN -type V -side-effects OP_CALLN_X OP_X -assign-val v -num-call-args n - -# Same, but for indirect calls via a global variable. -internal-op IndCallN -type X -side-effects -indirect-call -num-call-args n - -# Same with a return value. -internal-assignment-op IndCallN -type V -side-effects OP_INDCALLN_X OP_X -assign-val v -indirect-call -num-call-args n - -# And versions with a local variable rather than a global. -internal-op Local-IndCallN -op1-read -type V -side-effects -indirect-local-call -num-call-args n - -internal-assignment-op Local-IndCallN -type VV -side-effects OP_LOCAL_INDCALLN_V OP_V -assign-val v -indirect-local-call -num-call-args n - -# A call made in a "when" context. These always have assignment targets. 
-# To keep things simple, we just use one generic flavor (for N arguments, -# doing a less-streamlined-but-simpler Val-based assignment). -macro WhenCall(func) - if ( ! func ) - throw ZAMDelayedCallException(); - auto& lhs = frame[z.v1]; - auto trigger = f->GetTrigger(); - Val* v = trigger ? trigger->Lookup(z.aux->call_expr.get()) : nullptr; - ValPtr vp; - if ( v ) - vp = {NewRef{}, v}; - else - { - auto aux = z.aux; - auto current_assoc = f->GetTriggerAssoc(); - auto n = aux->n; - std::vector args; - for ( auto i = 0; i < n; ++i ) - args.push_back(aux->ToVal(frame, i)); - f->SetCall(z.aux->call_expr.get()); - /* It's possible that this function will call another that - * itself returns null because *it* is the actual blocker. - * That will set ZAM_error, which we need to ignore. - */ - auto hold_ZAM_error = ZAM_error; - vp = func->Invoke(&args, f); - ZAM_error = hold_ZAM_error; - f->SetTriggerAssoc(current_assoc); - if ( ! vp ) - throw ZAMDelayedCallException(); - } - if ( z.is_managed ) - ZVal::DeleteManagedType(lhs); - lhs = ZVal(vp, z.t); - -internal-op WhenCallN -type V -side-effects -eval WhenCall(z.aux->func) - -internal-op WhenIndCallN -type VV -side-effects -eval auto sel = z.v2; - auto func = (sel < 0) ? 
z.aux->id_val->GetVal()->AsFunc() : frame[sel].AsFunc(); - WhenCall(func) - - -########## Statements ########## - -macro EvalScheduleArgs(time, is_delta, build_args) - if ( run_state::terminating ) - break; - double dt = time.double_val; - if ( is_delta ) - dt += run_state::network_time; - auto handler = EventHandlerPtr(z.aux->event_handler); - ValVec args; - build_args - auto timer = new ScheduleTimer(handler, std::move(args), dt); - timer_mgr->Add(timer); - -macro EvalSchedule(time, is_delta) - EvalScheduleArgs(time, is_delta, z.aux->FillValVec(args, frame);) - -op Schedule -type ViHL -op1-read -custom-method return CompileSchedule(n, nullptr, i, h, l); -eval EvalSchedule(frame[z.v1], z.v2) - -op Schedule -type CiHL -op1-read -custom-method return CompileSchedule(nullptr, c, i, h, l); -eval EvalSchedule(z.c, z.v1) - -internal-op Schedule0 -type ViH -op1-read -eval EvalScheduleArgs(frame[z.v1], z.v2,) - -internal-op Schedule0 -type CiH -op1-read -eval EvalScheduleArgs(z.c, z.v1,) - -macro QueueEvent(eh, args) - if ( *eh ) - event_mgr.Enqueue(eh, std::move(args)); - -op Event -type HL -op1-read -custom-method return CompileEvent(h, l); -eval ValVec args; - z.aux->FillValVec(args, frame); - QueueEvent(z.aux->event_handler, args); - -internal-op Event0 -type X -eval ValVec args(0); - QueueEvent(z.aux->event_handler, args); - -internal-op Event1 -type V -op1-read -eval ValVec args(1); - args[0] = frame[z.v1].ToVal(z.t); - QueueEvent(z.aux->event_handler, args); - -internal-op Event2 -type VV -op1-read -eval ValVec args(2); - args[0] = frame[z.v1].ToVal(z.t); - args[1] = frame[z.v2].ToVal(z.t2); - QueueEvent(z.aux->event_handler, args); - -internal-op Event3 -type VVV -op1-read -eval ValVec args(3); - auto& aux = z.aux; - args[0] = frame[z.v1].ToVal(z.t); - args[1] = frame[z.v2].ToVal(z.t2); - args[2] = frame[z.v3].ToVal(aux->elems[2].GetType()); - QueueEvent(z.aux->event_handler, args); - -internal-op Event4 -type VVVV -op1-read -eval ValVec args(4); - auto& aux = 
z.aux; - args[0] = frame[z.v1].ToVal(z.t); - args[1] = frame[z.v2].ToVal(z.t2); - args[2] = frame[z.v3].ToVal(aux->elems[2].GetType()); - args[3] = frame[z.v4].ToVal(aux->elems[3].GetType()); - QueueEvent(z.aux->event_handler, args); - - -op Return -type X -eval EvalReturn(nullptr,) - -macro EvalReturn(val, type) - ret_u = val; - type - DO_ZAM_PROFILE - pc = end_pc; - continue; - -op Return -op1-read -type V -set-type $$ -eval EvalReturn(&frame[z.v1], ret_type = z.t;) - -op Return -type C -eval EvalReturn(&z.c, ret_type = z.t;) - - -# Branch on the value of v1 using switch table v2, with default branch to v3 - -macro EvalSwitchBody(cases, postscript) - { - auto t = cases[z.v2]; - if ( t.find(v) == t.end() ) - pc = z.v3; - else - pc = t[v]; - postscript - DO_ZAM_PROFILE - continue; - } - -internal-op SwitchI -type VVV -op1-read -eval auto v = frame[z.v1].int_val; - EvalSwitchBody(int_cases,) - -internal-op SwitchU -op1-read -type VVV -eval auto v = frame[z.v1].uint_val; - EvalSwitchBody(uint_cases,) - -internal-op SwitchD -op1-read -type VVV -eval auto v = frame[z.v1].double_val; - EvalSwitchBody(double_cases,) - -internal-op SwitchS -op1-read -type VVV -eval auto vs = frame[z.v1].string_val->AsString()->Render(); - std::string v(vs); - EvalSwitchBody(str_cases,delete[] vs;) - -internal-op SwitchA -op1-read -type VVV -eval auto v = frame[z.v1].addr_val->AsAddr().AsString(); - EvalSwitchBody(str_cases,) - -internal-op SwitchN -op1-read -type VVV -eval auto v = frame[z.v1].subnet_val->AsSubNet().AsString(); - EvalSwitchBody(str_cases,) - - -internal-op Branch-If-Not-Type -op1-read -type VV -eval auto v = frame[z.v1].any_val; - if ( ! 
can_cast_value_to_type(v, z.t.get()) ) - BRANCH(v2) - - -internal-op Init-Table-Loop -type VV -op1-read -eval auto& ti = (*tiv_ptr)[z.v2]; - ti.BeginLoop({NewRef{}, frame[z.v1].table_val}, z.aux); - -internal-op Next-Table-Iter -op1-read -# v1 = iteration info -# v2 = branch target if loop done -type VV -eval NextTableIterPre(v1, v2) - ti.NextIter(frame); - -macro NextTableIterPre(iter, branch) - auto& ti = (*tiv_ptr)[z.iter]; - if ( ti.IsDoneIterating() ) - BRANCH(branch) - -internal-op Next-Table-Iter-No-Vars -op1-read -# v1 = iteration info -# v2 = branch target if loop done -type VV -eval NextTableIterPre(v1, v2) - ti.IterFinished(); - -internal-op Next-Table-Iter-Val-Var -# v1 = slot of the "ValueVar" -# v2 = iteration info -# v3 = branch target if loop done -type VVV -eval NextTableIterPre(v2, v3) - AssignV1(ti.IterValue()); - ti.NextIter(frame); - -internal-op Next-Table-Iter-Val-Var-No-Vars -# v1 = slot of the "ValueVar" -# v2 = iteration info -# v3 = branch target if loop done -type VVV -eval NextTableIterPre(v2, v3) - AssignV1(ti.IterValue()); - ti.IterFinished(); - - -internal-op Init-Vector-Loop -type VV -op1-read -eval auto& vv = frame[z.v1].vector_val->RawVec(); - step_iters[z.v2].InitLoop(&vv); - -macro NextVectorIterCore(info, branch) - auto& si = step_iters[info]; - if ( si.IsDoneIterating() ) - BRANCH(branch) - const auto& vv = *si.vv; - if ( ! vv[si.iter] ) - { /* Account for vector hole. Re-execute for next position. 
*/ - si.IterFinished(); - --pc; /* so we then increment to here again */ - break; - } - -internal-op Next-Vector-Iter -# v1 = iteration variable -# v2 = iteration info -# v3 = branch target if loop done -type VVV -eval NextVectorIterCore(z.v2, v3) - frame[z.v1].uint_val = si.iter; - si.IterFinished(); - -internal-op Next-Vector-Blank-Iter -# v1 = iteration info -# v2 = branch target if loop done -op1-internal -type VV -eval NextVectorIterCore(z.v1, v2) - si.IterFinished(); - -internal-op Next-Vector-Iter-Val-Var -# v1 = iteration variable -# v2 = value variable -# v3 = iteration info -# v4 = branch target if loop done -op1-read-write -type VVVV -eval NextVectorIterCore(z.v3, v4) - frame[z.v1].uint_val = si.iter; - if ( z.is_managed ) - frame[z.v2] = BuildVal(vv[si.iter]->ToVal(z.t), z.t); - else - frame[z.v2] = *vv[si.iter]; - si.IterFinished(); - -internal-op Next-Vector-Blank-Iter-Val-Var -# v1 = value variable -# v2 = iteration info -# v3 = branch target if loop done -type VVV -eval NextVectorIterCore(z.v2, v3) - if ( z.is_managed ) - frame[z.v1] = BuildVal(vv[si.iter]->ToVal(z.t), z.t); - else - frame[z.v1] = *vv[si.iter]; - si.IterFinished(); - - -internal-op Init-String-Loop -type VV -op1-read -eval step_iters[z.v2].InitLoop(frame[z.v1].string_val->AsString()); - -internal-op Init-String-Loop -type VC -eval step_iters[z.v1].InitLoop(z.c.string_val->AsString()); - -internal-op Next-String-Iter -# v1 = iteration variable -# v2 = iteration info -# v3 = branch target if loop done -type VVV -eval auto& si = step_iters[z.v2]; - if ( si.IsDoneIterating() ) - BRANCH(v3) - auto bytes = (const char*) si.s->Bytes() + si.iter; - auto sv = new StringVal(1, bytes); - Unref(frame[z.v1].string_val); - frame[z.v1].string_val = sv; - si.IterFinished(); - -internal-op Next-String-Blank-Iter -# v1 = iteration info -# v2 = branch target if loop done -op1-internal -type VV -eval auto& si = step_iters[z.v1]; - if ( si.IsDoneIterating() ) - BRANCH(v2) - si.IterFinished(); - 
-internal-op End-Table-Loop -op1-internal -type V -eval (*tiv_ptr)[z.v1].Clear(); - - -op CheckAnyLen -op1-read -type Vi -eval auto v = frame[z.v1].list_val; - if ( v->Vals().size() != static_cast(z.v2) ) - ZAM_run_time_error(z.loc, "mismatch in list lengths"); - -op Print -type O -eval do_print_stmt(z.aux->ToValVec(frame)); -method-post z.aux = v->aux; - -op Print1 -op1-read -type V -set-type $$ -eval EvalPrint1(frame[z.v1]) - -macro EvalPrint1(value) - std::vector vals; - vals.push_back(value.ToVal(z.t)); - do_print_stmt(vals); - -op Print1 -op1-read -type C -set-type $$ -eval EvalPrint1(z.c) - - -internal-op If-Else -op1-read -type VV -eval if ( ! frame[z.v1].int_val ) BRANCH(v2) - -internal-op If -op1-read -type VV -eval if ( ! frame[z.v1].int_val ) BRANCH(v2) - -internal-op If-Not -op1-read -type VV -eval if ( frame[z.v1].int_val ) BRANCH(v2) - - -op AddStmt -op1-read -type VO -eval EvalAddStmt(z.aux->ToListVal(frame)) -method-post z.aux = v->aux; - -macro EvalAddStmt(ind) - auto index = ind; - bool iterators_invalidated = false; - frame[z.v1].table_val->Assign(std::move(index), nullptr, true, &iterators_invalidated); - if ( iterators_invalidated ) - ZAM_run_time_warning(z.loc, "possible loop/iterator invalidation"); - -op AddStmt1 -op1-read -set-type $1 -type VV -eval EvalAddStmt(frame[z.v2].ToVal(z.t)) - -op AddStmt1 -op1-read -type VC -eval EvalAddStmt(z.c.ToVal(z.t)) - - -op ClearTable -op1-read -type V -eval frame[z.v1].table_val->RemoveAll(); - -op ClearVector -op1-read -type V -eval frame[z.v1].vector_val->Resize(0); - - -op DelTable -op1-read -type VO -eval auto index = z.aux->ToListVal(frame); - bool iterators_invalidated = false; - frame[z.v1].table_val->Remove(*index, true, &iterators_invalidated); - if ( iterators_invalidated ) - ZAM_run_time_warning(z.loc, "possible loop/iterator invalidation"); -method-post z.aux = v->aux; - -op DelField -op1-read -type Vi -eval frame[z.v1].record_val->Remove(z.v2); - - -internal-op Init-Record -type V -eval auto 
r = new RecordVal(cast_intrusive(z.t)); - Unref(frame[z.v1].record_val); - frame[z.v1].record_val = r; - -internal-op Init-Vector -type V -eval auto vt = cast_intrusive(z.t); - auto vec = new VectorVal(std::move(vt)); - Unref(frame[z.v1].vector_val); - frame[z.v1].vector_val = vec; - -internal-op Init-Table -type V -eval auto tt = cast_intrusive(z.t); - auto t = new TableVal(tt, z.aux->attrs); - Unref(frame[z.v1].table_val); - frame[z.v1].table_val = t; - -op When -type V -op1-read -eval BuildWhen(-1.0) - -op When-Timeout -type VV -op1-read -eval BuildWhen(frame[z.v2].double_val) - -op When-Timeout -type VC -op1-read -eval BuildWhen(z.c.double_val) - -macro BuildWhen(timeout) - auto& aux = z.aux; - auto wi = aux->wi; - FuncPtr func{NewRef{}, frame[z.v1].func_val}; - auto lambda = make_intrusive(func); - wi->Instantiate(std::move(lambda)); - std::vector local_aggrs; - for ( int i = 0; i < aux->n; ++i ) - { - auto v = aux->ToVal(frame, i); - if ( v ) - local_aggrs.push_back(v); - } - (void)make_intrusive(wi, wi->WhenExprGlobals(), local_aggrs, timeout, f, z.loc->Loc()); - -######################################## -# Internal -######################################## - -# These two are only needed for type-based switch statements. Could think -# about replacing them using CoerceFromAnyExpr. -op Assign-Any -type VV -set-type $1 -eval EvalAssignAny(frame[z.v2]) - -macro EvalAssignAny(value) - auto v = value.ToVal(z.t); - frame[z.v1].any_val = v.release(); - -op Assign-Any -type VC -set-type $1 -eval EvalAssignAny(z.c) - -# Lazy way to assign without having to track the specific type of -# a constant. -internal-op Assign-Const -type VC -eval AssignV1(BuildVal(z.c.ToVal(z.t), z.t)) - -internal-assignment-op Load-Val -type VV -assign-val v -eval auto& v = f->GetElement(z.v2); - -internal-assignment-op Load-Global -type VV -assign-val v -eval auto& v = globals[z.v2].id->GetVal(); - if ( ! 
v ) - { - ZAM_run_time_error(z.loc, "value used but not set", z.aux->id_val.get()); - break; - } - -# We need a special form here for loading global types, as they don't -# fit the usual template. -internal-op Load-Global-Type -type VV -eval auto& v = frame[z.v1].type_val; - Unref(v); - auto& t = globals[z.v2].id->GetType(); - v = new TypeVal(t, true); - -internal-op Load-Capture -type VV -eval frame[z.v1] = f->GetFunction()->GetCapturesVec()[z.v2]; - -internal-op Load-Managed-Capture -type VV -eval auto& lhs = frame[z.v1]; - auto& rhs = f->GetFunction()->GetCapturesVec()[z.v2]; - zeek::Ref(rhs.ManagedVal()); - ZVal::DeleteManagedType(lhs); - lhs = rhs; - -internal-op Store-Global -op1-internal -type V -eval auto& g = globals[z.v1]; - g.id->SetVal(frame[g.slot].ToVal(z.t)); - -# Both of these have the LHS as v2 not v1, to keep with existing -# conventions of OP_VV_I2 op type (as opposed to OP_VV_I1_V2, which doesn't -# currently exist, and would be a pain to add). -internal-op Store-Capture -op1-read -type VV -eval f->GetFunction()->GetCapturesVec()[z.v2] = frame[z.v1]; - -internal-op Store-Managed-Capture -op1-read -type VV -eval auto& lhs = f->GetFunction()->GetCapturesVec()[z.v2]; - auto& rhs = frame[z.v1]; - zeek::Ref(rhs.ManagedVal()); - ZVal::DeleteManagedType(lhs); - lhs = rhs; - - -internal-op Copy-To -type VC -set-type $1 -eval AssignV1(CopyVal(z.c)) - -internal-op GoTo -type V -eval BRANCH(v1) - -internal-op Hook-Break -type X -eval flow = FLOW_BREAK; - pc = end_pc; - DO_ZAM_PROFILE - continue; - -# Slot 2 gives frame size. 
-internal-op Lambda -type VV -eval auto& aux = z.aux; - auto& primary_func = aux->primary_func; - auto& body = primary_func->GetBodies()[0].stmts; - ASSERT(body->Tag() == STMT_ZAM); - auto lamb = make_intrusive(aux->id_val); - lamb->AddBody(body, z.v2); - lamb->SetName(aux->lambda_name.c_str()); - if ( aux->n > 0 ) - { - auto captures = std::make_unique>(); - for ( auto i = 0; i < aux->n; ++i ) - { - auto slot = aux->elems[i].Slot(); - if ( slot >= 0 ) - { - auto& cp = frame[slot]; - if ( aux->elems[i].IsManaged() ) - zeek::Ref(cp.ManagedVal()); - captures->push_back(cp); - } - else - // Used for when-locals. - captures->push_back(ZVal()); - } - lamb->CreateCaptures(std::move(captures)); - } - ZVal::DeleteManagedType(frame[z.v1]); - frame[z.v1].func_val = lamb.release(); - -######################################## -# Built-in Functions -######################################## - -macro EvalSubBytes(arg1, arg2, arg3) - { - auto sv = ZAM_sub_bytes(arg1.AsString(), arg2, arg3); - Unref(frame[z.v1].AsString()); - frame[z.v1].string_val = sv; - } - -internal-op Remove-Teredo -op1-read -type V -eval auto teredo = zeek::packet_mgr->GetAnalyzer("Teredo"); - if ( teredo ) - { - zeek::detail::ConnKey conn_key(frame[z.v1].record_val); - static_cast(teredo.get())->RemoveConnection(conn_key); - } - -internal-op Remove-Teredo -side-effects OP_REMOVE_TEREDO_V OP_V -type VV -eval auto teredo = zeek::packet_mgr->GetAnalyzer("Teredo"); - if ( teredo ) - { - zeek::detail::ConnKey conn_key(frame[z.v2].record_val); - static_cast(teredo.get())->RemoveConnection(conn_key); - } - frame[z.v1].int_val = 1; - -internal-op Remove-GTPv1 -op1-read -type V -eval auto gtpv1 = zeek::packet_mgr->GetAnalyzer("GTPv1"); - if ( gtpv1 ) - { - zeek::detail::ConnKey conn_key(frame[z.v1].record_val); - static_cast(gtpv1.get())->RemoveConnection(conn_key); - } - -internal-op Remove-GTPv1 -side-effects OP_REMOVE_GTPV1_V OP_V -type VV -eval auto gtpv1 = zeek::packet_mgr->GetAnalyzer("GTPv1"); - if ( gtpv1 ) - 
{ - zeek::detail::ConnKey conn_key(frame[z.v2].record_val); - static_cast(gtpv1.get())->RemoveConnection(conn_key); - } - frame[z.v1].int_val = 1; - -internal-op Set-File-Handle -op1-read -type V -eval auto handle = frame[z.v1].string_val; - auto bytes = reinterpret_cast(handle->Bytes()); - auto h = std::string(bytes, handle->Len()); - zeek::file_mgr->SetHandle(h); - -internal-op Subnet-To-Addr -type VV -eval auto addr_v = make_intrusive(frame[z.v2].subnet_val->Prefix()); - Unref(frame[z.v1].addr_val); - frame[z.v1] = ZVal(std::move(addr_v)); - -internal-op Sub-Bytes -type VVVV -eval EvalSubBytes(frame[z.v2], frame[z.v3].uint_val, frame[z.v4].int_val) - -internal-op Sub-Bytes -type VVVi -eval EvalSubBytes(frame[z.v2], frame[z.v3].uint_val, z.c.int_val) - -internal-op Sub-Bytes -type VViV -eval EvalSubBytes(frame[z.v2], z.c.uint_val, frame[z.v3].int_val) - -internal-op Sub-Bytes -type VVii -eval EvalSubBytes(frame[z.v2], z.c.uint_val, z.v3) - -internal-op Sub-Bytes -type VVVC -eval EvalSubBytes(z.c, frame[z.v2].uint_val, frame[z.v3].uint_val) - -internal-op Sub-Bytes -type VViC -eval EvalSubBytes(z.c, frame[z.v2].uint_val, z.v3) - -internal-op Sub-Bytes -type ViVC -eval EvalSubBytes(z.c, zeek_uint_t(z.v3), frame[z.v2].uint_val) - -internal-op Time-To-Double -type VV -eval frame[z.v1] = frame[z.v2]; - - -internal-op To-Lower -type VV -eval auto sv = ZAM_to_lower(frame[z.v2].string_val); - Unref(frame[z.v1].string_val); - frame[z.v1].string_val = sv; - -# A ZAM version of Log::__write. In calls to it, the first argument -# is generally a constant (enum) *if we inlined*, but otherwise a -# parameter, so we support both VVV ad VVC. -# -# It's actually the case that the return value is pretty much always -# ignored ... plus optimization can elide it away. See the second -# pair of built-ins for versions that discard the return value. 
-# -# Could speed things up further by modifying the Write method to just -# take the raw enum value, as it appears that that's all that's ever -# actually used. - -macro LogWritePre(id_val, columns_slot) - auto id = id_val.ToVal(ZAM::log_ID_enum_type); - auto columns = frame[z.columns_slot].record_val; - -macro LogWriteResPost() - bool result = log_mgr->Write(id->AsEnumVal(), columns->AsRecordVal()); - frame[z.v1].int_val = result; - -macro LogWriteNoResPost() - (void) log_mgr->Write(id->AsEnumVal(), columns->AsRecordVal()); - -internal-op Log-Write -side-effects OP_LOG_WRITE_VV OP_VV -type VVV -eval LogWritePre(frame[z.v2], v3) - LogWriteResPost() - -internal-op Log-WriteC -side-effects OP_LOG_WRITEC_V OP_V -type VV -eval LogWritePre(z.c, v2) - LogWriteResPost() - -# Versions that discard the return value. -internal-op Log-Write -side-effects -op1-read -type VV -eval LogWritePre(frame[z.v1], v2) - LogWriteNoResPost() - -internal-op Log-WriteC -side-effects -op1-read -type V -eval LogWritePre(z.c, v1) - LogWriteNoResPost() - -internal-op Broker-Flush-Logs -side-effects OP_BROKER_FLUSH_LOGS_X OP_X -type V -eval frame[z.v1].uint_val = broker_mgr->FlushLogBuffers(); - -internal-op Broker-Flush-Logs -side-effects -type X -eval (void) broker_mgr->FlushLogBuffers(); - -internal-op Get-Port-Transport-Proto -type VV -eval auto mask = frame[z.v2].uint_val & PORT_SPACE_MASK; - auto v = 0; /* TRANSPORT_UNKNOWN */ - if ( mask == TCP_PORT_MASK ) - v = 1; - else if ( mask == UDP_PORT_MASK ) - v = 2; - else if ( mask == ICMP_PORT_MASK ) - v = 3; - frame[z.v1].uint_val = v; - -internal-op Conn-Exists -type VV -eval frame[z.v1].int_val = session_mgr->FindConnection(frame[z.v2].record_val) != nullptr; - -internal-op Conn-Exists-Cond -op1-read -type VV -eval if ( ! 
session_mgr->FindConnection(frame[z.v1].record_val) ) - BRANCH(v2) - -internal-op Not-Conn-Exists-Cond -op1-read -type VV -eval if ( session_mgr->FindConnection(frame[z.v1].record_val) ) - BRANCH(v2) - -internal-op Lookup-Conn -type VV -eval auto cid = frame[z.v2].record_val; - Connection* conn = session_mgr->FindConnection(cid); - ValPtr res; - if ( conn ) - res = conn->GetVal(); - else - { - ZAM_run_time_error(z.loc, "connection ID not a known connection", cid); - res = build_dummy_conn_record(); - } - AssignV1(ZVal(res, res->GetType())); - -internal-op Is-ICMP-Port -type VV -eval frame[z.v1].int_val = (frame[z.v2].uint_val & PORT_SPACE_MASK) == ICMP_PORT_MASK; - -internal-op Is-ICMP-Port-Cond -op1-read -type VV -eval if ( (frame[z.v1].uint_val & PORT_SPACE_MASK) != ICMP_PORT_MASK ) - BRANCH(v2) - -internal-op Not-Is-ICMP-Port-Cond -op1-read -type VV -eval if ( (frame[z.v1].uint_val & PORT_SPACE_MASK) == ICMP_PORT_MASK ) - BRANCH(v2) - -internal-op Is-TCP-Port -type VV -eval frame[z.v1].int_val = (frame[z.v2].uint_val & PORT_SPACE_MASK) == TCP_PORT_MASK; - -internal-op Is-TCP-Port-Cond -op1-read -type VV -eval if ( (frame[z.v1].uint_val & PORT_SPACE_MASK) != TCP_PORT_MASK ) - BRANCH(v2) - -internal-op Not-Is-TCP-Port-Cond -op1-read -type VV -eval if ( (frame[z.v1].uint_val & PORT_SPACE_MASK) == TCP_PORT_MASK ) - BRANCH(v2) - -internal-op Is-UDP-Port -type VV -eval frame[z.v1].int_val = (frame[z.v2].uint_val & PORT_SPACE_MASK) == UDP_PORT_MASK; - -internal-op Is-UDP-Port-Cond -op1-read -type VV -eval if ( (frame[z.v1].uint_val & PORT_SPACE_MASK) != UDP_PORT_MASK ) - BRANCH(v2) - -internal-op Not-Is-UDP-Port-Cond -op1-read -type VV -eval if ( (frame[z.v1].uint_val & PORT_SPACE_MASK) == UDP_PORT_MASK ) - BRANCH(v2) - -internal-op Is-V4-Addr -type VV -eval frame[z.v1].int_val = frame[z.v2].addr_val->AsAddr().GetFamily() == IPv4; - -internal-op Is-V4-Addr-Cond -op1-read -type VV -eval if ( frame[z.v1].addr_val->AsAddr().GetFamily() != IPv4 ) - BRANCH(v2) - 
-internal-op Not-Is-V4-Addr-Cond -op1-read -type VV -eval if ( frame[z.v1].addr_val->AsAddr().GetFamily() == IPv4 ) - BRANCH(v2) - -internal-op Is-V6-Addr -type VV -eval frame[z.v1].int_val = frame[z.v2].addr_val->AsAddr().GetFamily() == IPv6; - -internal-op Is-V6-Addr-Cond -op1-read -type VV -eval if ( frame[z.v1].addr_val->AsAddr().GetFamily() != IPv6 ) - BRANCH(v2) - -internal-op Not-Is-V6-Addr-Cond -op1-read -type VV -eval if ( frame[z.v1].addr_val->AsAddr().GetFamily() == IPv6 ) - BRANCH(v2) - -internal-op Network-Time -type V -eval frame[z.v1].double_val = run_state::network_time; - -internal-op Current-Time -type V -eval frame[z.v1].double_val = util::current_time(); - -internal-op Reading-Live-Traffic -type V -eval frame[z.v1].int_val = run_state::reading_live; - -internal-op Reading-Live-Traffic-Cond -op1-read -type V -eval if ( ! run_state::reading_live ) - BRANCH(v1) - -internal-op Not-Reading-Live-Traffic-Cond -op1-read -type V -eval if ( run_state::reading_live ) - BRANCH(v1) - -internal-op Reading-Traces -type V -eval frame[z.v1].int_val = run_state::reading_traces; - -internal-op Reading-Traces-Cond -op1-read -type V -eval if ( ! 
run_state::reading_traces ) - BRANCH(v1) - -internal-op Not-Reading-Traces-Cond -op1-read -type V -eval if ( run_state::reading_traces ) - BRANCH(v1) - -internal-op Sort -op1-read -type V -eval if ( frame[z.v1].vector_val->Size() > 1 ) - frame[z.v1].vector_val->Sort(); - -internal-op Sort -type VV -eval auto vv = frame[z.v2].vector_val; - if ( vv->Size() > 1 ) - vv->Sort(); - zeek::Ref(vv); - Unref(frame[z.v1].vector_val); - frame[z.v1].vector_val = vv; - -internal-op Sort-With-Cmp -op1-read -type VV -eval if ( frame[z.v1].vector_val->Size() > 1 ) - frame[z.v1].vector_val->Sort(frame[z.v2].func_val); - -internal-op Sort-With-Cmp -type VVV -eval auto vv = frame[z.v2].vector_val; - if ( vv->Size() > 1 ) - vv->Sort(frame[z.v3].func_val); - zeek::Ref(vv); - Unref(frame[z.v1].vector_val); - frame[z.v1].vector_val = vv; - -macro EvalStartsWith(str_val, sub_val) - auto str = str_val.string_val; - auto sub = sub_val.string_val; - auto str_n = str->Len(); - auto sub_n = sub->Len(); - if ( str_n < sub_n ) - frame[z.v1].int_val = 0; - else - { - auto str_b = str->Bytes(); - auto sub_b = sub->Bytes(); - int i; - for ( i = 0; i < sub_n; ++i ) - if ( str_b[i] != sub_b[i] ) - break; - frame[z.v1].int_val = i == sub_n; - } - -internal-op Starts-With -type VVV -eval EvalStartsWith(frame[z.v2], frame[z.v3]) - -internal-op Starts-With -type VCV -eval EvalStartsWith(z.c, frame[z.v2]) - -internal-op Starts-With -type VVC -eval EvalStartsWith(frame[z.v2], z.c) - -macro EvalStrCmp(s1_src, s2_src) - auto s1 = s1_src.string_val; - auto s2 = s2_src.string_val; - frame[z.v1].int_val = Bstr_cmp(s1->AsString(), s2->AsString()); - -internal-op StrCmp -type VVV -eval EvalStrCmp(frame[z.v2], frame[z.v3]) - -internal-op StrCmp -type VCV -eval EvalStrCmp(z.c, frame[z.v2]) - -internal-op StrCmp -type VVC -eval EvalStrCmp(frame[z.v2], z.c) - -macro EvalStrStr(big_value, little_value) - auto big = big_value.string_val; - auto little = little_value.string_val; - frame[z.v1].int_val = 1 + 
big->AsString()->FindSubstring(little->AsString()); - -internal-op StrStr -type VVV -eval EvalStrStr(frame[z.v2], frame[z.v3]) - -internal-op StrStr -type VCV -eval EvalStrStr(z.c, frame[z.v2]) - -internal-op StrStr -type VVC -eval EvalStrStr(frame[z.v2], z.c) - -macro Cat1Op(val) - auto& v1 = frame[z.v1]; - ZVal::DeleteManagedType(v1); - v1 = val; - -macro Cat1OpRef(val) - Cat1Op(val) - zeek::Ref(v1.string_val); - -internal-op Cat1 -type VC -eval Cat1OpRef(z.c) - -internal-op Cat1 -type VV -eval Cat1OpRef(frame[z.v2]) - -macro Cat1FullVal(val) - auto formatted_val = ZVal(ZAM_val_cat(val.ToVal(z.t))); - Cat1Op(formatted_val) - -internal-op Cat1Full -type VC -eval Cat1FullVal(z.c) - -internal-op Cat1Full -type VV -eval Cat1FullVal(frame[z.v2]) - -internal-op CatN -type V -eval auto aux = z.aux; - auto& ca = aux->cat_args; - int n = aux->n; - size_t max_size = 0; - for ( int i = 0; i < n; ++i ) - max_size += ca[i]->MaxSize(frame, aux->elems[i].Slot()); - auto res = new char[max_size + /* slop */ n + 1]; - auto res_p = res; - for ( int i = 0; i < n; ++i ) - ca[i]->RenderInto(frame, aux->elems[i].Slot(), res_p); - *res_p = '\0'; - auto s = new String(true, reinterpret_cast(res), res_p - res); - Cat1Op(ZVal(new StringVal(s))) - -macro CatNPre() - auto aux = z.aux; - auto& ca = aux->cat_args; - -macro CatNMid() - auto res = new char[max_size + /* slop */ 10]; - auto res_p = res; - -macro CatNPost() - *res_p = '\0'; - auto s = new String(true, reinterpret_cast(res), res_p - res); - Cat1Op(ZVal(new StringVal(s))) - -internal-op Cat2 -type V -eval CatNPre() - size_t max_size = ca[0]->MaxSize(frame, aux->elems[0].Slot()); - max_size += ca[1]->MaxSize(frame, aux->elems[1].Slot()); - CatNMid() - ca[0]->RenderInto(frame, aux->elems[0].Slot(), res_p); - ca[1]->RenderInto(frame, aux->elems[1].Slot(), res_p); - CatNPost() - -internal-op Cat3 -type V -eval CatNPre() - size_t max_size = ca[0]->MaxSize(frame, aux->elems[0].Slot()); - max_size += ca[1]->MaxSize(frame, 
aux->elems[1].Slot()); - max_size += ca[2]->MaxSize(frame, aux->elems[2].Slot()); - CatNMid() - ca[0]->RenderInto(frame, aux->elems[0].Slot(), res_p); - ca[1]->RenderInto(frame, aux->elems[1].Slot(), res_p); - ca[2]->RenderInto(frame, aux->elems[2].Slot(), res_p); - CatNPost() - -internal-op Cat4 -type V -eval CatNPre() - size_t max_size = ca[0]->MaxSize(frame, aux->elems[0].Slot()); - max_size += ca[1]->MaxSize(frame, aux->elems[1].Slot()); - max_size += ca[2]->MaxSize(frame, aux->elems[2].Slot()); - max_size += ca[3]->MaxSize(frame, aux->elems[3].Slot()); - CatNMid() - ca[0]->RenderInto(frame, aux->elems[0].Slot(), res_p); - ca[1]->RenderInto(frame, aux->elems[1].Slot(), res_p); - ca[2]->RenderInto(frame, aux->elems[2].Slot(), res_p); - ca[3]->RenderInto(frame, aux->elems[3].Slot(), res_p); - CatNPost() - -internal-op Cat5 -type V -eval CatNPre() - size_t max_size = ca[0]->MaxSize(frame, aux->elems[0].Slot()); - max_size += ca[1]->MaxSize(frame, aux->elems[1].Slot()); - max_size += ca[2]->MaxSize(frame, aux->elems[2].Slot()); - max_size += ca[3]->MaxSize(frame, aux->elems[3].Slot()); - max_size += ca[4]->MaxSize(frame, aux->elems[4].Slot()); - CatNMid() - ca[0]->RenderInto(frame, aux->elems[0].Slot(), res_p); - ca[1]->RenderInto(frame, aux->elems[1].Slot(), res_p); - ca[2]->RenderInto(frame, aux->elems[2].Slot(), res_p); - ca[3]->RenderInto(frame, aux->elems[3].Slot(), res_p); - ca[4]->RenderInto(frame, aux->elems[4].Slot(), res_p); - CatNPost() - -internal-op Cat6 -type V -eval CatNPre() - size_t max_size = ca[0]->MaxSize(frame, aux->elems[0].Slot()); - max_size += ca[1]->MaxSize(frame, aux->elems[1].Slot()); - max_size += ca[2]->MaxSize(frame, aux->elems[2].Slot()); - max_size += ca[3]->MaxSize(frame, aux->elems[3].Slot()); - max_size += ca[4]->MaxSize(frame, aux->elems[4].Slot()); - max_size += ca[5]->MaxSize(frame, aux->elems[5].Slot()); - CatNMid() - ca[0]->RenderInto(frame, aux->elems[0].Slot(), res_p); - ca[1]->RenderInto(frame, aux->elems[1].Slot(), 
res_p); - ca[2]->RenderInto(frame, aux->elems[2].Slot(), res_p); - ca[3]->RenderInto(frame, aux->elems[3].Slot(), res_p); - ca[4]->RenderInto(frame, aux->elems[4].Slot(), res_p); - ca[5]->RenderInto(frame, aux->elems[5].Slot(), res_p); - CatNPost() - -internal-op Cat7 -type V -eval CatNPre() - size_t max_size = ca[0]->MaxSize(frame, aux->elems[0].Slot()); - max_size += ca[1]->MaxSize(frame, aux->elems[1].Slot()); - max_size += ca[2]->MaxSize(frame, aux->elems[2].Slot()); - max_size += ca[3]->MaxSize(frame, aux->elems[3].Slot()); - max_size += ca[4]->MaxSize(frame, aux->elems[4].Slot()); - max_size += ca[5]->MaxSize(frame, aux->elems[5].Slot()); - max_size += ca[6]->MaxSize(frame, aux->elems[6].Slot()); - CatNMid() - ca[0]->RenderInto(frame, aux->elems[0].Slot(), res_p); - ca[1]->RenderInto(frame, aux->elems[1].Slot(), res_p); - ca[2]->RenderInto(frame, aux->elems[2].Slot(), res_p); - ca[3]->RenderInto(frame, aux->elems[3].Slot(), res_p); - ca[4]->RenderInto(frame, aux->elems[4].Slot(), res_p); - ca[5]->RenderInto(frame, aux->elems[5].Slot(), res_p); - ca[6]->RenderInto(frame, aux->elems[6].Slot(), res_p); - CatNPost() - -internal-op Cat8 -type V -eval CatNPre() - size_t max_size = ca[0]->MaxSize(frame, aux->elems[0].Slot()); - max_size += ca[1]->MaxSize(frame, aux->elems[1].Slot()); - max_size += ca[2]->MaxSize(frame, aux->elems[2].Slot()); - max_size += ca[3]->MaxSize(frame, aux->elems[3].Slot()); - max_size += ca[4]->MaxSize(frame, aux->elems[4].Slot()); - max_size += ca[5]->MaxSize(frame, aux->elems[5].Slot()); - max_size += ca[6]->MaxSize(frame, aux->elems[6].Slot()); - max_size += ca[7]->MaxSize(frame, aux->elems[7].Slot()); - CatNMid() - ca[0]->RenderInto(frame, aux->elems[0].Slot(), res_p); - ca[1]->RenderInto(frame, aux->elems[1].Slot(), res_p); - ca[2]->RenderInto(frame, aux->elems[2].Slot(), res_p); - ca[3]->RenderInto(frame, aux->elems[3].Slot(), res_p); - ca[4]->RenderInto(frame, aux->elems[4].Slot(), res_p); - ca[5]->RenderInto(frame, 
aux->elems[5].Slot(), res_p); - ca[6]->RenderInto(frame, aux->elems[6].Slot(), res_p); - ca[7]->RenderInto(frame, aux->elems[7].Slot(), res_p); - CatNPost() - -macro AnalyzerName(tag) - auto atype = tag.ToVal(z.t); - auto val = atype->AsEnumVal(); - Unref(frame[z.v1].string_val); - plugin::Component* component = zeek::analyzer_mgr->Lookup(val); - if ( ! component ) - component = zeek::packet_mgr->Lookup(val); - if ( ! component ) - component = zeek::file_mgr->Lookup(val); - if ( component ) - frame[z.v1].string_val = new StringVal(component->CanonicalName()); - else - frame[z.v1].string_val = new StringVal(""); - -internal-op Analyzer-Name -type VV -eval AnalyzerName(frame[z.v2]) - -internal-op Analyzer-Name -type VC -eval AnalyzerName(z.c) - -macro FilesAddOrRemoveAnalyzer(file_id_slot, tag, args_slot, METHOD) - auto file_id = frame[z.file_id_slot].string_val; - using zeek::BifType::Record::Files::AnalyzerArgs; - auto rv = frame[z.args_slot].record_val->CoerceTo(AnalyzerArgs); - bool result = zeek::file_mgr->METHOD( - file_id->CheckString(), - zeek::file_mgr->GetComponentTag(tag.ToVal(z.t).get()), - std::move(rv)); - -macro FilesAddAnalyzer(file_id_slot, tag, args_slot) - FilesAddOrRemoveAnalyzer(file_id_slot, tag, args_slot, AddAnalyzer) - -internal-op Files-Add-Analyzer -op1-read -type VVV -eval FilesAddAnalyzer(v1, frame[z.v2], v3) - -internal-op Files-Add-Analyzer -op1-read -type ViV -eval FilesAddAnalyzer(v1, z.c, v2) - -internal-op Files-Add-Analyzer -type VVVV -side-effects OP_FILES_ADD_ANALYZER_VVV OP_VVV -eval FilesAddAnalyzer(v2, frame[z.v3], v4) - frame[z.v1].int_val = result; - -internal-op Files-Add-Analyzer -type VViV -side-effects OP_FILES_ADD_ANALYZER_ViV OP_VVC -eval FilesAddAnalyzer(v2, z.c, v3) - frame[z.v1].int_val = result; - -macro FilesRemoveAnalyzer(file_id_slot, tag, args_slot) - FilesAddOrRemoveAnalyzer(file_id_slot, tag, args_slot, RemoveAnalyzer) - -internal-op Files-Remove-Analyzer -op1-read -type VVV -eval FilesRemoveAnalyzer(v1, 
frame[z.v2], v3) - -internal-op Files-Remove-Analyzer -op1-read -type ViV -eval FilesRemoveAnalyzer(v1, z.c, v2) - -internal-op Files-Remove-Analyzer -type VVVV -side-effects OP_FILES_REMOVE_ANALYZER_VVV OP_VVV -eval FilesRemoveAnalyzer(v2, frame[z.v3], v4) - frame[z.v1].int_val = result; - -internal-op Files-Remove-Analyzer -type VViV -side-effects OP_FILES_REMOVE_ANALYZER_ViV OP_VVC -eval FilesRemoveAnalyzer(v2, z.c, v3) - frame[z.v1].int_val = result; - -macro AnalyzerEnabled(tag) - auto atype = tag.ToVal(z.t); - auto c = zeek::file_mgr->Lookup(atype->AsEnumVal()); - frame[z.v1].int_val = c && c->Enabled(); - -internal-op Analyzer-Enabled -type VV -eval AnalyzerEnabled(frame[z.v2]) - -internal-op Analyzer-Enabled -type VC -eval AnalyzerEnabled(z.c) - -macro FileAnalyzerName(tag) - auto atype = tag.ToVal(z.t); - Unref(frame[z.v1].string_val); - frame[z.v1] = ZVal(file_mgr->GetComponentNameVal({NewRef{}, atype->AsEnumVal()})); - -internal-op File-Analyzer-Name -type VV -eval FileAnalyzerName(frame[z.v2]) - -internal-op File-Analyzer-Name -type VC -eval FileAnalyzerName(z.c) - -macro IsProtocolAnalyzer(tag) - auto atype = tag.ToVal(z.t); - frame[z.v1].int_val = analyzer_mgr->Lookup(atype->AsEnumVal()) != nullptr; - -internal-op Is-Protocol-Analyzer -type VV -eval IsProtocolAnalyzer(frame[z.v2]) - -internal-op Is-Protocol-Analyzer -type VC -eval IsProtocolAnalyzer(z.c) - -internal-op Clear-Table -op1-read -type V -eval frame[z.v1].table_val->RemoveAll(); - -internal-op Files-Enable-Reassembly -op1-read -type V -eval auto f = frame[z.v1].string_val->CheckString(); - file_mgr->EnableReassembly(f); - -internal-op Files-Set-Reassembly-Buffer -op1-read -type VV -eval auto f = frame[z.v1].string_val->CheckString(); - file_mgr->SetReassemblyBuffer(f, frame[z.v2].uint_val); - -internal-op Files-Set-Reassembly-Buffer -type VVV -side-effects OP_FILES_SET_REASSEMBLY_BUFFER_VV OP_VV -eval auto f = frame[z.v2].string_val->CheckString(); - frame[z.v1].int_val = 
file_mgr->SetReassemblyBuffer(f, frame[z.v3].uint_val); - -internal-op Files-Set-Reassembly-Buffer -op1-read -type VC -eval auto f = frame[z.v1].string_val->CheckString(); - file_mgr->SetReassemblyBuffer(f, zeek_uint_t(z.v2)); - -internal-op Files-Set-Reassembly-Buffer -type VVC -side-effects OP_FILES_SET_REASSEMBLY_BUFFER_VC OP_VC -eval auto f = frame[z.v2].string_val->CheckString(); - frame[z.v1].int_val = file_mgr->SetReassemblyBuffer(f, zeek_uint_t(z.v2)); - -macro GetBytesThresh(cid, is_orig) - zeek::analyzer::Analyzer* a = analyzer::conn_size::GetConnsizeAnalyzer(cid); - auto res = 0U; - if ( a ) - res = static_cast(a)->GetByteAndPacketThreshold(true, is_orig); - frame[z.v1].uint_val = res; - -internal-op Get-Bytes-Thresh -type VVV -eval GetBytesThresh(frame[z.v2].record_val, frame[z.v3].int_val) - -internal-op Get-Bytes-Thresh -type VVi -eval GetBytesThresh(frame[z.v2].record_val, z.c.uint_val) - -macro SetBytesThresh(cid, threshold, is_orig) - bool res = false; - zeek::analyzer::Analyzer* a = analyzer::conn_size::GetConnsizeAnalyzer(cid); - if ( a ) - { - static_cast(a)->SetByteAndPacketThreshold(threshold, true, is_orig); - res = true; - } - -internal-op Set-Bytes-Thresh -op1-read -type VVV -eval SetBytesThresh(frame[z.v1].record_val, frame[z.v2].uint_val, frame[z.v3].int_val) - -internal-op Set-Bytes-Thresh -op1-read -type VVi -eval SetBytesThresh(frame[z.v1].record_val, frame[z.v2].uint_val, z.c.int_val) - -internal-op Set-Bytes-Thresh -op1-read -type ViV -eval SetBytesThresh(frame[z.v1].record_val, z.c.uint_val, frame[z.v2].int_val) - -internal-op Set-Bytes-Thresh -op1-read -type Vii -eval SetBytesThresh(frame[z.v1].record_val, z.c.uint_val, z.v2) - -internal-op Set-Bytes-Thresh -type VVVV -side-effects OP_SET_BYTES_THRESH_VVV OP_VVV -eval SetBytesThresh(frame[z.v2].record_val, frame[z.v3].uint_val, frame[z.v4].int_val) - frame[z.v1].int_val = res; - -internal-op Set-Bytes-Thresh -type VVVi -side-effects OP_SET_BYTES_THRESH_VVi OP_VVV_I3 -eval 
SetBytesThresh(frame[z.v2].record_val, frame[z.v3].uint_val, z.c.int_val) - frame[z.v1].int_val = res; - -internal-op Set-Bytes-Thresh -type VViV -side-effects OP_SET_BYTES_THRESH_ViV OP_VVV_I3 -eval SetBytesThresh(frame[z.v2].record_val, z.c.uint_val, frame[z.v3].int_val) - frame[z.v1].int_val = res; - -internal-op Set-Bytes-Thresh -type VVii -side-effects OP_SET_BYTES_THRESH_Vii OP_VVC_I2 -eval SetBytesThresh(frame[z.v2].record_val, z.c.uint_val, zeek_uint_t(z.v3)) - frame[z.v1].int_val = res; - -######################################## -# Instructions for known script functions -######################################## - -internal-op Func-Id-String -type VV -eval auto id_rec = frame[z.v2].record_val; - auto orig_h = id_rec->RawField(0).addr_val->AsAddr().AsString(); - auto resp_h = id_rec->RawField(2).addr_val->AsAddr().AsString(); - auto orig_p = static_cast(id_rec->RawField(1).uint_val) & ~PORT_SPACE_MASK; - auto resp_p = static_cast(id_rec->RawField(3).uint_val) & ~PORT_SPACE_MASK; - /* Maximum address size is for IPv6 with no compression. Each - * 8 16-bit hex elements plus 7 colons between them plus the two []'s - * = 8*4 + 7 + 2 = 41 characters. - * - * Maximum port size is 5. - * - * Two of these = 2*41 + 2*5 = 92. - * Other delimiters: two ':', one ' < ' for 5 more. - * - * TOTAL: 97 characters. - * - * We use considerably more for safety. 
- */ - char buf[128]; - snprintf(buf, sizeof buf, "%s:%u > %s:%u", orig_h.c_str(), orig_p, resp_h.c_str(), resp_p); - Unref(frame[z.v1].string_val); - frame[z.v1].string_val = new StringVal(buf); - -######################################## -# Instructions for script-level idioms -######################################## - -internal-op MinU -type VVC -eval frame[z.v1].uint_val = std::min(frame[z.v2].uint_val, z.c.uint_val); - -internal-op MinI -type VVC -eval frame[z.v1].int_val = std::min(frame[z.v2].int_val, z.c.int_val); - -internal-op MinD -type VVC -eval frame[z.v1].double_val = std::min(frame[z.v2].double_val, z.c.double_val); - -internal-op MinU -type VVV -eval frame[z.v1].uint_val = std::min(frame[z.v2].uint_val, frame[z.v3].uint_val); - -internal-op MinI -type VVV -eval frame[z.v1].int_val = std::min(frame[z.v2].int_val, frame[z.v3].int_val); - -internal-op MinD -type VVV -eval frame[z.v1].double_val = std::min(frame[z.v2].double_val, frame[z.v3].double_val); - -internal-op MaxU -type VVC -eval frame[z.v1].uint_val = std::max(frame[z.v2].uint_val, z.c.uint_val); - -internal-op MaxI -type VVC -eval frame[z.v1].int_val = std::max(frame[z.v2].int_val, z.c.int_val); - -internal-op MaxD -type VVC -eval frame[z.v1].double_val = std::max(frame[z.v2].double_val, z.c.double_val); - -internal-op MaxU -type VVV -eval frame[z.v1].uint_val = std::max(frame[z.v2].uint_val, frame[z.v3].uint_val); - -internal-op MaxI -type VVV -eval frame[z.v1].int_val = std::max(frame[z.v2].int_val, frame[z.v3].int_val); - -internal-op MaxD -type VVV -eval frame[z.v1].double_val = std::max(frame[z.v2].double_val, frame[z.v3].double_val); diff --git a/src/script_opt/ZAM/OPs/ZBI.op b/src/script_opt/ZAM/OPs/ZBI.op new file mode 100644 index 0000000000..29d50f13f0 --- /dev/null +++ b/src/script_opt/ZAM/OPs/ZBI.op @@ -0,0 +1,627 @@ +# Operations corresponding to ZAM BuiltIn Functions. 
+ +internal-op Remove-Teredo +op1-read +class V +op-types R +eval auto teredo = zeek::packet_mgr->GetAnalyzer("Teredo"); + if ( teredo ) + { + zeek::detail::ConnKey conn_key($1); + static_cast(teredo.get())->RemoveConnection(conn_key); + } + +internal-op Remove-Teredo +side-effects OP_REMOVE_TEREDO_V OP_V +class VV +op-types I R +eval auto teredo = zeek::packet_mgr->GetAnalyzer("Teredo"); + if ( teredo ) + { + zeek::detail::ConnKey conn_key($1); + static_cast(teredo.get())->RemoveConnection(conn_key); + } + $$ = 1; + +internal-op Remove-GTPv1 +op1-read +class V +op-types R +eval auto gtpv1 = zeek::packet_mgr->GetAnalyzer("GTPv1"); + if ( gtpv1 ) + { + zeek::detail::ConnKey conn_key($1); + static_cast(gtpv1.get())->RemoveConnection(conn_key); + } + +internal-op Remove-GTPv1 +side-effects OP_REMOVE_GTPV1_V OP_V +class VV +op-types I R +eval auto gtpv1 = zeek::packet_mgr->GetAnalyzer("GTPv1"); + if ( gtpv1 ) + { + zeek::detail::ConnKey conn_key($1); + static_cast(gtpv1.get())->RemoveConnection(conn_key); + } + $$ = 1; + +internal-op Set-File-Handle +op1-read +class V +op-types S +eval auto handle = $1; + auto bytes = reinterpret_cast(handle->Bytes()); + auto h = std::string(bytes, handle->Len()); + zeek::file_mgr->SetHandle(h); + +internal-op Subnet-To-Addr +class VV +op-types X N +eval auto addr_v = make_intrusive($1->Prefix()); + ZVal::DeleteManagedType($$); + $$ = ZVal(std::move(addr_v)); + +macro EvalSubBytes(lhs, arg1, arg2, arg3) + { + auto sv = ZAM_sub_bytes(arg1, arg2, arg3); + Unref(lhs); + lhs = sv; + } + +internal-op Sub-Bytes +classes VVVV VVVC VVCV VCVV VVCi VCVi +op-types S S U I +eval EvalSubBytes($$, $1, $2, $3) + +# Use a distinct name because due to the convention when constructing +# instructions, frame slots are always positioned earlier than non-frame +# slots, i.e. we can't construct "VCiV", which is why the arguments are +# in a different order than above. 
+internal-op Sub-Bytes2 +class VCVi +op-types S S I U +eval EvalSubBytes($$, $1, $3, $2) + +internal-op Time-To-Double +class VV +op-types D D +eval $$ = $1; + + +internal-op To-Lower +class VV +op-types S S +eval auto sv = ZAM_to_lower($1); + Unref($$); + $$ = sv; + +# A ZAM version of Log::__write. In calls to it, the first argument +# is generally a constant (enum) *if we inlined*, but otherwise a +# parameter, so we support both VVV and VVC. +# +# It's actually the case that the return value is pretty much always +# ignored ... plus optimization can elide it away. See the second +# pair of built-ins for versions that discard the return value. +# +# Could speed things up further by modifying the Write method to just +# take the raw enum value, as it appears that that's all that's ever +# actually used. + +macro LogWritePre(id_val, columns_val) + auto id = id_val; + auto columns = columns_val; + +macro LogWriteResPost(lhs) + bool result = log_mgr->Write(id->AsEnumVal(), columns->AsRecordVal()); + lhs = result; + +internal-op Log-Write +side-effects OP_LOG_WRITE_VV OP_VV +class VVV +op-types I X R +eval LogWritePre(LogEnum($1), $2) + LogWriteResPost($$) + +### Check that invoked correctly +internal-op Log-WriteC +side-effects OP_LOG_WRITE_CV OP_VC +class VCV +op-types I X R +eval LogWritePre(LogEnum($1), $2) + LogWriteResPost($$) + +# Versions that discard the return value.
+internal-op Log-Write +side-effects +op1-read +classes VV CV +op-types X R +eval LogWritePre(LogEnum($1), $2) + (void) log_mgr->Write(id->AsEnumVal(), columns->AsRecordVal()); + +internal-op Broker-Flush-Logs +side-effects OP_BROKER_FLUSH_LOGS_X OP_X +class V +op-types U +eval $$ = broker_mgr->FlushLogBuffers(); + +internal-op Broker-Flush-Logs +side-effects +class X +eval (void) broker_mgr->FlushLogBuffers(); + +internal-op Get-Port-Transport-Proto +class VV +op-types U U +eval auto mask = $1 & PORT_SPACE_MASK; + auto v = 0; /* TRANSPORT_UNKNOWN */ + if ( mask == TCP_PORT_MASK ) + v = 1; + else if ( mask == UDP_PORT_MASK ) + v = 2; + else if ( mask == ICMP_PORT_MASK ) + v = 3; + $$ = v; + +predicate-op Conn-Exists +class V +op-types R +eval session_mgr->FindConnection($1) != nullptr + +internal-op Lookup-Conn +class VV +op-types X R +eval auto cid = $1; + Connection* conn = session_mgr->FindConnection(cid); + ValPtr res; + if ( conn ) + res = conn->GetVal(); + else + { + ERROR2("connection ID not a known connection", cid); + res = build_dummy_conn_record(); + } + AssignTarget($$, ZVal(res, res->GetType())); + +predicate-op Is-ICMP-Port +class V +op-types U +eval ($1 & PORT_SPACE_MASK) == ICMP_PORT_MASK + +predicate-op Is-TCP-Port +class V +op-types U +eval ($1 & PORT_SPACE_MASK) == TCP_PORT_MASK + +predicate-op Is-UDP-Port +class V +op-types U +eval ($1 & PORT_SPACE_MASK) == UDP_PORT_MASK + +predicate-op Is-V4-Addr +class V +op-types A +eval $1->AsAddr().GetFamily() == IPv4 + +predicate-op Is-V6-Addr +class V +op-types A +eval $1->AsAddr().GetFamily() == IPv6 + +internal-op Network-Time +class V +op-types D +eval $$ = run_state::network_time; + +internal-op Current-Time +class V +op-types D +eval $$ = util::current_time(); + +predicate-op Reading-Live-Traffic +class X +eval run_state::reading_live + +predicate-op Reading-Traces +class X +eval run_state::reading_traces + +internal-op Sort +op1-read +class V +op-types V +eval if ( $1->Size() > 1 ) + $1->Sort(); + 
+internal-op Sort +class VV +op-types V V +eval auto vv = $1; + if ( vv->Size() > 1 ) + vv->Sort(); + zeek::Ref(vv); + Unref($$); + $$ = vv; + +internal-op Sort-With-Cmp +op1-read +class VV +op-types V F +eval if ( $1->Size() > 1 ) + $1->Sort($2); + +internal-op Sort-With-Cmp +class VVV +op-types V V F +eval auto vv = $1; + if ( vv->Size() > 1 ) + vv->Sort($2); + zeek::Ref(vv); + Unref($$); + $$ = vv; + +internal-op Starts-With +classes VVV VCV VVC +op-types I S S +eval auto str = $1; + auto sub = $2; + auto str_n = str->Len(); + auto sub_n = sub->Len(); + if ( str_n < sub_n ) + $$ = 0; + else + { + auto str_b = str->Bytes(); + auto sub_b = sub->Bytes(); + int i; + for ( i = 0; i < sub_n; ++i ) + if ( str_b[i] != sub_b[i] ) + break; + $$ = i == sub_n; + } + +internal-op StrCmp +classes VVV VCV VVC +op-types I S S +eval auto s1 = $1; + auto s2 = $2; + $$ = Bstr_cmp(s1->AsString(), s2->AsString()); + +internal-op StrStr +classes VVV VCV VVC +op-types I S S +eval auto big = $1; + auto little = $2; + $$ = 1 + big->AsString()->FindSubstring(little->AsString()); + +macro Cat1Op(lhs, val) + auto& v1 = lhs; + ZVal::DeleteManagedType(v1); + v1 = val; + +internal-op Cat1 +classes VV VC +eval Cat1Op($$, $1) + zeek::Ref(v1.AsString()); + +internal-op Cat1Full +classes VV VC +eval auto formatted_val = ZVal(ZAM_val_cat($1.ToVal(Z_TYPE))); + Cat1Op($$, formatted_val) + +internal-op CatN +class V +eval CatNPre() + int n = aux->n; + size_t max_size = 0; + for ( int i = 0; i < n; ++i ) + max_size += ca[i]->MaxSize(aux->elems[i].ToDirectZVal(frame)); + auto res = new char[max_size + /* slop */ n + 1]; + auto res_p = res; + for ( int i = 0; i < n; ++i ) + ca[i]->RenderInto(aux->elems[i].ToDirectZVal(frame), res_p); + *res_p = '\0'; + auto s = new String(true, reinterpret_cast(res), res_p - res); + Cat1Op($$, ZVal(new StringVal(s))) + +macro CatNPre() + auto aux = Z_AUX; + auto& ca = aux->cat_args; + +macro CatNMid() + auto res = new char[max_size + /* slop */ 10]; + auto res_p = res; 
+ +macro CatNPost(lhs) + *res_p = '\0'; + auto s = new String(true, reinterpret_cast(res), res_p - res); + Cat1Op(lhs, ZVal(new StringVal(s))) + +internal-op Cat2 +class V +eval CatNPre() + size_t max_size = ca[0]->MaxSize(aux->elems[0].ToDirectZVal(frame)); + max_size += ca[1]->MaxSize(aux->elems[1].ToDirectZVal(frame)); + CatNMid() + ca[0]->RenderInto(aux->elems[0].ToDirectZVal(frame), res_p); + ca[1]->RenderInto(aux->elems[1].ToDirectZVal(frame), res_p); + CatNPost($$) + +internal-op Cat3 +class V +eval CatNPre() + size_t max_size = ca[0]->MaxSize(aux->elems[0].ToDirectZVal(frame)); + max_size += ca[1]->MaxSize(aux->elems[1].ToDirectZVal(frame)); + max_size += ca[2]->MaxSize(aux->elems[2].ToDirectZVal(frame)); + CatNMid() + ca[0]->RenderInto(aux->elems[0].ToDirectZVal(frame), res_p); + ca[1]->RenderInto(aux->elems[1].ToDirectZVal(frame), res_p); + ca[2]->RenderInto(aux->elems[2].ToDirectZVal(frame), res_p); + CatNPost($$) + +internal-op Cat4 +class V +eval CatNPre() + size_t max_size = ca[0]->MaxSize(aux->elems[0].ToDirectZVal(frame)); + max_size += ca[1]->MaxSize(aux->elems[1].ToDirectZVal(frame)); + max_size += ca[2]->MaxSize(aux->elems[2].ToDirectZVal(frame)); + max_size += ca[3]->MaxSize(aux->elems[3].ToDirectZVal(frame)); + CatNMid() + ca[0]->RenderInto(aux->elems[0].ToDirectZVal(frame), res_p); + ca[1]->RenderInto(aux->elems[1].ToDirectZVal(frame), res_p); + ca[2]->RenderInto(aux->elems[2].ToDirectZVal(frame), res_p); + ca[3]->RenderInto(aux->elems[3].ToDirectZVal(frame), res_p); + CatNPost($$) + +internal-op Cat5 +class V +eval CatNPre() + size_t max_size = ca[0]->MaxSize(aux->elems[0].ToDirectZVal(frame)); + max_size += ca[1]->MaxSize(aux->elems[1].ToDirectZVal(frame)); + max_size += ca[2]->MaxSize(aux->elems[2].ToDirectZVal(frame)); + max_size += ca[3]->MaxSize(aux->elems[3].ToDirectZVal(frame)); + max_size += ca[4]->MaxSize(aux->elems[4].ToDirectZVal(frame)); + CatNMid() + ca[0]->RenderInto(aux->elems[0].ToDirectZVal(frame), res_p); + 
ca[1]->RenderInto(aux->elems[1].ToDirectZVal(frame), res_p); + ca[2]->RenderInto(aux->elems[2].ToDirectZVal(frame), res_p); + ca[3]->RenderInto(aux->elems[3].ToDirectZVal(frame), res_p); + ca[4]->RenderInto(aux->elems[4].ToDirectZVal(frame), res_p); + CatNPost($$) + +internal-op Cat6 +class V +eval CatNPre() + size_t max_size = ca[0]->MaxSize(aux->elems[0].ToDirectZVal(frame)); + max_size += ca[1]->MaxSize(aux->elems[1].ToDirectZVal(frame)); + max_size += ca[2]->MaxSize(aux->elems[2].ToDirectZVal(frame)); + max_size += ca[3]->MaxSize(aux->elems[3].ToDirectZVal(frame)); + max_size += ca[4]->MaxSize(aux->elems[4].ToDirectZVal(frame)); + max_size += ca[5]->MaxSize(aux->elems[5].ToDirectZVal(frame)); + CatNMid() + ca[0]->RenderInto(aux->elems[0].ToDirectZVal(frame), res_p); + ca[1]->RenderInto(aux->elems[1].ToDirectZVal(frame), res_p); + ca[2]->RenderInto(aux->elems[2].ToDirectZVal(frame), res_p); + ca[3]->RenderInto(aux->elems[3].ToDirectZVal(frame), res_p); + ca[4]->RenderInto(aux->elems[4].ToDirectZVal(frame), res_p); + ca[5]->RenderInto(aux->elems[5].ToDirectZVal(frame), res_p); + CatNPost($$) + +internal-op Cat7 +class V +eval CatNPre() + size_t max_size = ca[0]->MaxSize(aux->elems[0].ToDirectZVal(frame)); + max_size += ca[1]->MaxSize(aux->elems[1].ToDirectZVal(frame)); + max_size += ca[2]->MaxSize(aux->elems[2].ToDirectZVal(frame)); + max_size += ca[3]->MaxSize(aux->elems[3].ToDirectZVal(frame)); + max_size += ca[4]->MaxSize(aux->elems[4].ToDirectZVal(frame)); + max_size += ca[5]->MaxSize(aux->elems[5].ToDirectZVal(frame)); + max_size += ca[6]->MaxSize(aux->elems[6].ToDirectZVal(frame)); + CatNMid() + ca[0]->RenderInto(aux->elems[0].ToDirectZVal(frame), res_p); + ca[1]->RenderInto(aux->elems[1].ToDirectZVal(frame), res_p); + ca[2]->RenderInto(aux->elems[2].ToDirectZVal(frame), res_p); + ca[3]->RenderInto(aux->elems[3].ToDirectZVal(frame), res_p); + ca[4]->RenderInto(aux->elems[4].ToDirectZVal(frame), res_p); + ca[5]->RenderInto(aux->elems[5].ToDirectZVal(frame), 
res_p); + ca[6]->RenderInto(aux->elems[6].ToDirectZVal(frame), res_p); + CatNPost($$) + +internal-op Cat8 +class V +eval CatNPre() + size_t max_size = ca[0]->MaxSize(aux->elems[0].ToDirectZVal(frame)); + max_size += ca[1]->MaxSize(aux->elems[1].ToDirectZVal(frame)); + max_size += ca[2]->MaxSize(aux->elems[2].ToDirectZVal(frame)); + max_size += ca[3]->MaxSize(aux->elems[3].ToDirectZVal(frame)); + max_size += ca[4]->MaxSize(aux->elems[4].ToDirectZVal(frame)); + max_size += ca[5]->MaxSize(aux->elems[5].ToDirectZVal(frame)); + max_size += ca[6]->MaxSize(aux->elems[6].ToDirectZVal(frame)); + max_size += ca[7]->MaxSize(aux->elems[7].ToDirectZVal(frame)); + CatNMid() + ca[0]->RenderInto(aux->elems[0].ToDirectZVal(frame), res_p); + ca[1]->RenderInto(aux->elems[1].ToDirectZVal(frame), res_p); + ca[2]->RenderInto(aux->elems[2].ToDirectZVal(frame), res_p); + ca[3]->RenderInto(aux->elems[3].ToDirectZVal(frame), res_p); + ca[4]->RenderInto(aux->elems[4].ToDirectZVal(frame), res_p); + ca[5]->RenderInto(aux->elems[5].ToDirectZVal(frame), res_p); + ca[6]->RenderInto(aux->elems[6].ToDirectZVal(frame), res_p); + ca[7]->RenderInto(aux->elems[7].ToDirectZVal(frame), res_p); + CatNPost($$) + +internal-op Analyzer-Name +classes VV VC +op-types S X +eval auto atype = $1.ToVal(Z_TYPE); + auto val = atype->AsEnumVal(); + Unref($$); + plugin::Component* component = zeek::analyzer_mgr->Lookup(val); + if ( ! component ) + component = zeek::packet_mgr->Lookup(val); + if ( ! 
component ) + component = zeek::file_mgr->Lookup(val); + if ( component ) + $$ = new StringVal(component->CanonicalName()); + else + $$ = new StringVal(""); + +macro FilesAddOrRemoveAnalyzer(file_id_val, tag, args_val, METHOD) + auto file_id = file_id_val; + using zeek::BifType::Record::Files::AnalyzerArgs; + auto rv = args_val->CoerceTo(AnalyzerArgs); + bool result = zeek::file_mgr->METHOD( + file_id->CheckString(), + zeek::file_mgr->GetComponentTag(tag.ToVal(Z_TYPE).get()), + std::move(rv)); + +macro FilesAddAnalyzer(file_id_val, tag, args_val) + FilesAddOrRemoveAnalyzer(file_id_val, tag, args_val, AddAnalyzer) + +internal-op Files-Add-Analyzer +op1-read +classes VVV VCV +op-types S X R +eval FilesAddAnalyzer($1, $2, $3) + +internal-op Files-Add-Analyzer +class VVVV +side-effects OP_FILES_ADD_ANALYZER_VVV OP_VVV +op-types I S X R +eval FilesAddAnalyzer($1, $2, $3) + $$ = result; + +internal-op Files-Add-Analyzer +class VVCV +op-types I S X R +side-effects OP_FILES_ADD_ANALYZER_VCV OP_VVC +eval FilesAddAnalyzer($1, $2, $3) + $$ = result; + +macro FilesRemoveAnalyzer(file_id_val, tag, args_slot) + FilesAddOrRemoveAnalyzer(file_id_val, tag, args_slot, RemoveAnalyzer) + +internal-op Files-Remove-Analyzer +op1-read +classes VVV VCV +op-types S X R +eval FilesRemoveAnalyzer($1, $2, $3) + +internal-op Files-Remove-Analyzer +class VVVV +op-types I S X R +side-effects OP_FILES_REMOVE_ANALYZER_VVV OP_VVV +eval FilesRemoveAnalyzer($1, $2, $3) + $$ = result; + +internal-op Files-Remove-Analyzer +class VVCV +op-types I S X R +side-effects OP_FILES_REMOVE_ANALYZER_VCV OP_VVC +eval FilesRemoveAnalyzer($1, $2, $3) + $$ = result; + +internal-op Analyzer-Enabled +classes VV VC +op-types I X +eval auto atype = $1.ToVal(Z_TYPE); + auto c = zeek::file_mgr->Lookup(atype->AsEnumVal()); + $$ = c && c->Enabled(); + +internal-op File-Analyzer-Name +classes VV VC +eval auto atype = $1.ToVal(Z_TYPE); + Unref($$.AsString()); + $$ = ZVal(file_mgr->GetComponentNameVal({NewRef{}, 
atype->AsEnumVal()})); + +internal-op Is-Protocol-Analyzer +classes VV VC +op-types I X +eval auto atype = $1.ToVal(Z_TYPE); + $$ = analyzer_mgr->Lookup(atype->AsEnumVal()) != nullptr; + +internal-op Clear-Table +op1-read +class V +op-types T +eval $1->RemoveAll(); + +internal-op Files-Enable-Reassembly +op1-read +class V +op-types S +eval auto f = $1->CheckString(); + file_mgr->EnableReassembly(f); + +internal-op Files-Set-Reassembly-Buffer +op1-read +classes VV Vi +op-types S U +eval auto f = $1->CheckString(); + file_mgr->SetReassemblyBuffer(f, $2); + +internal-op Files-Set-Reassembly-Buffer +class VVV +op-types I S U +side-effects OP_FILES_SET_REASSEMBLY_BUFFER_VV OP_VV +eval auto f = $1->CheckString(); + $$ = file_mgr->SetReassemblyBuffer(f, $2); + +internal-op Files-Set-Reassembly-Buffer +class VVi +op-types I S U +side-effects OP_FILES_SET_REASSEMBLY_BUFFER_Vi OP_VV_I2 +eval auto f = $1->CheckString(); + $$ = file_mgr->SetReassemblyBuffer(f, $2); + +internal-op Get-Bytes-Thresh +classes VVV VVC +op-types U R I +eval auto a = analyzer::conn_size::GetConnsizeAnalyzer($1); + auto res = 0U; + if ( a ) + res = static_cast(a)->GetByteAndPacketThreshold(true, $2); + $$ = res; + +macro SetBytesThresh(cid, threshold, is_orig) + bool res = false; + auto a = analyzer::conn_size::GetConnsizeAnalyzer(cid); + if ( a ) + { + static_cast(a)->SetByteAndPacketThreshold(threshold, true, is_orig); + res = true; + } + +internal-op Set-Bytes-Thresh +op1-read +classes VVV VVC VCV VCi +op-types R U I +eval SetBytesThresh($1, $2, $3) + +internal-op Set-Bytes-Thresh +class VVVV +op-types I R U I +side-effects OP_SET_BYTES_THRESH_VVV OP_VVV +eval SetBytesThresh($1, $2, $3) + $$ = res; + +internal-op Set-Bytes-Thresh +class VVVC +op-types I R U I +side-effects OP_SET_BYTES_THRESH_VVC OP_VVC +eval SetBytesThresh($1, $2, $3) + $$ = res; + +internal-op Set-Bytes-Thresh +class VVCV +op-types I R U I +side-effects OP_SET_BYTES_THRESH_VCV OP_VVC +eval SetBytesThresh($1, $2, $3) + $$ = res; + 
+internal-op Set-Bytes-Thresh +class VVCi +op-types I R U I +side-effects OP_SET_BYTES_THRESH_VCi OP_VVC_I2 +eval SetBytesThresh($1, $2, $3) + $$ = res; diff --git a/src/script_opt/ZAM/OPs/aggr-assignments.op b/src/script_opt/ZAM/OPs/aggr-assignments.op new file mode 100644 index 0000000000..c64b0077a6 --- /dev/null +++ b/src/script_opt/ZAM/OPs/aggr-assignments.op @@ -0,0 +1,89 @@ +# Operations corresponding to assigning to elements of aggregates. + +macro VectorElemAssignPre(vec, index) + auto ind = index.AsCount(); + auto vv = vec.AsVector(); + +macro EvalVectorElemAssign(vec, index, val_setup, assign_op) + VectorElemAssignPre(vec, index) + val_setup + if ( ! assign_op ) + ERROR("value used but not set"); + +op Vector-Elem-Assign +op1-read +set-type $1 +class VVV +eval EvalVectorElemAssign($1, $2,, copy_vec_elem(vv, ind, $3, Z_TYPE)) + +op Any-Vector-Elem-Assign +op1-read +set-type $1 +classes VVV VVC +eval EvalVectorElemAssign($1, $2,, vv->Assign(ind, $3.ToVal(Z_TYPE))) + +op Vector-Elem-Assign-Any +op1-read +class VVV +op-types X X a +eval EvalVectorElemAssign($1, $2, auto any_v = $3;, vv->Assign(ind, {NewRef{}, any_v})) + +op Vector-Elem-Assign +op1-read +set-type $2 +class VVC +eval VectorElemAssignPre($1, $2) + (void) copy_vec_elem(vv, ind, $3, Z_TYPE); + +# These versions are used when the constant is the index, not the new value. +op Vector-Elem-Assign +op1-read +set-type $1 +class VVi +op-types V X U +eval auto vv = $1; + if ( ! copy_vec_elem(vv, $3, $2, Z_TYPE) ) + ERROR("value used but not set"); + +op Any-Vector-Elem-Assign +op1-read +set-type $1 +class VVi +op-types V X I +eval auto vv = $1; + if ( ! 
vv->Assign($3, $2.ToVal(Z_TYPE)) ) + ERROR("value used but not set"); + +op Vector-Elem-Assign-Any +op1-read +class VVi +op-types V a I +eval auto vv = $1; + auto any_v = $2; + vv->Assign($3, {NewRef{}, any_v}); + +internal-op Vector-Slice-Assign +op1-read +class VV +op-types V V +eval ValPtr vec = {NewRef{}, $1}; + auto indices = Z_AUX->ToListVal(frame); + ValPtr vals = {NewRef{}, $2}; + bool iterators_invalidated; + auto error = assign_to_index(std::move(vec), std::move(indices), std::move(vals), iterators_invalidated); + if ( error ) + ERROR(error); + if ( iterators_invalidated ) + WARN("possible loop/iterator invalidation"); + + +internal-op Table-Elem-Assign +op1-read +classes VV VC +op-types T X +eval auto indices = Z_AUX->ToListVal(frame); + auto val = $2.ToVal(Z_TYPE); + bool iterators_invalidated = false; + $1->Assign(std::move(indices), std::move(val), true, &iterators_invalidated); + if ( iterators_invalidated ) + WARN("possible loop/iterator invalidation"); diff --git a/src/script_opt/ZAM/OPs/binary-exprs.op b/src/script_opt/ZAM/OPs/binary-exprs.op new file mode 100644 index 0000000000..17d90f9979 --- /dev/null +++ b/src/script_opt/ZAM/OPs/binary-exprs.op @@ -0,0 +1,104 @@ +# Operations corresponding to binary expressions. + +binary-expr-op Add +op-type I U D S +vector +eval $1 + $2 +eval-type S vector strings; + strings.push_back($1->AsString()); + strings.push_back($2->AsString()); + auto res = new StringVal(concatenate(strings)); + $$ = res; + +binary-expr-op Sub +op-type I U D T +vector +eval $1 - $2 +# +eval-type T auto v = $1->Clone(); + auto s = v.release()->AsTableVal(); + $2->RemoveFrom(s); + $$ = s; + +binary-expr-op Times +op-type I U D +vector +eval $1 * $2 + +binary-expr-op Divide +op-type I U D +vector +# +precheck $2 == 0 +precheck-action ERROR("division by zero"); +eval $1 / $2 + +binary-expr-op Mask +# Signal that this expression only has mixed-type evaluation. 
+op-type X +explicit-result-type +eval-mixed A I auto mask = static_cast($2); + auto a = $1->AsAddr(); + if ( a.GetFamily() == IPv4 && mask > 32 ) + ERROR(util::fmt("bad IPv4 subnet prefix length: %" PRIu32, mask)); + if ( a.GetFamily() == IPv6 && mask > 128 ) + ERROR(util::fmt("bad IPv6 subnet prefix length: %" PRIu32, mask)); + auto v = make_intrusive(a, mask); + Unref($$.AsSubNet()); + $$.AsSubNetRef() = v.release(); + +binary-expr-op Mod +op-type I U +vector +precheck $2 == 0 +precheck-action ERROR("modulo by zero"); +eval $1 % $2 + +binary-expr-op And-And +op-type I +vector +eval zeek_int_t($1 && $2) + +binary-expr-op Or-Or +op-type I +vector +eval zeek_int_t($1 || $2) + +binary-expr-op And +op-type U P T +vector +eval $1 & $2 +# +eval-type P $$ = new PatternVal(RE_Matcher_conjunction($1->AsPattern(), $2->AsPattern())); +# +eval-type T $$ = $1->Intersection(*$2).release(); + +binary-expr-op Or +op-type U P T +vector +eval $1 | $2 +# +eval-type P $$ = new PatternVal(RE_Matcher_disjunction($1->AsPattern(), $2->AsPattern())); +# +eval-type T auto v = $1->Clone(); + auto s = v.release()->AsTableVal(); + (void) $2->AddTo(s, false, false); + $$ = s; + +binary-expr-op Xor +op-type U +vector +eval $1 ^ $2 + +binary-expr-op Lshift +op-type I U +vector +eval-type I if ( $1 < 0 ) + ERROR("left shifting a negative number is undefined"); + $$ = $1 << $2; +eval $1 << $2 + +binary-expr-op Rshift +op-type I U +vector +eval $1 >> $2 diff --git a/src/script_opt/ZAM/OPs/calls.op b/src/script_opt/ZAM/OPs/calls.op new file mode 100644 index 0000000000..5fcbdda607 --- /dev/null +++ b/src/script_opt/ZAM/OPs/calls.op @@ -0,0 +1,180 @@ +# Operations corresponding to function calls. + +# A call with no arguments and no return value. +internal-op Call0 +op1-read +class X +side-effects +num-call-args 0 + +# A call with no arguments and a return value. 
+internal-assignment-op Call0 +class V +side-effects OP_CALL0_X OP_X +assign-val v +num-call-args 0 + +# Calls with 1 argument and no return value. +internal-op Call1 +op1-read +classes V C +side-effects +num-call-args 1 + +# Same but with a return value. +internal-assignment-op Call1 +class VV +side-effects OP_CALL1_V OP_V +assign-val v +num-call-args 1 + +internal-assignment-op Call1 +class VC +side-effects OP_CALL1_C OP_C +assign-val v +num-call-args 1 + +# Calls with 2-5 arguments and no return value. +internal-op Call2 +class X +side-effects +num-call-args 2 + +# Same with a return value. +internal-assignment-op Call2 +class V +side-effects OP_CALL2_X OP_X +assign-val v +num-call-args 2 + +internal-op Call3 +class X +side-effects +num-call-args 3 + +# Same with a return value. +internal-assignment-op Call3 +class V +side-effects OP_CALL3_X OP_X +assign-val v +num-call-args 3 + +internal-op Call4 +class X +side-effects +num-call-args 4 + +# Same with a return value. +internal-assignment-op Call4 +class V +side-effects OP_CALL4_X OP_X +assign-val v +num-call-args 4 + +internal-op Call5 +class X +side-effects +num-call-args 5 + +# Same with a return value. +internal-assignment-op Call5 +class V +side-effects OP_CALL5_X OP_X +assign-val v +num-call-args 5 + +# ... and with an arbitrary number of arguments. + +internal-op CallN +class X +side-effects +num-call-args n + +# Same with a return value. +internal-assignment-op CallN +class V +side-effects OP_CALLN_X OP_X +assign-val v +num-call-args n + +# Same, but for indirect calls via a global variable. +internal-op IndCallN +class X +side-effects +indirect-call +num-call-args n + +# Same with a return value. +internal-assignment-op IndCallN +class V +side-effects OP_INDCALLN_X OP_X +assign-val v +indirect-call +num-call-args n + +# And versions with a local variable rather than a global. 
+internal-op Local-IndCallN +op1-read +class V +side-effects +indirect-local-call +num-call-args n + +internal-assignment-op Local-IndCallN +class VV +side-effects OP_LOCAL_INDCALLN_V OP_V +assign-val v +indirect-local-call +num-call-args n + +# A call made in a "when" context. These always have assignment targets. +# To keep things simple, we just use one generic flavor (for N arguments, +# doing a less-streamlined-but-simpler Val-based assignment). +macro WhenCall(lhs, func) + if ( ! func ) + throw ZAMDelayedCallException(); + auto trigger = Z_FRAME->GetTrigger(); + Val* v = trigger ? trigger->Lookup(Z_AUX->call_expr.get()) : nullptr; + ValPtr vp; + if ( v ) + vp = {NewRef{}, v}; + else + { + auto aux = Z_AUX; + auto current_assoc = Z_FRAME->GetTriggerAssoc(); + auto n = aux->n; + std::vector args; + for ( auto i = 0; i < n; ++i ) + args.push_back(aux->ToVal(frame, i)); + Z_FRAME->SetCall(Z_AUX->call_expr.get()); + /* It's possible that this function will call another that + * itself returns null because *it* is the actual blocker. + * That will set ZAM_error, which we need to ignore. + */ + auto hold_ZAM_error = ZAM_error; + vp = func->Invoke(&args, Z_FRAME); + ZAM_error = hold_ZAM_error; + Z_FRAME->SetTriggerAssoc(current_assoc); + if ( ! vp ) + throw ZAMDelayedCallException(); + } + if ( Z_IS_MANAGED ) + ZVal::DeleteManagedType(lhs); + lhs = ZVal(vp, Z_TYPE); + +internal-op WhenCallN +class V +side-effects +eval WhenCall($$, Z_AUX_FUNC) + +internal-op WhenIndCallN +class VV +op-types X F +side-effects +eval WhenCall($$, $1) + +# Form for when we need to look up the function value at run-time. +internal-op When-ID-IndCallN +class V +side-effects +eval WhenCall($$, Z_AUX_ID->GetVal()->AsFunc()) diff --git a/src/script_opt/ZAM/OPs/coercions.op b/src/script_opt/ZAM/OPs/coercions.op new file mode 100644 index 0000000000..fef6ea8096 --- /dev/null +++ b/src/script_opt/ZAM/OPs/coercions.op @@ -0,0 +1,151 @@ +# Operations corresponding to type coercions. 
+ +direct-unary-op Arith-Coerce ArithCoerce + +internal-op Coerce-UI +class VV +op-types U I +eval auto v = $1; + if ( v < 0 ) + ERROR("underflow converting int to count"); + else + $$ = zeek_uint_t(v); + +internal-op Coerce-UD +class VV +op-types U D +eval auto v = $1; + if ( v < 0.0 ) + ERROR("underflow converting double to count"); + else if ( v > static_cast(UINT64_MAX) ) + ERROR("overflow converting double to count"); + else + $$ = zeek_uint_t(v); + +internal-op Coerce-IU +class VV +op-types I U +eval auto v = $1; + if ( v > INT64_MAX ) + ERROR("overflow converting count to int"); + else + $$ = zeek_int_t(v); + +internal-op Coerce-ID +class VV +op-types I D +eval auto v = $1; + if ( v < static_cast(INT64_MIN) ) + ERROR("underflow converting double to int"); + else if ( v > static_cast(INT64_MAX) ) + ERROR("overflow converting double to int"); + else + $$ = zeek_int_t(v); + +internal-op Coerce-DI +class VV +op-types D I +eval $$ = double($1); + +internal-op Coerce-DU +class VV +op-types D U +eval $$ = double($1); + + +macro EvalCoerceVec(lhs, rhs, coercer) + auto old_v1 = lhs.AsVector(); + lhs.AsVectorRef() = coercer(rhs.AsVector(), Z_LOC); + Unref(old_v1); /* delayed to allow for same value on both sides */ + +internal-op Coerce-UI-Vec +class VV +eval EvalCoerceVec($$, $1, vec_coerce_UI) + +internal-op Coerce-UD-Vec +class VV +eval EvalCoerceVec($$, $1, vec_coerce_UD) + +internal-op Coerce-IU-Vec +class VV +eval EvalCoerceVec($$, $1, vec_coerce_IU) + +internal-op Coerce-ID-Vec +class VV +eval EvalCoerceVec($$, $1, vec_coerce_ID) + +internal-op Coerce-DI-Vec +class VV +eval EvalCoerceVec($$, $1, vec_coerce_DI) + +internal-op Coerce-DU-Vec +class VV +eval EvalCoerceVec($$, $1, vec_coerce_DU) + + +direct-unary-op Record-Coerce RecordCoerce + +internal-op Record-Coerce +class VV +op-types R R +eval auto rt = cast_intrusive(Z_TYPE); + auto v = $1; + auto to_r = coerce_to_record(std::move(rt), v, Z_AUX_MAP); + Unref($$); + $$ = to_r.release(); + +direct-unary-op 
Table-Coerce TableCoerce + +internal-op Table-Coerce +class VV +op-types T T +eval auto tv = $1; + if ( tv->Size() > 0 ) + ERROR("coercion of non-empty table/set"); + else + { + auto tt = cast_intrusive(Z_TYPE); + AttributesPtr attrs = tv->GetAttrs(); + auto t = make_intrusive(tt, attrs); + Unref($$); + $$ = t.release(); + } + +direct-unary-op Vector-Coerce VectorCoerce + +internal-op Vector-Coerce +class VV +op-types V V +eval if ( $1->Size() > 0 ) + ERROR("coercion of non-empty vector"); + else + { + auto vv = new VectorVal(cast_intrusive(Z_TYPE)); + Unref($$); + $$ = vv; + } + +unary-expr-op To-Any-Coerce +op-type X +set-type $1 +eval AssignTarget($$, ZVal($1.ToVal(Z_TYPE), ZAM::any_base_type)) + +unary-expr-op From-Any-Coerce +no-const +op-type X +set-type $$ +eval auto v = $1.AsAny(); + AssignTarget($$, ZVal({NewRef{}, v}, Z_TYPE)) + +unary-expr-op From-Any-Vec-Coerce +no-const +op-type X +set-type $$ +eval auto vv = $1.AsVector(); + if ( ! vv->Concretize(Z_TYPE->Yield()) ) + ERROR("incompatible vector-of-any"); + else + { + zeek::Ref(vv); + AssignTarget($$, ZVal(vv)) + } diff --git a/src/script_opt/ZAM/OPs/constructors.op b/src/script_opt/ZAM/OPs/constructors.op new file mode 100644 index 0000000000..c55b81c6bc --- /dev/null +++ b/src/script_opt/ZAM/OPs/constructors.op @@ -0,0 +1,251 @@ +# Operations corresponding to aggregated constructors. + +# Table construction requires atypical evaluation of list elements +# using information from their expression specifics. 
+direct-unary-op Table-Constructor ConstructTable + +macro ConstructTableOrSetPre(width) + auto tt = cast_intrusive(Z_TYPE); + auto new_t = new TableVal(tt, Z_AUX_ATTRS); + auto aux = Z_AUX; + auto n = aux->n; + auto ind_width = width; + +macro ConstructTableOrSetPost(lhs) + auto& t = lhs.AsTableRef(); + Unref(t); + t = new_t; + +internal-op Construct-Table +class Vi +eval ConstructTableOrSetPre($1) + for ( auto i = 0; i < n; ++i ) + { + auto indices = aux->ToIndices(frame, i, ind_width); + auto v = aux->ToVal(frame, i + ind_width); + new_t->Assign(indices, v); + i += ind_width; + } + ConstructTableOrSetPost($$) + +# When tables are constructed, if their &default is a lambda with captures +# then we need to explicitly set up the default. +internal-op Set-Table-Default-Lambda +op1-read +class VV +op-types T X +eval auto tbl = $1; + auto lambda = $2.ToVal(Z_TYPE); + tbl->InitDefaultVal(std::move(lambda)); + +direct-unary-op Set-Constructor ConstructSet + +internal-op Construct-Set +class Vi +eval ConstructTableOrSetPre($1) + for ( auto i = 0; i < n; i += ind_width ) + { + auto indices = aux->ToIndices(frame, i, ind_width); + new_t->Assign(indices, nullptr); + } + ConstructTableOrSetPost($$) + +direct-unary-op Record-Constructor ConstructRecord + +direct-unary-op Rec-Construct-With-Rec ConstructRecordFromRecord + +macro ConstructRecordPost(lhs) + auto& r = lhs.AsRecordRef(); + Unref(r); + r = new RecordVal(cast_intrusive(Z_TYPE), std::move(init_vals)); + +op Construct-Direct-Record +class V +eval auto init_vals = Z_AUX->ToZValVec(frame); + ConstructRecordPost($$) + +op Construct-Known-Record +class V +eval auto init_vals = Z_AUX->ToZValVecWithMap(frame); + ConstructRecordPost($$) + +macro AssignFromRec(lhs_full, rhs_full) + /* The following is defined below, for use by Rec-Assign-Fields */ + SetUpRecFieldOps(lhs_full, rhs_full, lhs_map) + auto is_managed = Z_AUX->is_managed; + for ( size_t i = 0U; i < n; ++i ) + { + auto rhs_i = rhs->RawField(rhs_map[i]); + if ( 
is_managed[i] ) + zeek::Ref(rhs_i.ManagedVal()); + init_vals[lhs_map[i]] = rhs_i; + } + +op Construct-Known-Record-From +class VV +eval auto init_vals = Z_AUX->ToZValVecWithMap(frame); + AssignFromRec($$, $1) + ConstructRecordPost($$) + +macro DoNetworkTimeInit(slot) + init_vals[slot] = ZVal(run_state::network_time); + +op Construct-Known-Record-With-NT +class Vi +eval auto init_vals = Z_AUX->ToZValVecWithMap(frame); + DoNetworkTimeInit($1) + ConstructRecordPost($$) + +op Construct-Known-Record-With-NT-From +class VVi +eval auto init_vals = Z_AUX->ToZValVecWithMap(frame); + DoNetworkTimeInit($2) + AssignFromRec($$, $1) + ConstructRecordPost($$) + +macro GenInits() + auto init_vals = Z_AUX->ToZValVecWithMap(frame); + for ( auto& fi : *z.aux->field_inits ) + init_vals[fi.first] = fi.second->Generate(); + +op Construct-Known-Record-With-Inits +class V +eval GenInits() + ConstructRecordPost($$) + +op Construct-Known-Record-With-Inits-From +class VV +eval GenInits() + AssignFromRec($$, $1) + ConstructRecordPost($$) + +op Construct-Known-Record-With-Inits-And-NT +class Vi +eval GenInits() + DoNetworkTimeInit($1) + ConstructRecordPost($$) + +op Construct-Known-Record-With-Inits-And-NT-From +class VVi +eval GenInits() + DoNetworkTimeInit($2) + AssignFromRec($$, $1) + ConstructRecordPost($$) + +macro SetUpRecFieldOps(lhs_full, rhs_full, which_lhs_map) + auto lhs = lhs_full.record_val; + auto rhs = rhs_full.record_val; + auto& lhs_map = Z_AUX->which_lhs_map; + auto& rhs_map = Z_AUX->rhs_map; + auto n = rhs_map.size(); + +op Rec-Assign-Fields +op1-read +class VV +eval SetUpRecFieldOps($1, $2, map) + for ( size_t i = 0U; i < n; ++i ) + lhs->RawOptField(lhs_map[i]) = rhs->RawField(rhs_map[i]); + +macro DoManagedRecAssign() + auto is_managed = Z_AUX->is_managed; + for ( size_t i = 0U; i < n; ++i ) + if ( is_managed[i] ) + { + auto& lhs_i = lhs->RawOptField(lhs_map[i]); + auto rhs_i = rhs->RawField(rhs_map[i]); + zeek::Ref(rhs_i.ManagedVal()); + if ( lhs_i ) + 
ZVal::DeleteManagedType(*lhs_i); + lhs_i = rhs_i; + } + else + lhs->RawOptField(lhs_map[i]) = rhs->RawField(rhs_map[i]); + +op Rec-Assign-Fields-Managed +op1-read +class VV +eval SetUpRecFieldOps($1, $2, map) + DoManagedRecAssign() + +op Rec-Assign-Fields-All-Managed +op1-read +class VV +eval SetUpRecFieldOps($1, $2, map) + for ( size_t i = 0U; i < n; ++i ) + { + auto& lhs_i = lhs->RawOptField(lhs_map[i]); + auto rhs_i = rhs->RawField(rhs_map[i]); + zeek::Ref(rhs_i.ManagedVal()); + if ( lhs_i ) + ZVal::DeleteManagedType(*lhs_i); + lhs_i = rhs_i; + } + +op Rec-Add-Int-Fields +op1-read +class VV +eval SetUpRecFieldOps($1, $2, map) + for ( size_t i = 0U; i < n; ++i ) + lhs->RawField(lhs_map[i]).int_val += rhs->RawField(rhs_map[i]).int_val; + +op Rec-Add-Double-Fields +op1-read +class VV +eval SetUpRecFieldOps($1, $2, map) + for ( size_t i = 0U; i < n; ++i ) + lhs->RawField(lhs_map[i]).double_val += rhs->RawField(rhs_map[i]).double_val; + +op Rec-Add-Fields +op1-read +class VV +eval SetUpRecFieldOps($1, $2, map) + auto& types = Z_AUX->types; + for ( size_t i = 0U; i < n; ++i ) + { + auto& lhs_i = lhs->RawField(lhs_map[i]); + auto rhs_i = rhs->RawField(rhs_map[i]); + auto tag = types[i]->Tag(); + if ( tag == TYPE_INT ) + lhs_i.int_val += rhs_i.int_val; + else if ( tag == TYPE_COUNT ) + lhs_i.uint_val += rhs_i.uint_val; + else + lhs_i.double_val += rhs_i.double_val; + } + +# Special instruction for concretizing vectors that are fields in a +# newly-constructed record. "aux" holds which fields in the record to +# inspect. 
+op Concretize-Vector-Fields +op1-read +class V +op-types R +eval auto rt = cast_intrusive(Z_TYPE); + auto r = $1; + auto aux = Z_AUX; + auto n = aux->n; + for ( auto i = 0; i < n; ++i ) + { + auto ind = aux->elems[i].IntVal(); + auto v_i = r->GetField(ind); + ASSERT(v_i); + if ( v_i->GetType()->IsUnspecifiedVector() ) + { + const auto& t_i = rt->GetFieldType(ind); + v_i->AsVectorVal()->Concretize(t_i->Yield()); + } + } + +direct-unary-op Vector-Constructor ConstructVector + +internal-op Construct-Vector +class V +op-types V +eval auto new_vv = new VectorVal(cast_intrusive(Z_TYPE)); + auto aux = Z_AUX; + auto n = aux->n; + for ( auto i = 0; i < n; ++i ) + new_vv->Assign(i, aux->ToVal(frame, i)); + auto& vv = $$; + Unref(vv); + vv = new_vv; diff --git a/src/script_opt/ZAM/OPs/indexing.op b/src/script_opt/ZAM/OPs/indexing.op new file mode 100644 index 0000000000..85f3691150 --- /dev/null +++ b/src/script_opt/ZAM/OPs/indexing.op @@ -0,0 +1,212 @@ +# Operations corresponding to indexing of tables, vectors, strings, +# and "any" values. 
+ +op IndexVecBoolSelect +classes VVV VCV +op-types V V V +set-type $$ +eval if ( $1->Size() != $2->Size() ) + ERROR("size mismatch, boolean index and vector"); + else + { + auto vt = cast_intrusive(Z_TYPE); + auto v2 = $1; + auto v3 = $2; + auto v = vector_bool_select(std::move(vt), v2, v3); + Unref($$); + $$ = v.release(); + } + +op IndexVecIntSelect +classes VVV VCV +op-types V V V +set-type $$ +eval auto vt = cast_intrusive(Z_TYPE); + auto v2 = $1; + auto v3 = $2; + auto v = vector_int_select(std::move(vt), v2, v3); + Unref($$); + $$ = v.release(); + +op Index +class VVL +custom-method return CompileIndex(n1, n2, l, false); +no-eval + +op Index +class VCL +custom-method return CompileIndex(n, c, l, false); +no-eval + +op WhenIndex +class VVL +custom-method return CompileIndex(n1, n2, l, true); +no-eval + +op WhenIndex +class VCL +custom-method return CompileIndex(n, c, l, true); +no-eval + +internal-op Index-Vec +class VVV +op-types X V I +eval EvalIndexVec($$, $1, $2) + +macro EvalIndexVec(lhs, rhs_vec, index) + auto& vv = rhs_vec->RawVec(); + zeek_int_t ind = index; + if ( ind < 0 ) + ind += vv.size(); + if ( ind < 0 || ind >= int(vv.size()) ) + ERROR("no such index"); + AssignTarget(lhs, CopyVal(*(vv[ind]))) + +internal-op Index-VecC +class VVi +op-types X V I +eval EvalIndexVec($$, $1, $2) + +internal-op Index-Any-Vec +class VVV +op-types X V I +eval EvalIndexAnyVec($$, $1, $2) + +macro EvalIndexAnyVec(lhs, vec, index) + auto vv = vec; + zeek_int_t ind = index; + if ( ind < 0 ) + ind += vv->Size(); + if ( ind < 0 || ind >= int(vv->Size()) ) + ERROR("no such index"); + AssignTarget(lhs, ZVal(vv->ValAt(ind).release())) + +internal-op Index-Any-VecC +class VVi +op-types X V I +eval EvalIndexAnyVec($$, $1, $2) + +macro WhenIndexResCheck(vec) + if ( vec && IndexExprWhen::evaluating > 0 ) + IndexExprWhen::results.push_back({NewRef{}, vec}); + +internal-op When-Index-Vec +class VVV +op-types X V I +eval EvalIndexAnyVec($$, $1, $2) + 
WhenIndexResCheck($$.AsVector()) + +internal-op When-Index-VecC +class VVi +op-types X V I +eval EvalIndexAnyVec($$, $1, $2) + WhenIndexResCheck($$.AsVector()) + +macro EvalVecSlice(lhs, vv) + auto vec = vv; + auto v = index_slice(vec, indices.get()); + Unref(lhs); + lhs = v.release(); + +internal-op Index-Vec-Slice +class VV +op-types V V +eval auto indices = Z_AUX->ToListVal(frame); + EvalVecSlice($$, $1) + +internal-op When-Index-Vec-Slice +class VV +op-types V V +eval auto indices = Z_AUX->ToListVal(frame); + EvalVecSlice($$, $1) + WhenIndexResCheck($$) + +internal-op Table-Index +class VV +eval auto indices = Z_AUX->ToListVal(frame); + EvalTableIndex($1, indices) + if ( v ) + AssignTarget($$, BuildVal(v, Z_TYPE)) + +internal-op Table-PatStr-Index +classes VVV VVC +op-types X T S +eval auto vec = ZVal($1->LookupPattern({NewRef{}, $2})); + ZVal::DeleteManagedType($$); + $$ = vec; + +internal-op When-Table-Index +class VV +eval auto indices = Z_AUX->ToListVal(frame); + EvalTableIndex($1, indices) + if ( v ) + { + if ( IndexExprWhen::evaluating > 0 ) + IndexExprWhen::results.emplace_back(v); + AssignTarget($$, BuildVal(v, Z_TYPE)) + } + +macro EvalTableIndex(tbl, index) + auto v = tbl.AsTable()->FindOrDefault(index); + if ( ! v ) + ERROR("no such index"); + +internal-op When-PatStr-Index +class VV +op-types X T +eval auto indices = Z_AUX->ToListVal(frame); + auto arg0 = indices->Idx(0); + auto v = $1->LookupPattern({NewRef{}, arg0->AsStringVal()}); + if ( IndexExprWhen::evaluating > 0 ) + IndexExprWhen::results.emplace_back(v); + AssignTarget($$, BuildVal(v, Z_TYPE)) + +internal-assignment-op Table-Index1 +classes VVV VVC +assign-val v +eval EvalTableIndex($1, $2.ToVal(Z_TYPE)) +# No AssignTarget needed, as this is an assignment-op + +# This version is for a variable v3. 
+internal-op Index-String +class VVV +op-types S S I +eval EvalIndexString($$, $1, $2) + +macro EvalIndexString(lhs, s, index) + auto str = s->AsString(); + auto len = str->Len(); + auto idx = index; + if ( idx < 0 ) + idx += len; + auto v = str->GetSubstring(idx, 1); + Unref(lhs); + lhs = new StringVal(v ? v : new String("")); + +# This version is for a constant v3. +internal-op Index-StringC +class VVi +op-types S S I +eval EvalIndexString($$, $1, $2) + +internal-op Index-String-Slice +class VV +op-types S S +eval auto str = $1->AsString(); + auto indices = Z_AUX->ToListVal(frame); + auto slice = index_string(str, indices.get()); + Unref($$); + $$ = new StringVal(slice->ToStdString()); + +op AnyIndex +class VVi +op-types X a I +set-type $$ +eval auto lv = $1->AsListVal(); + if ( $2 < 0 || $2 >= lv->Length() ) + reporter->InternalError("bad \"any\" element index"); + ValPtr elem = lv->Idx($2); + if ( CheckAnyType(elem->GetType(), Z_TYPE, Z_LOC) ) + AssignTarget($$, BuildVal(elem, Z_TYPE)) + else + ZAM_error = true; diff --git a/src/script_opt/ZAM/OPs/internal.op b/src/script_opt/ZAM/OPs/internal.op new file mode 100644 index 0000000000..dd29579ee6 --- /dev/null +++ b/src/script_opt/ZAM/OPs/internal.op @@ -0,0 +1,124 @@ +# Internal operations not directly driven off of AST elements. + +# These two are only needed for type-based switch statements. Could think +# about replacing them using CoerceFromAnyExpr. +op Assign-Any +classes VV VC +set-type $1 +op-types a X +eval auto v = $1.ToVal(Z_TYPE); + $$ = v.release(); + +# Lazy way to assign without having to track the specific type of +# a constant. 
+internal-op Assign-Const +class VC +eval AssignTarget($$, BuildVal($1.ToVal(Z_TYPE), Z_TYPE)) + +internal-assignment-op Load-Val +class Vi +assign-val v +eval auto& v = Z_FRAME->GetElement($1); + +internal-assignment-op Load-Global +# We don't use GlobalVal() for the assignment because we want to leverage +# the bookkeeping that assign-val gives us in terms of memory management. +class Vg +assign-val v +eval auto& v = GlobalID($1)->GetVal(); + if ( ! v ) + ERROR2("value used but not set", Z_AUX_ID.get()); + +# We need a special form here for loading global types, as they don't +# fit the usual template. +internal-op Load-Global-Type +class Vg +op-types t I +eval auto& v = $$; + Unref(v); + auto& t = GlobalID($1)->GetType(); + v = new TypeVal(t, true); + +internal-op Load-Capture +class Vi +eval $$ = Z_FRAME->GetFunction()->GetCapturesVec()[$1]; + +internal-op Load-Managed-Capture +class Vi +eval auto& lhs = $$; + auto& rhs = Z_FRAME->GetFunction()->GetCapturesVec()[$1]; + zeek::Ref(rhs.ManagedVal()); + ZVal::DeleteManagedType(lhs); + lhs = rhs; + +internal-op Store-Global +op1-internal +class g +eval GlobalID($1)->SetVal(GlobalVal($1).ToVal(Z_TYPE)); + +# Both of these have the LHS as v2 not v1, to keep with existing +# conventions of OP_VV_I2 op type (as opposed to OP_VV_I1_V2, which doesn't +# currently exist, and would be a pain to add). +internal-op Store-Capture +op1-read +class Vi +eval Z_FRAME->GetFunction()->GetCapturesVec()[$2] = $1; + +internal-op Store-Managed-Capture +op1-read +class Vi +eval auto& lhs = Z_FRAME->GetFunction()->GetCapturesVec()[$2]; + auto& rhs = $1; + zeek::Ref(rhs.ManagedVal()); + ZVal::DeleteManagedType(lhs); + lhs = rhs; + + +internal-op Copy-To +class VC +set-type $1 +eval AssignTarget($$, CopyVal($1)) + +internal-op GoTo +class b +eval $1 + +internal-op Hook-Break +class X +eval flow = FLOW_BREAK; + pc = end_pc; + DO_ZAM_PROFILE + continue; + +# Slot 2 gives frame size. 
+internal-op Lambda +class Vi +op-types F I +eval auto& primary_func = Z_AUX_PRIMARY_FUNC; + auto& body = primary_func->GetBodies()[0].stmts; + ASSERT(body->Tag() == STMT_ZAM); + auto lamb = make_intrusive(Z_AUX_ID); + lamb->AddBody(body, $1); + lamb->SetName(Z_AUX_LAMBDA_NAME.c_str()); + auto& aux = Z_AUX; + if ( aux->n > 0 ) + { + auto captures = std::make_unique>(); + for ( auto i = 0; i < aux->n; ++i ) + { + auto slot = aux->elems[i].Slot(); + if ( slot >= 0 ) + { + auto& cp = frame[slot]; + if ( aux->elems[i].IsManaged() ) + zeek::Ref(cp.ManagedVal()); + captures->push_back(cp); + } + else + /* Used for when-locals. */ + captures->push_back(ZVal()); + } + lamb->CreateCaptures(std::move(captures)); + } + Unref($$); + $$ = lamb.release(); diff --git a/src/script_opt/ZAM/OPs/iterations.op b/src/script_opt/ZAM/OPs/iterations.op new file mode 100644 index 0000000000..92c4895b62 --- /dev/null +++ b/src/script_opt/ZAM/OPs/iterations.op @@ -0,0 +1,124 @@ +# Operations corresponding to iterations. + +internal-op Init-Table-Loop +op1-read +class Vf +op-types T I +eval $2.BeginLoop({NewRef{}, $1}, frame, Z_AUX); + +internal-op Next-Table-Iter +op1-read +class fb +eval NextTableIterPre($1, $2) + $1.NextIter(); + +macro NextTableIterPre(iter, BRANCH) + if ( iter.IsDoneIterating() ) + BRANCH + +internal-op Next-Table-Iter-No-Vars +op1-read +class fb +eval NextTableIterPre($1, $2) + $1.IterFinished(); + +internal-op Next-Table-Iter-Val-Var +# v1 = slot of the "ValueVar" +class Vfb +eval NextTableIterPre($1, $2) + AssignTarget($$, $1.IterValue()); + $1.NextIter(); + +internal-op Next-Table-Iter-Val-Var-No-Vars +# v1 = slot of the "ValueVar" +class Vfb +eval NextTableIterPre($1, $2) + AssignTarget($$, $1.IterValue()); + $1.IterFinished(); + + +internal-op Init-Vector-Loop +op1-read +class Vs +op-types V I +eval auto& vv = $1->RawVec(); + $2.InitLoop(&vv); + +macro NextVectorIterCore(info, BRANCH) + if ( info.IsDoneIterating() ) + BRANCH + const auto& vv = *info.vv; + if ( ! 
vv[info.iter] ) + { /* Account for vector hole. Re-execute for next position. */ + info.IterFinished(); + REDO + } + +internal-op Next-Vector-Iter +# v1 = iteration variable +class Vsb +op-types U I I +eval NextVectorIterCore($1, $2) + $$ = $1.iter; + $1.IterFinished(); + +internal-op Next-Vector-Blank-Iter +op1-internal +class sb +eval NextVectorIterCore($1, $2) + $1.IterFinished(); + +internal-op Next-Vector-Iter-Val-Var +# v1 = iteration variable +# v2 = value variable +op1-read-write +class VVsb +op-types U X I I +eval NextVectorIterCore($2, $3) + $$ = $2.iter; + if ( Z_IS_MANAGED ) + $1 = BuildVal(vv[$2.iter]->ToVal(Z_TYPE), Z_TYPE); + else + $1 = *vv[$2.iter]; + $2.IterFinished(); + +internal-op Next-Vector-Blank-Iter-Val-Var +# v1 = value variable +class Vsb +eval NextVectorIterCore($1, $2) + if ( Z_IS_MANAGED ) + $$ = BuildVal(vv[$1.iter]->ToVal(Z_TYPE), Z_TYPE); + else + $$ = *vv[$1.iter]; + $1.IterFinished(); + + +internal-op Init-String-Loop +op1-read +classes Vs Cs +op-types S I +eval $2.InitLoop($1->AsString()); + +internal-op Next-String-Iter +# v1 = iteration variable +class Vsb +op-types S I I +eval if ( $1.IsDoneIterating() ) + $2 + auto bytes = (const char*) $1.s->Bytes() + $1.iter; + auto sv = new StringVal(1, bytes); + Unref($$); + $$ = sv; + $1.IterFinished(); + +internal-op Next-String-Blank-Iter +op1-internal +class sb +eval if ( $1.IsDoneIterating() ) + $2 + $1.IterFinished(); + +internal-op End-Table-Loop +op1-internal +class f +eval $1.Clear(); diff --git a/src/script_opt/ZAM/OPs/macros.op b/src/script_opt/ZAM/OPs/macros.op new file mode 100644 index 0000000000..92d2eedba5 --- /dev/null +++ b/src/script_opt/ZAM/OPs/macros.op @@ -0,0 +1,74 @@ +# General-purpose macros. Those that are specific to a group of instructions +# are defined with those templates rather than appearing here. + +# Macros for information associated with the current instruction. + +# The Val frame used to pass in arguments. +macro Z_FRAME f + +# The main type. 
+macro Z_TYPE z.GetType() + +# Whether it's managed. +macro Z_IS_MANAGED *(z.is_managed) + +# Secondary type. +macro Z_TYPE2 z.GetType2() + +# Auxiliary information. +macro Z_AUX z.aux +macro Z_AUX_ID z.aux->id_val +macro Z_AUX_FUNC z.aux->func +macro Z_AUX_MAP z.aux->map +macro Z_AUX_ATTRS z.aux->attrs +macro Z_AUX_WHEN_INFO z.aux->wi +macro Z_AUX_EVENT_HANDLER z.aux->event_handler +macro Z_AUX_PRIMARY_FUNC z.aux->lambda->PrimaryFunc() +macro Z_AUX_LAMBDA_NAME z.aux->lambda->Name() + +# Location in the original script. +macro Z_LOC z.loc + +macro SET_RET_TYPE(type) ret_type = type; + +macro INDEX_LIST zam_index_val_list + +macro ERROR(msg) ZAM_run_time_error(Z_LOC, msg) +macro ERROR2(msg, obj) ZAM_run_time_error(Z_LOC, msg, obj) + +macro WARN(msg) ZAM_run_time_warning(Z_LOC, msg) + +# The following abstracts the process of creating a frame-assignable value. +macro BuildVal(v, t) ZVal(v, t) + +# Returns a memory-managed-if-necessary copy of an existing value. +macro CopyVal(v) (Z_IS_MANAGED ? BuildVal((v).ToVal(Z_TYPE), Z_TYPE) : (v)) + +# Managed assignments to the given target. +macro AssignTarget(target, v) { + if ( Z_IS_MANAGED ) + { + /* It's important to hold a reference to v here prior + to the deletion in case target points to v. 
*/ + auto v2 = v; + ZVal::DeleteManagedType(target); + target = v2; + } + else + target = v; + } + +macro Branch(target) { DO_ZAM_PROFILE; pc = target; continue; } + +macro REDO { --pc; /* so we then increment to here again */ break; } + +macro GlobalID(g) globals[g].id +macro GlobalVal(g) frame[globals[g].slot] + +macro StepIter(slot) step_iters[slot] +macro TableIter(slot) (*tiv_ptr)[slot] + +macro DirectField(r, f) r->RawField(f) +macro DirectOptField(r, f) r->RawOptField(f) + +macro LogEnum(v) v.ToVal(ZAM::log_ID_enum_type) diff --git a/src/script_opt/ZAM/OPs/non-uniform.op b/src/script_opt/ZAM/OPs/non-uniform.op new file mode 100644 index 0000000000..f53cc6f57a --- /dev/null +++ b/src/script_opt/ZAM/OPs/non-uniform.op @@ -0,0 +1,267 @@ +# Operations corresponding to non-uniform expressions. + +assign-op Field +class R +field-op +assign-val v +eval auto r = $1.AsRecord(); + auto& rv = DirectOptField(r, $2); + ZVal v; + if ( ! rv ) + { + auto def = r->GetType()->FieldDefault($2); + if ( def ) + v = ZVal(def, Z_TYPE); + else + ERROR(util::fmt("field value missing: $%s", r->GetType()->AsRecordType()->FieldName($2))); + } + else + v = *rv; + +expr-op Has-Field +class VRi +includes-field-op +no-eval + +internal-op Has-Field +class VRi +op-types I R I +eval $$ = $1->HasField($2); + +internal-op Has-Field +class VRii +op-types R R I I +eval DirectOptField($$, $2) = ZVal(zeek_int_t($1->HasField($3))); + +# The following generates an assignment version of Has-Field that we +# don't use (because we need the one above that uses "includes-field-op") +# but lets us compress the two conditionals. 
+predicate-op Has-Field
+class Vi
+op-types R I
+eval	$1->HasField($2)
+
+predicate-op Table-Has-Elements
+class V
+op-types T
+eval	$1->Size() > 0
+
+predicate-op Vector-Has-Elements
+class V
+op-types V
+eval	$1->Size() > 0
+
+expr-op In
+class VVV
+custom-method return CompileInExpr(n1, n2, n3);
+no-eval
+
+expr-op In
+class VCV
+custom-method return CompileInExpr(n1, c, n2);
+no-eval
+
+expr-op In
+class VVC
+custom-method return CompileInExpr(n1, n2, c);
+no-eval
+
+internal-op P-In-S
+classes VVV VCV VVC
+op-types I P S
+eval	$$ = $1->MatchAnywhere($2->AsString()) != 0;
+
+internal-op Str-In-Pat-Tbl
+classes VVV VCV
+op-types I S T
+eval	$$ = $2->MatchPattern({NewRef{}, $1});
+
+internal-op S-In-S
+classes VVV VCV VVC
+op-types I S S
+eval	auto sc = reinterpret_cast<const unsigned char*>($1->CheckString()); /* same pointer type as $2->Bytes() below */
+	auto cmp = util::strstr_n($2->Len(), $2->Bytes(), $1->Len(), sc);
+	$$ = cmp != -1;
+
+internal-op A-In-S
+classes VVV VCV VVC
+op-types I A N
+eval	$$ = $2->Contains($1->AsAddr());
+
+
+# Handled differently because of the unusual middle argument.
+op L-In-T
+class VLV
+custom-method return CompileInExpr(n1, l, n2);
+no-eval
+
+op L-In-T
+class VLC
+custom-method return CompileInExpr(n, l, c);
+no-eval
+
+op L-In-Vec
+class VLV
+custom-method return CompileInExpr(n1, l, n2);
+no-eval
+
+op L-In-Vec
+class VLC
+custom-method return CompileInExpr(n, l, c);
+no-eval
+
+
+predicate-op Val-Is-In-Table
+class VV
+op-types X T
+eval	$2->Find($1.ToVal(Z_TYPE)) != nullptr
+
+# Variants for indexing two values, one of which might be a constant.
+# We set the instruction's *second* type to be that of the first variable
+# index. We get the type of the second variable (if any) by digging it
+# out of the table's type. For a constant in either position, we use
+# the main instruction type, as always.
+ +macro EvalVal2InTableCore(op1, op2) + INDEX_LIST->Clear(); + INDEX_LIST->Append(op1); + INDEX_LIST->Append(op2); + +macro EvalVal2InTableAssignCore(lhs, tbl) + lhs.AsIntRef() = tbl.AsTable()->Find(INDEX_LIST) != nullptr; + +macro EvalVal2InTablePre(op1, op2, tbl) + auto& tt_ind = tbl.AsTable()->GetType()->AsTableType()->GetIndexTypes(); + EvalVal2InTableCore(op1.ToVal(Z_TYPE2), op2.ToVal(tt_ind[1])) + +internal-op Val2-Is-In-Table +class VVVV +eval EvalVal2InTablePre($1,$2,$3) + EvalVal2InTableAssignCore($$, $3) + +internal-op Val2-Is-In-Table-Cond +op1-read +class VVVb +eval EvalVal2InTablePre($1,$2,$3) + EvalVal2InTableCond($3, INDEX_LIST, $4, !) + +macro EvalVal2InTableCond(tbl, op, BRANCH, negate) + if ( negate tbl.AsTable()->Find(op) ) + BRANCH + +internal-op Val2-Is-Not-In-Table-Cond +op1-read +class VVVb +eval EvalVal2InTablePre($1,$2,$3) + EvalVal2InTableCond($3, INDEX_LIST, $4,) + +internal-op Val2-Is-In-Table +class VVVC +eval EvalVal2InTableCore($1.ToVal(Z_TYPE2), $3.ToVal(Z_TYPE)) + EvalVal2InTableAssignCore($$, $2) + +internal-op Val2-Is-In-Table +class VVCV +eval EvalVal2InTableCore($2.ToVal(Z_TYPE), $1.ToVal(Z_TYPE2)) + EvalVal2InTableAssignCore($$, $3) + +internal-op Val2-Is-In-Table-Cond +op1-read +class VVbC +eval EvalVal2InTableCore($1.ToVal(Z_TYPE2), $4.ToVal(Z_TYPE)) + EvalVal2InTableCond($2, INDEX_LIST, $3, !) + +internal-op Val2-Is-In-Table-Cond +op1-read +class VVCb +eval EvalVal2InTableCore($3.ToVal(Z_TYPE), $1.ToVal(Z_TYPE2)) + EvalVal2InTableCond($2, INDEX_LIST, $4, !) 
+ +internal-op Val2-Is-Not-In-Table-Cond +op1-read +class VVbC +eval EvalVal2InTableCore($1.ToVal(Z_TYPE2), $4.ToVal(Z_TYPE)) + EvalVal2InTableCond($2, INDEX_LIST, $3, ) + +internal-op Val2-Is-Not-In-Table-Cond +op1-read +class VVCb +eval EvalVal2InTableCore($3.ToVal(Z_TYPE), $1.ToVal(Z_TYPE2)) + EvalVal2InTableCond($2, INDEX_LIST, $4, ) + + +predicate-op Const-Is-In-Table +class VC +op-types T X +eval $1->Find($2.ToVal(Z_TYPE)) != nullptr + +internal-op List-Is-In-Table +classes VV VC +op-types I T +eval auto indices = Z_AUX->ToListVal(frame); + $$ = $1->Find(std::move(indices)) != nullptr; + +internal-op Val-Is-In-Vector +class VVV +op-types I I V +eval auto vec = $2; + auto ind = $1; + $$ = vec->Has(ind); + +internal-op Const-Is-In-Vector +class VCV +op-types I I V +eval auto vec = $2; + auto ind = $1; + $$ = vec->Has(ind); + +expr-op Cond +class VVVV +op-types X I X X +set-type $2 +eval AssignTarget($$, $1 ? CopyVal($2) : CopyVal($3)) + +expr-op Cond +class VVVC +op-types X I X X +set-type $2 +eval AssignTarget($$, $1 ? CopyVal($2) : CopyVal($3)) + +expr-op Cond +class VVCV +op-types X I X X +set-type $2 +eval AssignTarget($$, $1 ? CopyVal($2) : CopyVal($3)) + +op Bool-Vec-Cond +class VVVV +op-types V V V V +set-type $2 +eval auto& vsel = $1->RawVec(); + auto& v1 = $2->RawVec(); + auto& v2 = $3->RawVec(); + auto n = v1.size(); + auto res = new vector>(n); + for ( auto i = 0U; i < n; ++i ) + if ( vsel[i] ) + (*res)[i] = vsel[i]->AsInt() ? v1[i] : v2[i]; + auto& full_res = $$; + Unref(full_res); + full_res = new VectorVal(cast_intrusive(Z_TYPE), res); + +# Our instruction format doesn't accommodate two constants, so for +# the singular case of a V ? C1 : C2 conditional, we split it into +# two operations, V ? C1 and !V ? C2. +op CondC1 +class VVC +op-types X I X +set-type $$ +eval if ( $1 ) + AssignTarget($$, CopyVal($2)) + +op CondC2 +class VVC +op-types X I X +set-type $$ +eval if ( ! 
$1 ) + AssignTarget($$, CopyVal($2)) diff --git a/src/script_opt/ZAM/OPs/rel-exprs.op b/src/script_opt/ZAM/OPs/rel-exprs.op new file mode 100644 index 0000000000..d8fdd4d94b --- /dev/null +++ b/src/script_opt/ZAM/OPs/rel-exprs.op @@ -0,0 +1,55 @@ +# Operations corresponding to relational expressions. + +rel-expr-op LT +op-type I U D S T A +vector +eval $1 < $2 +eval-type S Bstr_cmp($1->AsString(), $2->AsString()) < 0 +eval-type T $1->IsSubsetOf(*$2) && $1->Size() < $2->Size() +eval-type A $1->AsAddr() < $2->AsAddr() + +rel-expr-op LE +op-type I U D S T A +vector +eval $1 <= $2 +eval-type S Bstr_cmp($1->AsString(), $2->AsString()) <= 0 +eval-type T $1->IsSubsetOf(*$2) +eval-type A $1->AsAddr() < $2->AsAddr() || $1->AsAddr() == $2->AsAddr() + +rel-expr-op EQ +op-type I U D S T A N F +vector +eval $1 == $2 +eval-type S Bstr_cmp($1->AsString(), $2->AsString()) == 0 +eval-type T $1->EqualTo(*$2) +eval-type A $1->AsAddr() == $2->AsAddr() +eval-type N $1->AsSubNet() == $2->AsSubNet() +eval-type F util::streq($1->Name(), $2->Name()) +eval-mixed P S $1->MatchExactly($2->AsString()) + +rel-expr-op NE +op-type I U D S T A N F +vector +eval $1 != $2 +eval-type S Bstr_cmp($1->AsString(), $2->AsString()) != 0 +eval-type T ! $1->EqualTo(*$2) +eval-type A $1->AsAddr() != $2->AsAddr() +eval-type N $1->AsSubNet() != $2->AsSubNet() +eval-type F ! util::streq($1->Name(), $2->Name()) +eval-mixed P S ! $1->MatchExactly($2->AsString()) + +# Note, canonicalization means that GE and GT shouldn't occur +# for Sets (type T). +rel-expr-op GE +op-type I U D S A +vector +eval $1 >= $2 +eval-type S Bstr_cmp($1->AsString(), $2->AsString()) >= 0 +eval-type A ! ($1->AsAddr() < $2->AsAddr()) + +rel-expr-op GT +op-type I U D S A +vector +eval $1 > $2 +eval-type S Bstr_cmp($1->AsString(), $2->AsString()) > 0 +eval-type A ! 
($1->AsAddr() < $2->AsAddr()) && $1->AsAddr() != $2->AsAddr() diff --git a/src/script_opt/ZAM/OPs/script-idioms.op b/src/script_opt/ZAM/OPs/script-idioms.op new file mode 100644 index 0000000000..040e4bfab5 --- /dev/null +++ b/src/script_opt/ZAM/OPs/script-idioms.op @@ -0,0 +1,57 @@ +# Operations corresponding to scripting idioms / known script functions. + +internal-op MinU +classes VVV VVC +op-types U U U +eval $$ = std::min($1, $2); + +internal-op MinI +classes VVV VVC +op-types I I I +eval $$ = std::min($1, $2); + +internal-op MinD +classes VVV VVC +op-types D D D +eval $$ = std::min($1, $2); + +internal-op MaxU +classes VVV VVC +op-types U U U +eval $$ = std::max($1, $2); + +internal-op MaxI +classes VVV VVC +op-types I I I +eval $$ = std::max($1, $2); + +internal-op MaxD +classes VVV VVC +op-types D D D +eval $$ = std::max($1, $2); + +internal-op Func-Id-String +class VV +op-types S R +eval auto id_rec = $1; + auto orig_h = DirectField(id_rec, 0).AsAddr()->AsAddr().AsString(); + auto resp_h = DirectField(id_rec, 2).AsAddr()->AsAddr().AsString(); + auto orig_p = static_cast(DirectField(id_rec, 1).AsCount()) & ~PORT_SPACE_MASK; + auto resp_p = static_cast(DirectField(id_rec, 3).AsCount()) & ~PORT_SPACE_MASK; + /* Maximum address size is for IPv6 with no compression. That is + * 8 16-bit hex elements plus 7 colons between them plus the two []'s + * = 8*4 + 7 + 2 = 41 characters. + * + * Maximum port size is 5. + * + * Two of these = 2*41 + 2*5 = 92. + * Other delimiters: two ':', one ' > ' for 5 more. + * + * TOTAL: 97 characters. + * + * We use considerably more for safety.
+ */ + char buf[128]; + snprintf(buf, sizeof buf, "%s:%u > %s:%u", orig_h.c_str(), orig_p, resp_h.c_str(), resp_p); + Unref($$); + $$ = new StringVal(buf); diff --git a/src/script_opt/ZAM/OPs/stmts.op b/src/script_opt/ZAM/OPs/stmts.op new file mode 100644 index 0000000000..37d99b6c71 --- /dev/null +++ b/src/script_opt/ZAM/OPs/stmts.op @@ -0,0 +1,339 @@ +# Operations corresponding to statements, other than iterations. + +macro EvalScheduleArgs(time, is_delta, build_args) + if ( ! run_state::terminating ) + { + double dt = time; + if ( is_delta ) + dt += run_state::network_time; + auto handler = EventHandlerPtr(Z_AUX_EVENT_HANDLER); + ValVec args; + build_args + auto timer = new ScheduleTimer(handler, std::move(args), dt); + timer_mgr->Add(timer); + } + +macro EvalSchedule(time, is_delta) + EvalScheduleArgs(time, is_delta, Z_AUX->FillValVec(args, frame);) + +op Schedule +class ViHL +op-types D I X X +op1-read +custom-method return CompileSchedule(n, nullptr, i, h, l); +eval EvalSchedule($1, $2) + +op Schedule +class CiHL +op-types D I X X +op1-read +custom-method return CompileSchedule(nullptr, c, i, h, l); +eval EvalSchedule($1, $2) + +internal-op Schedule0 +classes ViH CiH +op-types D I X +op1-read +eval EvalScheduleArgs($1, $2,) + +macro QueueEvent(eh, args) + if ( *eh ) + event_mgr.Enqueue(eh, std::move(args)); + +op Event +class HL +op1-read +custom-method return CompileEvent(h, l); +eval ValVec args; + Z_AUX->FillValVec(args, frame); + QueueEvent(Z_AUX_EVENT_HANDLER, args); + +internal-op Event0 +class X +eval ValVec args(0); + QueueEvent(Z_AUX_EVENT_HANDLER, args); + +internal-op Event1 +class V +op1-read +eval ValVec args(1); + args[0] = $1.ToVal(Z_TYPE); + QueueEvent(Z_AUX_EVENT_HANDLER, args); + +internal-op Event2 +class VV +op1-read +eval ValVec args(2); + args[0] = $1.ToVal(Z_TYPE); + args[1] = $2.ToVal(Z_TYPE2); + QueueEvent(Z_AUX_EVENT_HANDLER, args); + +internal-op Event3 +class VVV +op1-read +eval ValVec args(3); + auto& aux = Z_AUX; + args[0] = 
$1.ToVal(Z_TYPE); + args[1] = $2.ToVal(Z_TYPE2); + args[2] = $3.ToVal(aux->elems[2].GetType()); + QueueEvent(Z_AUX_EVENT_HANDLER, args); + +internal-op Event4 +class VVVV +op1-read +eval ValVec args(4); + auto& aux = Z_AUX; + args[0] = $1.ToVal(Z_TYPE); + args[1] = $2.ToVal(Z_TYPE2); + args[2] = $3.ToVal(aux->elems[2].GetType()); + args[3] = $4.ToVal(aux->elems[3].GetType()); + QueueEvent(Z_AUX_EVENT_HANDLER, args); + + +op Return +class X +eval EvalReturn(nullptr,) + +macro EvalReturn(val, type) + ret_u = val; + type + DO_ZAM_PROFILE + pc = end_pc; + continue; + +op Return +op1-read +classes V C +set-type $$ +eval EvalReturn(&$$, SET_RET_TYPE(Z_TYPE)) + +op When-Return +class X +eval static auto any_val = ZVal(); + EvalReturn(&any_val,); + + +# Branch on the value of v1 using switch table v2, with default branch to v3 + +macro EvalSwitchBody(index, branch, cases, postscript) + { + auto t = cases[index]; + if ( t.find(v) == t.end() ) + pc = branch; + else + pc = t[v]; + postscript + DO_ZAM_PROFILE + continue; + } + +internal-op SwitchI +op1-read +class Vii +op-types I I I +eval auto v = $1; + EvalSwitchBody($2, $3, int_cases,) + +internal-op SwitchU +op1-read +class Vii +op-types U I I +eval auto v = $1; + EvalSwitchBody($2, $3, uint_cases,) + +internal-op SwitchD +op1-read +class Vii +op-types D I I +eval auto v = $1; + EvalSwitchBody($2, $3, double_cases,) + +internal-op SwitchS +op1-read +class Vii +op-types S I I +eval auto vs = $1->AsString()->Render(); + std::string v(vs); + EvalSwitchBody($2, $3, str_cases,delete[] vs;) + +internal-op SwitchA +op1-read +class Vii +op-types A I I +eval auto v = $1->AsAddr().AsString(); + EvalSwitchBody($2, $3, str_cases,) + +internal-op SwitchN +op1-read +class Vii +op-types N I I +eval auto v = $1->AsSubNet().AsString(); + EvalSwitchBody($2, $3, str_cases,) + + +internal-op Determine-Type-Match +class VV +op-types I a +eval auto& aux = Z_AUX; + int match = -1; + for ( int i = 0; i < aux->n; ++i ) + { + auto& el = 
aux->elems[i]; + auto& et = el.GetType(); + if ( can_cast_value_to_type($1, et.get()) ) + { + match = i; + if ( el.Slot() >= 0 ) + { + auto& tv = frame[el.Slot()]; + if ( el.IsManaged() ) + Unref(tv.ManagedVal()); + tv = ZVal(cast_value_to_type($1, et.get()), et); + } + break; + } + } + $$ = match; + +op CheckAnyLen +op1-read +class Vi +op-types L U +eval auto v = $1; + if ( v->Vals().size() != $2 ) + ERROR("mismatch in list lengths"); + +op Print +class O +eval do_print_stmt(Z_AUX->ToValVec(frame)); +method-post z.aux = v->aux; + +op Print1 +op1-read +classes V C +set-type $$ +eval std::vector vals; + vals.push_back($$.ToVal(Z_TYPE)); + do_print_stmt(vals); + + +internal-op If-Else +op1-read +class Vb +op-types I I +eval if ( ! $1 ) $2 + +internal-op If +op1-read +class Vb +op-types I I +eval if ( ! $1 ) $2 + +internal-op If-Not +op1-read +class Vb +op-types I I +eval if ( $1 ) $2 + + +op AddStmt +op1-read +class VO +eval auto indices = Z_AUX->ToListVal(frame); + EvalAddStmt($1, indices) +method-post z.aux = v->aux; + +macro EvalAddStmt(lhs, ind) + auto index = ind; + bool iterators_invalidated = false; + lhs.AsTable()->Assign(std::move(index), nullptr, true, &iterators_invalidated); + if ( iterators_invalidated ) + WARN("possible loop/iterator invalidation"); + +op AddStmt1 +op1-read +set-type $1 +classes VV VC +eval EvalAddStmt($1, $2.ToVal(Z_TYPE)) + + +op ClearTable +op1-read +class V +op-types T +eval $1->RemoveAll(); + +op ClearVector +op1-read +class V +op-types V +eval $1->Resize(0); + + +op DelTable +op1-read +class VO +op-types T X +eval auto indices = Z_AUX->ToListVal(frame); + bool iterators_invalidated = false; + $1->Remove(*indices, true, &iterators_invalidated); + if ( iterators_invalidated ) + WARN("possible loop/iterator invalidation"); +method-post z.aux = v->aux; + +op DelField +op1-read +class Vi +op-types R I +eval $1->Remove($2); + + +internal-op Init-Record +class V +op-types R +eval auto r = new RecordVal(cast_intrusive(Z_TYPE)); + 
Unref($$); + $$ = r; + +internal-op Init-Vector +class V +op-types V +eval auto vt = cast_intrusive(Z_TYPE); + auto vec = new VectorVal(std::move(vt)); + Unref($$); + $$ = vec; + +internal-op Init-Table +class V +op-types T +eval auto tt = cast_intrusive(Z_TYPE); + auto t = new TableVal(tt, Z_AUX_ATTRS); + Unref($$); + $$ = t; + +op When +class V +op1-read +op-types F +eval BuildWhen($1, -1.0) + +op When-Timeout +classes VV VC +op1-read +op-types F D +eval BuildWhen($1, $2) + +macro BuildWhen(zf, timeout) + auto& aux = Z_AUX; + auto wi = Z_AUX_WHEN_INFO; + FuncPtr func{NewRef{}, zf}; + auto lambda = make_intrusive(func); + wi->Instantiate(std::move(lambda)); + std::vector local_aggrs; + for ( int i = 0; i < aux->n; ++i ) + { + auto v = aux->ToVal(frame, i); + if ( v ) + local_aggrs.push_back(v); + } + (void)make_intrusive(wi, wi->WhenExprGlobals(), local_aggrs, timeout, Z_FRAME, Z_LOC->Loc()); diff --git a/src/script_opt/ZAM/OPs/unary-exprs.op b/src/script_opt/ZAM/OPs/unary-exprs.op new file mode 100644 index 0000000000..a44cbe937d --- /dev/null +++ b/src/script_opt/ZAM/OPs/unary-exprs.op @@ -0,0 +1,181 @@ +# Operations corresponding to unary expressions. + +# Direct assignment of an existing value. +assign-op Assign +class V + +# The same, but where the assignment target (LHS) is a record field. +assign-op Field-LHS-Assign +op1-read +class F + +unary-expr-op Clone +no-const +op-type X +set-type $$ +set-type2 $1 +eval auto v = $1.ToVal(Z_TYPE2)->Clone(); + AssignTarget($$, BuildVal(v, Z_TYPE)) + +unary-expr-op Size +no-const +op-type I U D A N S T V * +explicit-result-type +set-type $$ +set-type2 $1 +eval-type I $$ = ZVal(zeek_int_t($1 < 0 ? -$1 : $1)); +eval-type U $$ = ZVal($1); +eval-type D $$ = ZVal($1 < 0 ? -$1 : $1); +eval-type A $$ = ZVal(zeek_uint_t($1->AsAddr().GetFamily() == IPv4 ? 
32 : 128)); +eval-type N $$ = ZVal(pow(2.0, double(128 - $1->AsSubNet().LengthIPv6()))); +eval-type S $$ = ZVal(zeek_uint_t($1->Len())); +eval-type T $$ = ZVal(zeek_uint_t($1->Size())); +eval-type V $$ = ZVal(zeek_uint_t($1->Size())); +eval auto v = $1.ToVal(Z_TYPE2)->SizeVal(); + $$ = BuildVal(v, Z_TYPE); + +unary-expr-op Not +op-type I +eval ! $1 + +unary-expr-op Complement +op-type U +eval ~ $1 + +unary-expr-op Positive +op-type I U D +vector +eval $1 + +unary-expr-op Negate +op-type I U D +vector +eval -$1 + +op IncrI +op1-read-write +class V +op-types I +eval ++$$; + +op IncrU +op1-read-write +class V +op-types U +eval ++$$; + +op DecrI +op1-read-write +class V +op-types I +eval --$$; + +op DecrU +op1-read-write +class V +op-types U +eval auto& u = $$; + if ( u == 0 ) + WARN("count underflow"); + --u; + +unary-op AppendTo +# Note, even though it feels like appending both reads and modifies +# its first operand, for our purposes it just reads it (to get the +# aggregate), and then modifies its *content* but not the operand's +# value itself. +op1-read +set-type $1 +eval auto vv = $1.AsVector(); + if ( vv->Size() == 0 ) + /* Use the slightly more expensive Assign(), since it + * knows how to deal with empty vectors that do not yet + * have concrete types. + */ + vv->Assign(0, $2.ToVal(Z_TYPE)); + else + { + vv->RawVec().push_back(CopyVal($2)); + vv->Modified(); + } + +# For vectors-of-any, we always go through the Assign() interface because +# it's needed for tracking the potentially differing types. 
+unary-op AppendToAnyVec +op1-read +set-type $1 +eval auto vv = $1.AsVector(); + vv->Assign(vv->Size(), $2.ToVal(Z_TYPE)); + +internal-op AddPatternToField +classes VVi VCi +op1-read +op-types R P I +eval auto r = $$; + auto fpat = r->GetField($2)->AsPatternVal(); + if ( fpat ) + { + $1->AddTo(fpat, false); + r->Modified(); + } + else + ERROR(util::fmt("field value missing: $%s", r->GetType()->AsRecordType()->FieldName($2))); + +unary-op ExtendPattern +op1-read +eval $1.AsPattern()->AddTo($$.AsPattern(), false); + +unary-op AddVecToVec +op1-read +eval if ( ! $1.AsVector()->AddTo($$.AsVector(), false) ) + ERROR("incompatible vector element assignment"); + +unary-op AddTableToTable +op1-read +eval auto t = $$.AsTable(); + auto v = $1.AsTable(); + if ( v->Size() > 0 ) + { + v->AddTo(t, false); + t->Modified(); + } + +unary-op RemoveTableFromTable +op1-read +eval auto t = $$.AsTable(); + auto v = $1.AsTable(); + if ( v->Size() > 0 ) + { + v->RemoveFrom(t); + t->Modified(); + } + +unary-expr-op Cast +op-type X +set-type $$ +set-type2 $1 +eval EvalCast($$, $1.ToVal(Z_TYPE2)) + +macro EvalCast(lhs, rhs) + std::string error; + auto res = cast_value(rhs, Z_TYPE, error); + if ( res ) + AssignTarget(lhs, BuildVal(res, Z_TYPE)) + else + ERROR(error.c_str()); + +# Cast an "any" type to the given type. Only needed for type-based switch +# statements. 
+internal-op Cast-Any +class VV +op-types X a +eval ValPtr rhs = {NewRef{}, $1}; + EvalCast($$, rhs) + +direct-unary-op Is Is + +internal-op Is +class VV +op-types I X +eval auto rhs = $1.ToVal(Z_TYPE2).get(); + $$ = can_cast_value_to_type(rhs, Z_TYPE.get()); From a1185ee6bb374c038daa5ff5e3940c5c2f22e611 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Mon, 5 Aug 2024 17:59:50 +0100 Subject: [PATCH 18/33] add tracking of control flow information --- src/script_opt/ZAM/AM-Opt.cc | 75 +++++++++++++++++++++++++++++---- src/script_opt/ZAM/Branches.cc | 7 ++- src/script_opt/ZAM/Compile.h | 16 ++++--- src/script_opt/ZAM/Low-Level.cc | 18 +++++++- src/script_opt/ZAM/Stmt.cc | 33 ++++++++++++--- src/script_opt/ZAM/ZInst.h | 19 ++++++++- 6 files changed, 144 insertions(+), 24 deletions(-) diff --git a/src/script_opt/ZAM/AM-Opt.cc b/src/script_opt/ZAM/AM-Opt.cc index 68f8117f1d..c21fd6e77c 100644 --- a/src/script_opt/ZAM/AM-Opt.cc +++ b/src/script_opt/ZAM/AM-Opt.cc @@ -962,16 +962,75 @@ void ZAMCompiler::KillInst(zeek_uint_t i) { } } - if ( num_labels == 0 ) - // No labels to propagate. - return; + ZInstI* succ = nullptr; - for ( auto j = i + 1; j < insts1.size(); ++j ) { - auto succ = insts1[j]; - if ( succ->live ) { - succ->num_labels += num_labels; - break; + if ( num_labels > 0 ) { + for ( auto j = i + 1; j < insts1.size(); ++j ) { + if ( insts1[j]->live ) { + succ = insts1[j]; + break; + } } + if ( succ ) + succ->num_labels += num_labels; + } + + // Look into propagating control flow info. + if ( inst->aux && ! inst->aux->cft.empty() ) { + auto& cft = inst->aux->cft; + + if ( cft.count(CFT_ELSE) > 0 ) { + // Push forward unless this was the end of the block. + if ( cft.count(CFT_BLOCK_END) == 0 ) { + ASSERT(succ); + AddCFT(succ, CFT_ELSE); + } + else + // But if it *was* the end of the block, remove that block. + --cft[CFT_BLOCK_END]; + } + + if ( cft.count(CFT_BREAK) > 0 ) { + // ### Factor this with the following + // Propagate breaks backwards. 
+ int j = i; + while ( --j >= 0 ) + if ( insts1[j]->live ) + break; + + ASSERT(j >= 0); + + // Make sure the CFT entry is created. + AddCFT(insts1[j], CFT_BREAK); + + auto be_cnt = cft[CFT_BREAK]; + --be_cnt; // we already did one above + insts1[j]->aux->cft[CFT_BREAK] += be_cnt; + } + + if ( cft.count(CFT_BLOCK_END) > 0 ) { + // Propagate block-ends backwards. + int j = i; + while ( --j >= 0 ) + if ( insts1[j]->live ) + break; + + ASSERT(j >= 0); + + // Make sure the CFT entry is created. + AddCFT(insts1[j], CFT_BLOCK_END); + + auto be_cnt = cft[CFT_BLOCK_END]; + --be_cnt; // we already did one above + insts1[j]->aux->cft[CFT_BLOCK_END] += be_cnt; + } + + // If's can be killed because their bodies become empty, + // break's because they just lead to their following instruction, + // and next's if they become dead code. + // However, loop's and loop-cond's should not be killed. + ASSERT(cft.count(CFT_LOOP) == 0); + ASSERT(cft.count(CFT_LOOP_COND) == 0); } } diff --git a/src/script_opt/ZAM/Branches.cc b/src/script_opt/ZAM/Branches.cc index 717a4a0afd..bbe1ecf957 100644 --- a/src/script_opt/ZAM/Branches.cc +++ b/src/script_opt/ZAM/Branches.cc @@ -10,9 +10,12 @@ namespace zeek::detail { void ZAMCompiler::PushGoTos(GoToSets& gotos) { gotos.emplace_back(); } -void ZAMCompiler::ResolveGoTos(GoToSets& gotos, const InstLabel l) { - for ( auto& gi : gotos.back() ) +void ZAMCompiler::ResolveGoTos(GoToSets& gotos, const InstLabel l, ControlFlowType cft) { + for ( auto& gi : gotos.back() ) { SetGoTo(gi, l); + if ( cft != CFT_NONE ) + AddCFT(insts1[gi.stmt_num], cft); + } gotos.pop_back(); } diff --git a/src/script_opt/ZAM/Compile.h b/src/script_opt/ZAM/Compile.h index 82b76d4ea3..9f5319351c 100644 --- a/src/script_opt/ZAM/Compile.h +++ b/src/script_opt/ZAM/Compile.h @@ -159,16 +159,17 @@ private: const ZAMStmt ValueSwitch(const SwitchStmt* sw, const NameExpr* v, const ConstExpr* c); const ZAMStmt TypeSwitch(const SwitchStmt* sw, const NameExpr* v, const ConstExpr* c); + const
ZAMStmt GenSwitch(const SwitchStmt* sw, int slot, InternalTypeTag it); void PushNexts() { PushGoTos(nexts); } void PushBreaks() { PushGoTos(breaks); } void PushFallThroughs() { PushGoTos(fallthroughs); } void PushCatchReturns() { PushGoTos(catches); } - void ResolveNexts(const InstLabel l) { ResolveGoTos(nexts, l); } - void ResolveBreaks(const InstLabel l) { ResolveGoTos(breaks, l); } + void ResolveNexts(const InstLabel l) { ResolveGoTos(nexts, l, CFT_NEXT); } + void ResolveBreaks(const InstLabel l) { ResolveGoTos(breaks, l, CFT_BREAK); } void ResolveFallThroughs(const InstLabel l) { ResolveGoTos(fallthroughs, l); } - void ResolveCatchReturns(const InstLabel l) { ResolveGoTos(catches, l); } + void ResolveCatchReturns(const InstLabel l) { ResolveGoTos(catches, l, CFT_INLINED_RETURN); } const ZAMStmt LoopOverTable(const ForStmt* f, const NameExpr* val); const ZAMStmt LoopOverVector(const ForStmt* f, const NameExpr* val); @@ -229,8 +230,8 @@ private: const ZAMStmt CompileIndex(const NameExpr* n1, int n2_slot, const TypePtr& n2_type, const ListExpr* l, bool in_when); - const ZAMStmt BuildLambda(const NameExpr* n, ExprPtr le); // marker - const ZAMStmt BuildLambda(int n_slot, ExprPtr le); // marker + const ZAMStmt BuildLambda(const NameExpr* n, ExprPtr le); + const ZAMStmt BuildLambda(int n_slot, ExprPtr le); // Second argument is which instruction slot holds the branch target. const ZAMStmt GenCond(const Expr* e, int& branch_v); @@ -277,7 +278,7 @@ private: using GoToSets = std::vector; void PushGoTos(GoToSets& gotos); - void ResolveGoTos(GoToSets& gotos, const InstLabel l); + void ResolveGoTos(GoToSets& gotos, const InstLabel l, ControlFlowType cft = CFT_NONE); ZAMStmt GenGoTo(GoToSet& v); ZAMStmt GoToStub(); @@ -322,6 +323,9 @@ private: const ZAMStmt ErrorStmt(); const ZAMStmt LastInst(); + // Adds control flow information to an instruction. 
+ void AddCFT(ZInstI* inst, ControlFlowType cft); + // Returns a handle to state associated with building // up a list of values. std::unique_ptr BuildVals(const ListExprPtr&); diff --git a/src/script_opt/ZAM/Low-Level.cc b/src/script_opt/ZAM/Low-Level.cc index 25e132339c..c03b1cbd90 100644 --- a/src/script_opt/ZAM/Low-Level.cc +++ b/src/script_opt/ZAM/Low-Level.cc @@ -20,9 +20,25 @@ bool ZAMCompiler::NullStmtOK() const { const ZAMStmt ZAMCompiler::EmptyStmt() { return ZAMStmt(insts1.size() - 1); } +const ZAMStmt ZAMCompiler::ErrorStmt() { return ZAMStmt(0); } + const ZAMStmt ZAMCompiler::LastInst() { return ZAMStmt(insts1.size() - 1); } -const ZAMStmt ZAMCompiler::ErrorStmt() { return ZAMStmt(0); } +void ZAMCompiler::AddCFT(ZInstI* inst, ControlFlowType cft) { + if ( cft == CFT_NONE ) + return; + + if ( ! inst->aux ) + inst->aux = new ZInstAux(0); + + auto cft_entry = inst->aux->cft.find(cft); + if ( cft_entry == inst->aux->cft.end() ) + inst->aux->cft[cft] = 1; + else { + ASSERT(cft == CFT_BLOCK_END || cft == CFT_BREAK); + ++cft_entry->second; + } +} std::unique_ptr ZAMCompiler::BuildVals(const ListExprPtr& l) { return std::make_unique(InternalBuildVals(l.get())); diff --git a/src/script_opt/ZAM/Stmt.cc b/src/script_opt/ZAM/Stmt.cc index f5964d5679..d163e9e3d3 100644 --- a/src/script_opt/ZAM/Stmt.cc +++ b/src/script_opt/ZAM/Stmt.cc @@ -141,13 +141,21 @@ const ZAMStmt ZAMCompiler::IfElse(const Expr* e, const Stmt* s1, const Stmt* s2) else cond_stmt = GenCond(e, branch_v); + AddCFT(insts1.back(), CFT_IF); + if ( s1 ) { auto s1_end = CompileStmt(s1); + AddCFT(insts1.back(), CFT_BLOCK_END); + if ( s2 ) { auto branch_after_s1 = GoToStub(); + auto else_start = insts1.size(); auto s2_end = CompileStmt(s2); + SetV(cond_stmt, GoToTargetBeyond(branch_after_s1), branch_v); SetGoTo(branch_after_s1, GoToTargetBeyond(s2_end)); + AddCFT(insts1[else_start], CFT_ELSE); + AddCFT(insts1.back(), CFT_BLOCK_END); return s2_end; } @@ -462,6 +470,7 @@ const ZAMStmt 
ZAMCompiler::GenSwitch(const SwitchStmt* sw, int slot, InternalTyp // Generate each of the cases. auto cases = sw->Cases(); std::vector case_start; + int case_index = 0; PushFallThroughs(); for ( auto sw_case : *cases ) { @@ -477,8 +486,11 @@ const ZAMStmt ZAMCompiler::GenSwitch(const SwitchStmt* sw, int slot, InternalTyp ResolveBreaks(sw_end); int def_ind = sw->DefaultCaseIndex(); - if ( def_ind >= 0 ) - SetV3(sw_head, case_start[def_ind]); + if ( def_ind >= 0 ) { + auto def = case_start[def_ind]; + SetV3(sw_head, def); + AddCFT(def, CFT_DEFAULT); + } else SetV3(sw_head, sw_end); @@ -651,18 +663,23 @@ const ZAMStmt ZAMCompiler::While(const Stmt* cond_stmt, const Expr* cond, const if ( cond->Tag() == EXPR_NAME ) { auto n = cond->AsNameExpr(); - cond_IF = AddInst(ZInstI(OP_IF_VV, FrameSlot(n), 0)); + cond_IF = AddInst(ZInstI(OP_IF_Vb, FrameSlot(n), 0)); branch_v = 2; } else cond_IF = GenCond(cond, branch_v); + AddCFT(insts1[head.stmt_num], CFT_LOOP); + AddCFT(insts1[cond_IF.stmt_num], CFT_LOOP_COND); + PushNexts(); PushBreaks(); if ( body && body->Tag() != STMT_NULL ) (void)CompileStmt(body); + AddCFT(insts1.back(), CFT_BLOCK_END); + auto tail = GoTo(GoToTarget(head)); auto beyond_tail = GoToTargetBeyond(tail); @@ -682,17 +699,21 @@ const ZAMStmt ZAMCompiler::CompileFor(const ForStmt* f) { PushNexts(); PushBreaks(); + ZAMStmt z; + if ( et == TYPE_TABLE ) - return LoopOverTable(f, val); + z = LoopOverTable(f, val); else if ( et == TYPE_VECTOR ) - return LoopOverVector(f, val); + z = LoopOverVector(f, val); else if ( et == TYPE_STRING ) - return LoopOverString(f, e); + z = LoopOverString(f, e); else reporter->InternalError("bad \"for\" loop-over value when compiling"); + + return z; } const ZAMStmt ZAMCompiler::LoopOverTable(const ForStmt* f, const NameExpr* val) { diff --git a/src/script_opt/ZAM/ZInst.h b/src/script_opt/ZAM/ZInst.h index 3cf5828f3d..23c02b5a7d 100644 --- a/src/script_opt/ZAM/ZInst.h +++ b/src/script_opt/ZAM/ZInst.h @@ -385,6 +385,20 @@ private: bool 
is_managed = false; }; +enum ControlFlowType { + CFT_IF, + CFT_BLOCK_END, + CFT_ELSE, + CFT_LOOP, + CFT_LOOP_COND, + CFT_NEXT, + CFT_BREAK, + CFT_DEFAULT, + CFT_INLINED_RETURN, + + CFT_NONE, +}; + // Auxiliary information, used when the fixed ZInst layout lacks // sufficient expressiveness to represent all of the elements that // an instruction needs. @@ -500,11 +514,14 @@ public: // Whether we know that we're calling a BiF. bool is_BiF_call = false; + // Associated control flow information. + std::map cft; + // Used for referring to events. EventHandler* event_handler = nullptr; // Used for things like constructors. - AttributesPtr attrs = nullptr; + AttributesPtr attrs; // Whether the instruction can lead to globals/captures changing. // Currently only needed by the optimizer, but convenient to From 65e713e6eaafcedf8877ecb107e2490c324d1235 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Mon, 5 Aug 2024 18:06:58 +0100 Subject: [PATCH 19/33] rework ZAM compilation of type switches to leverage value switches --- src/script_opt/ZAM/Stmt.cc | 94 ++++++++++++++------------------------ 1 file changed, 34 insertions(+), 60 deletions(-) diff --git a/src/script_opt/ZAM/Stmt.cc b/src/script_opt/ZAM/Stmt.cc index d163e9e3d3..375204887b 100644 --- a/src/script_opt/ZAM/Stmt.cc +++ b/src/script_opt/ZAM/Stmt.cc @@ -538,10 +538,18 @@ const ZAMStmt ZAMCompiler::GenSwitch(const SwitchStmt* sw, int slot, InternalTyp } } + // For type switches, we map them to consecutive numbers, and then use + // an integer-valued switch on those. + int tm_ctr = 0; + for ( auto [_, index] : *sw->TypeMap() ) { + auto case_body_start = case_start[index]; + new_int_cases[tm_ctr++] = case_body_start; + } + // Now add the jump table to the set we're keeping for the // corresponding type.
- switch ( t->InternalType() ) { + switch ( it ) { case TYPE_INTERNAL_INT: int_casesI.push_back(new_int_cases); break; case TYPE_INTERNAL_UNSIGNED: uint_casesI.push_back(new_uint_cases); break; @@ -555,86 +563,52 @@ const ZAMStmt ZAMCompiler::GenSwitch(const SwitchStmt* sw, int slot, InternalTyp default: reporter->InternalError("bad switch type"); } + AddCFT(insts1[body_end.stmt_num], CFT_BLOCK_END); + return body_end; } const ZAMStmt ZAMCompiler::TypeSwitch(const SwitchStmt* sw, const NameExpr* v, const ConstExpr* c) { auto cases = sw->Cases(); auto type_map = sw->TypeMap(); - - auto body_end = EmptyStmt(); - auto tmp = NewSlot(true); // true since we know "any" is managed int slot = v ? FrameSlot(v) : 0; - if ( v && v->GetType()->Tag() != TYPE_ANY ) { - auto z = ZInstI(OP_ASSIGN_ANY_VV, tmp, slot); - body_end = AddInst(z); - slot = tmp; + if ( v ) { + if ( v->GetType()->Tag() != TYPE_ANY ) { + auto z = ZInstI(OP_ASSIGN_ANY_VV, tmp, slot); + AddInst(z); + slot = tmp; + } } - if ( c ) { + else { + ASSERT(c); auto z = ZInstI(OP_ASSIGN_ANY_VC, tmp, c); - body_end = AddInst(z); + AddInst(z); slot = tmp; } - int def_ind = sw->DefaultCaseIndex(); - ZAMStmt def_succ(0); // successor to default, if any - bool saw_def_succ = false; // whether def_succ is meaningful + int ntypes = type_map->size(); + auto aux = new ZInstAux(ntypes); - PushFallThroughs(); - for ( auto& i : *type_map ) { - auto id = i.first; - auto type = id->GetType(); + for ( auto i = 0; i < type_map->size(); ++i ) { + auto& tm = (*type_map)[i]; + auto id_i = tm.first; + auto id_case = tm.second; - ZInstI z; - - z = ZInstI(OP_BRANCH_IF_NOT_TYPE_VV, slot, 0); - z.SetType(type); - auto case_test = AddInst(z); - - // Type cases that don't use "as" create a placeholder - // ID with a null name. 
- if ( id->Name() ) { - int id_slot = Frame1Slot(id, OP_CAST_ANY_VV); - z = ZInstI(OP_CAST_ANY_VV, id_slot, slot); - z.SetType(type); - body_end = AddInst(z); - } - else - body_end = case_test; - - ResolveFallThroughs(GoToTargetBeyond(body_end)); - body_end = CompileStmt((*cases)[i.second]->Body()); - SetV2(case_test, GoToTargetBeyond(body_end)); - - if ( def_ind >= 0 && i.second == def_ind + 1 ) { - def_succ = case_test; - saw_def_succ = true; - } - - PushFallThroughs(); + auto slot = id_i->Name() ? FrameSlot(id_i) : -1; + aux->Add(i, slot, id_i->GetType()); } - ResolveFallThroughs(GoToTargetBeyond(body_end)); + auto match_tmp = NewSlot(false); + auto z = ZInstI(OP_DETERMINE_TYPE_MATCH_VV, match_tmp, slot); + z.op_type = OP_VV; + z.aux = aux; + AddInst(z); - if ( def_ind >= 0 ) { - PushFallThroughs(); - - body_end = CompileStmt((*sw->Cases())[def_ind]->Body()); - - // Now resolve any fallthrough's in the default. - if ( saw_def_succ ) - ResolveFallThroughs(GoToTargetBeyond(def_succ)); - else - ResolveFallThroughs(GoToTargetBeyond(body_end)); - } - - ResolveBreaks(GoToTargetBeyond(body_end)); - - return body_end; + return GenSwitch(sw, match_tmp, TYPE_INTERNAL_INT); } const ZAMStmt ZAMCompiler::CompileWhile(const WhileStmt* ws) { From 63f76c7f84807ef96faea15eb984bb5b30e71a8e Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Mon, 5 Aug 2024 21:11:34 +0100 Subject: [PATCH 20/33] exposing some functionality for greater flexibility in structuring run-time execution --- src/script_opt/ZAM/Support.cc | 38 +++++++++++++++++++++-- src/script_opt/ZAM/Support.h | 4 +++ src/script_opt/ZAM/ZBody.cc | 58 ++++------------------------------- src/script_opt/ZAM/ZBody.h | 44 +++++++++++++++++++++++--- 4 files changed, 84 insertions(+), 60 deletions(-) diff --git a/src/script_opt/ZAM/Support.cc b/src/script_opt/ZAM/Support.cc index 7e0d9172be..61efb9265d 100644 --- a/src/script_opt/ZAM/Support.cc +++ b/src/script_opt/ZAM/Support.cc @@ -16,7 +16,7 @@ namespace ZAM { std::string 
curr_func; std::shared_ptr curr_loc; TypePtr log_ID_enum_type; -TypePtr any_base_type; +TypePtr any_base_type = base_type(TYPE_ANY); } // namespace ZAM bool ZAM_error = false; @@ -35,6 +35,32 @@ bool is_ZAM_compilable(const ProfileFunc* pf, const char** reason) { bool IsAny(const Type* t) { return t->Tag() == TYPE_ANY; } +bool CheckAnyType(const TypePtr& any_type, const TypePtr& expected_type, const std::shared_ptr& loc) { + if ( IsAny(expected_type) ) + return true; + + if ( ! same_type(any_type, expected_type, false, false) ) { + auto at = any_type->Tag(); + auto et = expected_type->Tag(); + + if ( at == TYPE_RECORD && et == TYPE_RECORD ) { + auto at_r = any_type->AsRecordType(); + auto et_r = expected_type->AsRecordType(); + + if ( record_promotion_compatible(et_r, at_r) ) + return true; + } + + char buf[8192]; + snprintf(buf, sizeof buf, "run-time type clash (%s/%s)", type_name(at), type_name(et)); + + reporter->RuntimeError(loc->Loc(), "%s", buf); + return false; + } + + return true; +} + StringVal* ZAM_to_lower(const StringVal* sv) { auto bs = sv->AsString(); const u_char* s = bs->Bytes(); @@ -82,7 +108,10 @@ void ZAM_run_time_error(const char* msg) { } void ZAM_run_time_error(std::shared_ptr loc, const char* msg) { - reporter->RuntimeError(loc->Loc(), "%s", msg); + if ( loc ) + reporter->RuntimeError(loc->Loc(), "%s", msg); + else + fprintf(stderr, ": %s\n", msg); ZAM_error = true; } @@ -92,7 +121,10 @@ void ZAM_run_time_error(const char* msg, const Obj* o) { } void ZAM_run_time_error(std::shared_ptr loc, const char* msg, const Obj* o) { - reporter->RuntimeError(loc->Loc(), "%s (%s)", msg, obj_desc(o).c_str()); + if ( loc ) + reporter->RuntimeError(loc->Loc(), "%s (%s)", msg, obj_desc(o).c_str()); + else + ZAM_run_time_error(msg, o); ZAM_error = true; } diff --git a/src/script_opt/ZAM/Support.h b/src/script_opt/ZAM/Support.h index 6e01b7c611..96777b26f6 100644 --- a/src/script_opt/ZAM/Support.h +++ b/src/script_opt/ZAM/Support.h @@ -42,6 +42,10 @@ extern 
bool IsAny(const Type* t); inline bool IsAny(const TypePtr& t) { return IsAny(t.get()); } inline bool IsAny(const Expr* e) { return IsAny(e->GetType()); } +// Run-time checking for "any" type being consistent with +// expected type. Returns true if the type match is okay. +extern bool CheckAnyType(const TypePtr& any_type, const TypePtr& expected_type, const std::shared_ptr& loc); + extern void ZAM_run_time_error(const char* msg); extern void ZAM_run_time_error(std::shared_ptr loc, const char* msg); extern void ZAM_run_time_error(std::shared_ptr loc, const char* msg, const Obj* o); diff --git a/src/script_opt/ZAM/ZBody.cc b/src/script_opt/ZAM/ZBody.cc index 80a1039c5d..126c01d55f 100644 --- a/src/script_opt/ZAM/ZBody.cc +++ b/src/script_opt/ZAM/ZBody.cc @@ -14,27 +14,6 @@ #include "zeek/script_opt/ZAM/Compile.h" #include "zeek/session/Manager.h" -// Needed for managing the corresponding values. -#include "zeek/File.h" -#include "zeek/Func.h" -#include "zeek/OpaqueVal.h" - -// Just needed for BiFs. -#include "zeek/analyzer/Manager.h" -#include "zeek/analyzer/protocol/conn-size/ConnSize.h" -#include "zeek/broker/Manager.h" -#include "zeek/file_analysis/Manager.h" -#include "zeek/file_analysis/file_analysis.bif.h" -#include "zeek/logging/Manager.h" -#include "zeek/packet_analysis/Manager.h" -#include "zeek/packet_analysis/protocol/gtpv1/GTPv1.h" -#include "zeek/packet_analysis/protocol/teredo/Teredo.h" - -#include "zeek.bif.func_h" - -// For reading_live and reading_traces -#include "zeek/RunState.h" - namespace zeek::detail { static double CPU_prof_overhead = 0.0; @@ -165,7 +144,7 @@ void report_ZOP_profile() { // assigned value was missing (which we can only tell for managed types), // true otherwise.
-static bool copy_vec_elem(VectorVal* vv, zeek_uint_t ind, ZVal zv, const TypePtr& t) { +bool copy_vec_elem(VectorVal* vv, zeek_uint_t ind, ZVal zv, const TypePtr& t) { if ( vv->Size() <= ind ) vv->Resize(ind + 1); @@ -200,7 +179,7 @@ static void vec_exec(ZOp op, TypePtr t, VectorVal*& v1, const VectorVal* v2, con // Vector coercion. #define VEC_COERCE(tag, lhs_type, cast, rhs_accessor, ov_check, ov_err) \ - static VectorVal* vec_coerce_##tag(VectorVal* vec, const ZInst& z) { \ + VectorVal* vec_coerce_##tag(VectorVal* vec, std::shared_ptr z_loc) { \ auto& v = vec->RawVec(); \ auto yt = make_intrusive(base_type(lhs_type)); \ auto res_zv = new VectorVal(yt); \ @@ -214,7 +193,7 @@ static void vec_exec(ZOp op, TypePtr t, VectorVal*& v1, const VectorVal* v2, con std::string err = "overflow promoting from "; \ err += ov_err; \ err += " arithmetic value"; \ - ZAM_run_time_error(z.loc, err.c_str()); \ + ZAM_run_time_error(z_loc, err.c_str()); \ res[i] = std::nullopt; \ } \ else \ @@ -272,7 +251,6 @@ ZBody::ZBody(std::string _func_name, const ZAMCompiler* zc) : Stmt(STMT_ZAM) { auto log_ID_type = lookup_ID("ID", "Log"); ASSERT(log_ID_type); ZAM::log_ID_enum_type = log_ID_type->GetType(); - ZAM::any_base_type = base_type(TYPE_ANY); ZVal::SetZValNilStatusAddr(&ZAM_error); did_init = false; } @@ -335,6 +313,9 @@ ValPtr ZBody::Exec(Frame* f, StmtFlowType& flow) { // Type of the return value. If nil, then we don't have a value. TypePtr ret_type; + // ListVal corresponding to INDEX_LIST. + static auto zam_index_val_list = make_intrusive(TYPE_ANY); + #ifdef ENABLE_ZAM_PROFILE static bool profiling_active = analysis_options.profile_ZAM; static int sampling_rate = analysis_options.profile_sampling_rate; @@ -529,33 +510,6 @@ void ZBody::ReportProfile(ProfMap& pm, const ProfVec& pv, const std::string& pre } } -bool ZBody::CheckAnyType(const TypePtr& any_type, const TypePtr& expected_type, - const std::shared_ptr& loc) const { - if ( IsAny(expected_type) ) - return true; - - if ( ! 
same_type(any_type, expected_type, false, false) ) { - auto at = any_type->Tag(); - auto et = expected_type->Tag(); - - if ( at == TYPE_RECORD && et == TYPE_RECORD ) { - auto at_r = any_type->AsRecordType(); - auto et_r = expected_type->AsRecordType(); - - if ( record_promotion_compatible(et_r, at_r) ) - return true; - } - - char buf[8192]; - snprintf(buf, sizeof buf, "run-time type clash (%s/%s)", type_name(at), type_name(et)); - - reporter->RuntimeError(loc->Loc(), "%s", buf); - return false; - } - - return true; -} - void ZBody::Dump() const { printf("Frame:\n"); diff --git a/src/script_opt/ZAM/ZBody.h b/src/script_opt/ZAM/ZBody.h index db4c5beb7b..794ee36467 100644 --- a/src/script_opt/ZAM/ZBody.h +++ b/src/script_opt/ZAM/ZBody.h @@ -8,6 +8,31 @@ #include "zeek/script_opt/ZAM/Profile.h" #include "zeek/script_opt/ZAM/Support.h" +//////////////////////////////////////////////////////////////////////// +// Headers needed for run-time execution: + +// Needed for managing the corresponding values. +#include "zeek/File.h" +#include "zeek/Func.h" +#include "zeek/OpaqueVal.h" + +// Just needed for BiFs. +#include "zeek/analyzer/Manager.h" +#include "zeek/analyzer/protocol/conn-size/ConnSize.h" +#include "zeek/broker/Manager.h" +#include "zeek/file_analysis/Manager.h" +#include "zeek/file_analysis/file_analysis.bif.h" +#include "zeek/logging/Manager.h" +#include "zeek/packet_analysis/Manager.h" +#include "zeek/packet_analysis/protocol/gtpv1/GTPv1.h" +#include "zeek/packet_analysis/protocol/teredo/Teredo.h" + +#include "zeek.bif.func_h" + +// For reading_live and reading_traces +#include "zeek/RunState.h" +//////////////////////////////////////////////////////////////////////// + namespace zeek::detail { // Static information about globals used in a function. 
@@ -63,6 +88,11 @@ public: const std::string& FuncName() const { return func_name; } private: + friend class CPPCompile; + + auto Instructions() const { return insts; } + auto NumInsts() const { return end_pc; } + // Initializes profiling information, if needed. void InitProfile(); std::shared_ptr BuildProfVec() const; @@ -70,11 +100,6 @@ private: void ReportProfile(ProfMap& pm, const ProfVec& pv, const std::string& prefix, std::set caller_modules) const; - // Run-time checking for "any" type being consistent with - // expected typed. Returns true if the type match is okay. - bool CheckAnyType(const TypePtr& any_type, const TypePtr& expected_type, - const std::shared_ptr& loc) const; - StmtPtr Duplicate() override { return {NewRef{}, this}; } void StmtDescribe(ODesc* d) const override; @@ -128,4 +153,13 @@ private: ProfVec* curr_prof_vec; }; +extern bool copy_vec_elem(VectorVal* vv, zeek_uint_t ind, ZVal zv, const TypePtr& t); + +extern VectorVal* vec_coerce_DI(VectorVal* vec, std::shared_ptr z_loc); +extern VectorVal* vec_coerce_DU(VectorVal* vec, std::shared_ptr z_loc); +extern VectorVal* vec_coerce_ID(VectorVal* vec, std::shared_ptr z_loc); +extern VectorVal* vec_coerce_IU(VectorVal* vec, std::shared_ptr z_loc); +extern VectorVal* vec_coerce_UD(VectorVal* vec, std::shared_ptr z_loc); +extern VectorVal* vec_coerce_UI(VectorVal* vec, std::shared_ptr z_loc); + } // namespace zeek::detail From db2244827081ed973918186d45be43b39ba0246f Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Tue, 6 Aug 2024 08:25:58 +0100 Subject: [PATCH 21/33] internal option to suppress control-flow optimization --- src/script_opt/ScriptOpt.cc | 1 + src/script_opt/ScriptOpt.h | 3 +++ src/script_opt/ZAM/AM-Opt.cc | 6 ++++++ 3 files changed, 10 insertions(+) diff --git a/src/script_opt/ScriptOpt.cc b/src/script_opt/ScriptOpt.cc index 47f9fddf24..4a789134e8 100644 --- a/src/script_opt/ScriptOpt.cc +++ b/src/script_opt/ScriptOpt.cc @@ -271,6 +271,7 @@ static void init_options() { 
check_env_opt("ZEEK_REPORT_UNCOMPILABLE", analysis_options.report_uncompilable); check_env_opt("ZEEK_ZAM_CODE", analysis_options.gen_ZAM_code); check_env_opt("ZEEK_NO_ZAM_OPT", analysis_options.no_ZAM_opt); + check_env_opt("ZEEK_NO_ZAM_CONTROL_FLOW_OPT", analysis_options.no_ZAM_control_flow_opt); check_env_opt("ZEEK_DUMP_ZAM", analysis_options.dump_ZAM); check_env_opt("ZEEK_PROFILE", analysis_options.profile_ZAM); diff --git a/src/script_opt/ScriptOpt.h b/src/script_opt/ScriptOpt.h index 1f58c99a9a..4854936073 100644 --- a/src/script_opt/ScriptOpt.h +++ b/src/script_opt/ScriptOpt.h @@ -72,6 +72,9 @@ struct AnalyOpt { // Deactivate the low-level ZAM optimizer. bool no_ZAM_opt = false; + // Deactivate ZAM optimization of control flow. + bool no_ZAM_control_flow_opt = false; + // Produce a profile of ZAM execution. bool profile_ZAM = false; diff --git a/src/script_opt/ZAM/AM-Opt.cc b/src/script_opt/ZAM/AM-Opt.cc index c21fd6e77c..f242affccc 100644 --- a/src/script_opt/ZAM/AM-Opt.cc +++ b/src/script_opt/ZAM/AM-Opt.cc @@ -144,6 +144,9 @@ bool ZAMCompiler::RemoveDeadCode() { if ( ! i0->live ) continue; + if ( analysis_options.no_ZAM_control_flow_opt ) + continue; + auto i1 = NextLiveInst(i0); // Look for degenerate branches. 
@@ -181,6 +184,9 @@ bool ZAMCompiler::RemoveDeadCode() { } bool ZAMCompiler::CollapseGoTos() { + if ( analysis_options.no_ZAM_control_flow_opt ) + return false; + bool did_change = false; for ( auto& i0 : insts1 ) { From 1457099df3a569de4e0bceee7ce1e99b40929a8d Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Tue, 6 Aug 2024 08:29:16 +0100 Subject: [PATCH 22/33] "-O validate-ZAM" option to validate generated ZAM instructions --- src/CMakeLists.txt | 1 + src/Options.cc | 9 ++- src/script_opt/ScriptOpt.cc | 5 ++ src/script_opt/ScriptOpt.h | 9 +++ src/script_opt/ZAM/README.md | 1 + src/script_opt/ZAM/Validate.cc | 110 +++++++++++++++++++++++++++++++++ src/zeek-setup.cc | 4 +- 7 files changed, 134 insertions(+), 5 deletions(-) create mode 100644 src/script_opt/ZAM/Validate.cc diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 5a623745f6..fbcfa6b672 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -446,6 +446,7 @@ set(MAIN_SRCS script_opt/ZAM/Profile.cc script_opt/ZAM/Stmt.cc script_opt/ZAM/Support.cc + script_opt/ZAM/Validate.cc script_opt/ZAM/Vars.cc script_opt/ZAM/ZBody.cc script_opt/ZAM/ZInst.cc diff --git a/src/Options.cc b/src/Options.cc index 0c15fbe074..c0c6ff3d4e 100644 --- a/src/Options.cc +++ b/src/Options.cc @@ -194,6 +194,7 @@ static void print_analysis_help() { fprintf(stderr, " optimize-AST optimize the (transformed) AST; implies xform\n"); fprintf(stderr, " profile-ZAM generate to zprof.out a ZAM execution profile; implies -O ZAM\n"); fprintf(stderr, " report-recursive report on recursive functions and exit\n"); + fprintf(stderr, " validate-ZAM perform internal assessment of synthesized ZAM instructions and exit\n"); fprintf(stderr, " xform transform scripts to \"reduced\" form\n"); fprintf(stderr, "\n--optimize options when generating C++:\n"); @@ -220,14 +221,14 @@ static void set_analysis_option(const char* opt, Options& opts) { exit(0); } - if ( util::streq(opt, "dump-uds") ) + if ( util::streq(opt, "allow-cond") ) + a_o.allow_cond 
= true; + else if ( util::streq(opt, "dump-uds") ) a_o.activate = a_o.dump_uds = true; else if ( util::streq(opt, "dump-xform") ) a_o.activate = a_o.dump_xform = true; else if ( util::streq(opt, "dump-ZAM") ) a_o.activate = a_o.dump_ZAM = true; - else if ( util::streq(opt, "allow-cond") ) - a_o.allow_cond = true; else if ( util::streq(opt, "gen-C++") ) a_o.gen_CPP = true; else if ( util::streq(opt, "gen-standalone-C++") ) @@ -254,6 +255,8 @@ static void set_analysis_option(const char* opt, Options& opts) { a_o.report_uncompilable = true; else if ( util::streq(opt, "use-C++") ) a_o.use_CPP = true; + else if ( util::streq(opt, "validate-ZAM") ) + a_o.validate_ZAM = true; else if ( util::streq(opt, "xform") ) a_o.activate = true; diff --git a/src/script_opt/ScriptOpt.cc b/src/script_opt/ScriptOpt.cc index 4a789134e8..7013eecbb9 100644 --- a/src/script_opt/ScriptOpt.cc +++ b/src/script_opt/ScriptOpt.cc @@ -558,6 +558,11 @@ void clear_script_analysis() { void analyze_scripts(bool no_unused_warnings) { init_options(); + if ( analysis_options.validate_ZAM ) { + validate_ZAM_insts(); + return; + } + // Any standalone compiled scripts have already been instantiated // at this point, but may require post-loading-of-scripts finalization. for ( auto cb : standalone_finalizations ) diff --git a/src/script_opt/ScriptOpt.h b/src/script_opt/ScriptOpt.h index 4854936073..3ca64036e5 100644 --- a/src/script_opt/ScriptOpt.h +++ b/src/script_opt/ScriptOpt.h @@ -61,6 +61,10 @@ struct AnalyOpt { // recursive, and exit. Only germane if running the inliner. bool report_recursive = false; + // If true, assess the instructions generated from ZAM templates + // for validity, and exit. + bool validate_ZAM = false; + // If true, generate ZAM code for applicable function bodies, // activating all optimizations. bool gen_ZAM = false; @@ -244,6 +248,11 @@ extern bool should_analyze(const ScriptFuncPtr& f, const StmtPtr& body); // suppressed by the flag) and optimization. 
extern void analyze_scripts(bool no_unused_warnings); +// Conduct internal validation of ZAM instructions. Upon success, generates +// a terse report to stdout. Exits with an internal error if a problem is +// encountered. +extern void validate_ZAM_insts(); + // Called when all script processing is complete and we can discard // unused ASTs and associated state. extern void clear_script_analysis(); diff --git a/src/script_opt/ZAM/README.md b/src/script_opt/ZAM/README.md index dc9c62da68..90afbdbf09 100644 --- a/src/script_opt/ZAM/README.md +++ b/src/script_opt/ZAM/README.md @@ -100,6 +100,7 @@ issues: |`profile-ZAM` | Generate to "zprof.out" a ZAM execution profile. (Requires configuring with `--enable-ZAM-profiling` or `--enable-debug`.)| |`report-recursive` | Report on recursive functions and exit.| |`report-uncompilable` | Report on uncompilable functions and exit. For ZAM, all functions should be compilable.| +|`validate-ZAM` | Perform internal validation of ZAM instructions and exit.| |`xform` | Transform scripts to "reduced" form.| diff --git a/src/script_opt/ZAM/Validate.cc b/src/script_opt/ZAM/Validate.cc new file mode 100644 index 0000000000..9540964335 --- /dev/null +++ b/src/script_opt/ZAM/Validate.cc @@ -0,0 +1,110 @@ +// See the file "COPYING" in the main distribution directory for copyright. + +#include <regex> + +#include "zeek/script_opt/ZAM/ZBody.h" +#include "zeek/script_opt/ZAM/ZOp.h" + +using std::string; + +namespace zeek::detail { + +std::unordered_map<ZOp, ZAMInstDesc> zam_inst_desc = { + +#include "ZAM-Desc.h" + +}; + +// While the following has commonalities that could be factored out, +// for now we keep this form because it provides flexibility for +// accommodating other forms of accessors.
+static std::map<char, string> type_pats = { + {'A', "Addr"}, {'a', "Any"}, {'D', "Double"}, {'F', "Func"}, {'I', "Int"}, {'L', "List"}, {'N', "SubNet"}, + {'P', "Pattern"}, {'R', "Record"}, {'S', "String"}, {'T', "Table"}, {'t', "Type"}, {'U', "Count"}, {'V', "Vector"}, +}; + +static int num_valid = 0; +static int num_tested = 0; +static int num_skipped = 0; + +void analyze_ZAM_inst(const char* op_name, const ZAMInstDesc& zid) { + auto& oc = zid.op_class; + auto& ot = zid.op_types; + auto& eval = zid.op_eval; + + bool have_ot = ! ot.empty(); + + if ( have_ot && oc.size() != ot.size() ) + reporter->InternalError("%s: instruction class/types mismatch (%s/%s)", op_name, oc.c_str(), ot.c_str()); + + int nslot = 0; + + for ( size_t i = 0; i < oc.size(); ++i ) { + auto oc_i = oc[i]; + + string op; + + switch ( oc_i ) { + case 'V': + case 'R': op = "frame\\[z\\.v" + std::to_string(++nslot) + "\\]"; break; + + case 'b': + case 'f': + case 'g': + case 's': + case 'i': op = "z\\.v" + std::to_string(++nslot); break; + + case 'C': op = "z\\.c"; break; + + default: + if ( have_ot && ot[i] != 'X' ) + reporter->InternalError("instruction types mismatch: %s (%c)", op_name, oc_i); + } + + auto match_pat = op; + if ( have_ot ) { + auto ot_i = ot[i]; + + bool bare_int = std::string("bfgis").find(oc_i) != std::string::npos; + + if ( ot_i == 'X' || bare_int ) { + if ( ot_i == 'X' && bare_int ) + reporter->InternalError("empty instruction type for '%c' class element: %s", oc_i, op_name); + + if ( ! std::regex_search(eval, std::regex(op)) ) + reporter->InternalError("%s: operand %s not found", op_name, op.c_str()); + + ++num_skipped; + continue; + } + + auto tp = type_pats.find(ot_i); + if ( tp == type_pats.end() ) + reporter->InternalError("%s: instruction type %c not found", op_name, ot_i); + match_pat += ".As" + tp->second + "(Ref)?\\(\\)"; + ++num_tested; + } + + if ( !
std::regex_search(eval, std::regex(match_pat)) ) + reporter->InternalError("%s: did not find /%s/ in %s", op_name, match_pat.c_str(), eval.c_str()); + } + ++num_valid; +} + +void validate_ZAM_insts() { + // The following primes a data structure we access. + (void)AssignmentFlavor(OP_NOP, TYPE_VOID, false); + + for ( int i = 0; i < int(OP_NOP); ++i ) { + auto zop = ZOp(i); + if ( zam_inst_desc.find(zop) == zam_inst_desc.end() && assignment_flavor.find(zop) == assignment_flavor.end() ) + reporter->InternalError("op %s missing from description", ZOP_name(zop)); + } + + for ( auto& zid : zam_inst_desc ) + analyze_ZAM_inst(ZOP_name(zid.first), zid.second); + + printf("%d valid, %d tested, %d skipped\n", num_valid, num_tested, num_skipped); +} + +} // namespace zeek::detail diff --git a/src/zeek-setup.cc b/src/zeek-setup.cc index 3f5549b78a..52f267449f 100644 --- a/src/zeek-setup.cc +++ b/src/zeek-setup.cc @@ -917,8 +917,8 @@ SetupResult setup(int argc, char** argv, Options* zopts) { analyze_scripts(options.no_unused_warnings); - if ( analysis_options.report_recursive ) { - // This option is report-and-exit. + if ( analysis_options.report_recursive || analysis_options.validate_ZAM ) { + // These options are report-and-exit. early_shutdown(); exit(0); } From 3e1b53681015acba4613ad57f1c16f9839b37360 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Tue, 6 Aug 2024 08:35:39 +0100 Subject: [PATCH 23/33] complete migration of ZAM to use only public ZVal methods --- src/ZVal.h | 1 - src/script_opt/ZAM/OPs/constructors.op | 14 +++++++------- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/src/ZVal.h b/src/ZVal.h index d7695140c0..1223c67b39 100644 --- a/src/ZVal.h +++ b/src/ZVal.h @@ -161,7 +161,6 @@ union ZVal { private: friend class RecordVal; friend class VectorVal; - friend class zeek::detail::ZBody; // Used for bool, int, enum. 
zeek_int_t int_val; diff --git a/src/script_opt/ZAM/OPs/constructors.op b/src/script_opt/ZAM/OPs/constructors.op index c55b81c6bc..aacd1ffd46 100644 --- a/src/script_opt/ZAM/OPs/constructors.op +++ b/src/script_opt/ZAM/OPs/constructors.op @@ -133,8 +133,8 @@ eval GenInits() ConstructRecordPost($$) macro SetUpRecFieldOps(lhs_full, rhs_full, which_lhs_map) - auto lhs = lhs_full.record_val; - auto rhs = rhs_full.record_val; + auto lhs = lhs_full.AsRecordRef(); + auto rhs = rhs_full.AsRecord(); auto& lhs_map = Z_AUX->which_lhs_map; auto& rhs_map = Z_AUX->rhs_map; auto n = rhs_map.size(); @@ -186,14 +186,14 @@ op1-read class VV eval SetUpRecFieldOps($1, $2, map) for ( size_t i = 0U; i < n; ++i ) - lhs->RawField(lhs_map[i]).int_val += rhs->RawField(rhs_map[i]).int_val; + lhs->RawField(lhs_map[i]).AsIntRef() += rhs->RawField(rhs_map[i]).AsInt(); op Rec-Add-Double-Fields op1-read class VV eval SetUpRecFieldOps($1, $2, map) for ( size_t i = 0U; i < n; ++i ) - lhs->RawField(lhs_map[i]).double_val += rhs->RawField(rhs_map[i]).double_val; + lhs->RawField(lhs_map[i]).AsDoubleRef() += rhs->RawField(rhs_map[i]).AsDouble(); op Rec-Add-Fields op1-read @@ -206,11 +206,11 @@ eval SetUpRecFieldOps($1, $2, map) auto rhs_i = rhs->RawField(rhs_map[i]); auto tag = types[i]->Tag(); if ( tag == TYPE_INT ) - lhs_i.int_val += rhs_i.int_val; + lhs_i.AsIntRef() += rhs_i.AsInt(); else if ( tag == TYPE_COUNT ) - lhs_i.uint_val += rhs_i.uint_val; + lhs_i.AsCountRef() += rhs_i.AsCount(); else - lhs_i.double_val += rhs_i.double_val; + lhs_i.AsDoubleRef() += rhs_i.AsDouble(); } # Special instruction for concretizing vectors that are fields in a From 3b6df1ab7f73dd8c49fadb9832ad8a6890821a0a Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Tue, 6 Aug 2024 09:35:49 +0100 Subject: [PATCH 24/33] convert new ZAM operations to use typed operands --- src/script_opt/ZAM/OPs/constructors.op | 70 +++++++++++++++----------- 1 file changed, 41 insertions(+), 29 deletions(-) diff --git 
a/src/script_opt/ZAM/OPs/constructors.op b/src/script_opt/ZAM/OPs/constructors.op index aacd1ffd46..6220784a42 100644 --- a/src/script_opt/ZAM/OPs/constructors.op +++ b/src/script_opt/ZAM/OPs/constructors.op @@ -55,23 +55,24 @@ direct-unary-op Record-Constructor ConstructRecord direct-unary-op Rec-Construct-With-Rec ConstructRecordFromRecord macro ConstructRecordPost(lhs) - auto& r = lhs.AsRecordRef(); - Unref(r); - r = new RecordVal(cast_intrusive<RecordType>(Z_TYPE), std::move(init_vals)); + Unref(lhs); + lhs = new RecordVal(cast_intrusive<RecordType>(Z_TYPE), std::move(init_vals)); op Construct-Direct-Record class V +op-types R eval auto init_vals = Z_AUX->ToZValVec(frame); ConstructRecordPost($$) op Construct-Known-Record class V +op-types R eval auto init_vals = Z_AUX->ToZValVecWithMap(frame); ConstructRecordPost($$) -macro AssignFromRec(lhs_full, rhs_full) +macro AssignFromRec(rhs) /* The following is defined below, for use by Rec-Assign-Fields */ - SetUpRecFieldOps(lhs_full, rhs_full, lhs_map) + SetUpRecFieldOps(lhs_map) auto is_managed = Z_AUX->is_managed; for ( size_t i = 0U; i < n; ++i ) { @@ -83,8 +84,9 @@ macro AssignFromRec(lhs_full, rhs_full) op Construct-Known-Record-From class VV +op-types R R eval auto init_vals = Z_AUX->ToZValVecWithMap(frame); - AssignFromRec($$, $1) + AssignFromRec($1) ConstructRecordPost($$) macro DoNetworkTimeInit(slot) @@ -92,49 +94,53 @@ macro DoNetworkTimeInit(slot) op Construct-Known-Record-With-NT class Vi +op-types R I eval auto init_vals = Z_AUX->ToZValVecWithMap(frame); DoNetworkTimeInit($1) ConstructRecordPost($$) op Construct-Known-Record-With-NT-From class VVi +op-types R R I eval auto init_vals = Z_AUX->ToZValVecWithMap(frame); DoNetworkTimeInit($2) - AssignFromRec($$, $1) + AssignFromRec($1) ConstructRecordPost($$) macro GenInits() auto init_vals = Z_AUX->ToZValVecWithMap(frame); - for ( auto& fi : *z.aux->field_inits ) - init_vals[fi.first] = fi.second->Generate(); + for ( auto& fi : *Z_AUX->field_inits ) + init_vals[fi.first] =
fi.second->Generate(); op Construct-Known-Record-With-Inits class V +op-types R eval GenInits() ConstructRecordPost($$) op Construct-Known-Record-With-Inits-From class VV +op-types R R eval GenInits() - AssignFromRec($$, $1) + AssignFromRec($1) ConstructRecordPost($$) op Construct-Known-Record-With-Inits-And-NT class Vi +op-types R I eval GenInits() DoNetworkTimeInit($1) ConstructRecordPost($$) op Construct-Known-Record-With-Inits-And-NT-From class VVi +op-types R R I eval GenInits() DoNetworkTimeInit($2) - AssignFromRec($$, $1) + AssignFromRec($1) ConstructRecordPost($$) -macro SetUpRecFieldOps(lhs_full, rhs_full, which_lhs_map) - auto lhs = lhs_full.AsRecordRef(); - auto rhs = rhs_full.AsRecord(); +macro SetUpRecFieldOps(which_lhs_map) auto& lhs_map = Z_AUX->which_lhs_map; auto& rhs_map = Z_AUX->rhs_map; auto n = rhs_map.size(); @@ -142,11 +148,12 @@ macro SetUpRecFieldOps(lhs_full, rhs_full, which_lhs_map) op Rec-Assign-Fields op1-read class VV -eval SetUpRecFieldOps($1, $2, map) +op-types R R +eval SetUpRecFieldOps(map) for ( size_t i = 0U; i < n; ++i ) - lhs->RawOptField(lhs_map[i]) = rhs->RawField(rhs_map[i]); + $1->RawOptField(lhs_map[i]) = $2->RawField(rhs_map[i]); -macro DoManagedRecAssign() +macro DoManagedRecAssign(lhs, rhs) auto is_managed = Z_AUX->is_managed; for ( size_t i = 0U; i < n; ++i ) if ( is_managed[i] ) @@ -164,17 +171,19 @@ macro DoManagedRecAssign() op Rec-Assign-Fields-Managed op1-read class VV -eval SetUpRecFieldOps($1, $2, map) - DoManagedRecAssign() +op-types R R +eval SetUpRecFieldOps(map) + DoManagedRecAssign($1, $2) op Rec-Assign-Fields-All-Managed op1-read class VV -eval SetUpRecFieldOps($1, $2, map) +op-types R R +eval SetUpRecFieldOps(map) for ( size_t i = 0U; i < n; ++i ) { - auto& lhs_i = lhs->RawOptField(lhs_map[i]); - auto rhs_i = rhs->RawField(rhs_map[i]); + auto& lhs_i = $1->RawOptField(lhs_map[i]); + auto rhs_i = $2->RawField(rhs_map[i]); zeek::Ref(rhs_i.ManagedVal()); if ( lhs_i ) ZVal::DeleteManagedType(*lhs_i); @@ 
-184,26 +193,29 @@ eval SetUpRecFieldOps($1, $2, map) op Rec-Add-Int-Fields op1-read class VV -eval SetUpRecFieldOps($1, $2, map) +op-types R R +eval SetUpRecFieldOps(map) for ( size_t i = 0U; i < n; ++i ) - lhs->RawField(lhs_map[i]).AsIntRef() += rhs->RawField(rhs_map[i]).AsInt(); + $1->RawField(lhs_map[i]).AsIntRef() += $2->RawField(rhs_map[i]).AsInt(); op Rec-Add-Double-Fields op1-read class VV -eval SetUpRecFieldOps($1, $2, map) +op-types R R +eval SetUpRecFieldOps(map) for ( size_t i = 0U; i < n; ++i ) - lhs->RawField(lhs_map[i]).AsDoubleRef() += rhs->RawField(rhs_map[i]).AsDouble(); + $1->RawField(lhs_map[i]).AsDoubleRef() += $2->RawField(rhs_map[i]).AsDouble(); op Rec-Add-Fields op1-read class VV -eval SetUpRecFieldOps($1, $2, map) +op-types R R +eval SetUpRecFieldOps(map) auto& types = Z_AUX->types; for ( size_t i = 0U; i < n; ++i ) { - auto& lhs_i = lhs->RawField(lhs_map[i]); - auto rhs_i = rhs->RawField(rhs_map[i]); + auto& lhs_i = $1->RawField(lhs_map[i]); + auto rhs_i = $2->RawField(rhs_map[i]); auto tag = types[i]->Tag(); if ( tag == TYPE_INT ) lhs_i.AsIntRef() += rhs_i.AsInt(); From bf3cf9da489df6eb0a0b292169fa11555d9a83c7 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Tue, 6 Aug 2024 09:38:56 +0100 Subject: [PATCH 25/33] BTest updates for ZAM regularization changes --- .../Baseline.zam/bifs.from_json-2/.stderr | 2 +- .../core.option-runtime-errors-2/.stderr | 2 +- .../core.option-runtime-errors-3/.stderr | 2 +- .../core.option-runtime-errors/.stderr | 2 +- .../Baseline.zam/opt.validate-ZAM/output | 2 ++ testing/btest/Baseline/bifs.sub_bytes/.stderr | 1 + testing/btest/Baseline/bifs.sub_bytes/out | 9 ++++++++ .../language.vector-type-checking/out | 1 + testing/btest/bifs/sub_bytes.zeek | 22 +++++++++++++++++++ .../btest/language/vector-type-checking.zeek | 13 +++++++++++ testing/btest/opt/validate-ZAM.zeek | 5 +++++ 11 files changed, 57 insertions(+), 4 deletions(-) create mode 100644 testing/btest/Baseline.zam/opt.validate-ZAM/output create mode 
100644 testing/btest/Baseline/bifs.sub_bytes/.stderr create mode 100644 testing/btest/Baseline/bifs.sub_bytes/out create mode 100644 testing/btest/bifs/sub_bytes.zeek create mode 100644 testing/btest/opt/validate-ZAM.zeek diff --git a/testing/btest/Baseline.zam/bifs.from_json-2/.stderr b/testing/btest/Baseline.zam/bifs.from_json-2/.stderr index 372f599e7f..1a3fceee72 100644 --- a/testing/btest/Baseline.zam/bifs.from_json-2/.stderr +++ b/testing/btest/Baseline.zam/bifs.from_json-2/.stderr @@ -1,2 +1,2 @@ ### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. -error in <...>/from_json.zeek, line 4: from_json() requires a type argument (from_json([], 10, from_json_default_key_mapper)) +error in <...>/from_json.zeek, line 4: from_json() requires a type argument (from_json([], ::#0, from_json_default_key_mapper)) diff --git a/testing/btest/Baseline.zam/core.option-runtime-errors-2/.stderr b/testing/btest/Baseline.zam/core.option-runtime-errors-2/.stderr index 719b74fadc..15561eb6c9 100644 --- a/testing/btest/Baseline.zam/core.option-runtime-errors-2/.stderr +++ b/testing/btest/Baseline.zam/core.option-runtime-errors-2/.stderr @@ -1,2 +1,2 @@ ### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. -error in <...>/option-runtime-errors.zeek, line 3: Incompatible type for set of ID 'A': got 'string', need 'count' (Option::set(A, hi, )) +error in <...>/option-runtime-errors.zeek, line 3: Incompatible type for set of ID 'A': got 'string', need 'count' (Option::set(A, ::#0, )) diff --git a/testing/btest/Baseline.zam/core.option-runtime-errors-3/.stderr b/testing/btest/Baseline.zam/core.option-runtime-errors-3/.stderr index f121199a5d..682f1fbe27 100644 --- a/testing/btest/Baseline.zam/core.option-runtime-errors-3/.stderr +++ b/testing/btest/Baseline.zam/core.option-runtime-errors-3/.stderr @@ -1,2 +1,2 @@ ### BTest baseline data generated by btest-diff. 
Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. -error in <...>/option-runtime-errors.zeek, line 3: ID 'A' is not an option (Option::set(A, 6, )) +error in <...>/option-runtime-errors.zeek, line 3: ID 'A' is not an option (Option::set(A, ::#0, )) diff --git a/testing/btest/Baseline.zam/core.option-runtime-errors/.stderr b/testing/btest/Baseline.zam/core.option-runtime-errors/.stderr index 7537f10f90..5e6d272614 100644 --- a/testing/btest/Baseline.zam/core.option-runtime-errors/.stderr +++ b/testing/btest/Baseline.zam/core.option-runtime-errors/.stderr @@ -1,2 +1,2 @@ ### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. -error in <...>/option-runtime-errors.zeek, line 9: Could not find ID named 'B' (Option::set(B, 6, )) +error in <...>/option-runtime-errors.zeek, line 9: Could not find ID named 'B' (Option::set(B, ::#0, )) diff --git a/testing/btest/Baseline.zam/opt.validate-ZAM/output b/testing/btest/Baseline.zam/opt.validate-ZAM/output new file mode 100644 index 0000000000..b3ebf021ca --- /dev/null +++ b/testing/btest/Baseline.zam/opt.validate-ZAM/output @@ -0,0 +1,2 @@ +### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. +1237 valid, 1860 tested, 426 skipped diff --git a/testing/btest/Baseline/bifs.sub_bytes/.stderr b/testing/btest/Baseline/bifs.sub_bytes/.stderr new file mode 100644 index 0000000000..49d861c74c --- /dev/null +++ b/testing/btest/Baseline/bifs.sub_bytes/.stderr @@ -0,0 +1 @@ +### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. diff --git a/testing/btest/Baseline/bifs.sub_bytes/out b/testing/btest/Baseline/bifs.sub_bytes/out new file mode 100644 index 0000000000..6d1b207b13 --- /dev/null +++ b/testing/btest/Baseline/bifs.sub_bytes/out @@ -0,0 +1,9 @@ +### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. 
Requires BTest >= 0.63. +bcde +bcde +bcde +bcde +bcde +bcde +bcde +bcde diff --git a/testing/btest/Baseline/language.vector-type-checking/out b/testing/btest/Baseline/language.vector-type-checking/out index 71eb85a096..f892af90fc 100644 --- a/testing/btest/Baseline/language.vector-type-checking/out +++ b/testing/btest/Baseline/language.vector-type-checking/out @@ -17,3 +17,4 @@ error in count and <...>/vector-type-checking.zeek, line 39: arithmetic mixed wi error in <...>/vector-type-checking.zeek, line 39 and count: type mismatch (thousand-two and count) error in <...>/vector-type-checking.zeek, line 39: inconsistent types in vector constructor (vector(thousand-two)) error in <...>/vector-type-checking.zeek, line 45: type clash in assignment (lea = vector(thousand-three)) +error in <...>/vector-type-checking.zeek, line 57: cannot compare string vectors with pattern vectors (vector(foo) == vector(<...>/)) diff --git a/testing/btest/bifs/sub_bytes.zeek b/testing/btest/bifs/sub_bytes.zeek new file mode 100644 index 0000000000..a1aa7217db --- /dev/null +++ b/testing/btest/bifs/sub_bytes.zeek @@ -0,0 +1,22 @@ +# @TEST-DOC: Test the sub_bytes() function. +# +# @TEST-EXEC: zeek -b %INPUT >out +# @TEST-EXEC: btest-diff out +# @TEST-EXEC: TEST_DIFF_CANONIFIER=$SCRIPTS/diff-remove-abspath btest-diff .stderr + +# These tests are to ensure that script optimization gets all the permutations +# correct, for varying combinations of constant and variable arguments. 
+ +global s = "abcdefghij"; +global a = 2; +global b = 4; + +print sub_bytes(s, a, b); +print sub_bytes(s, 2, b); +print sub_bytes(s, a, 4); +print sub_bytes(s, 2, 4); + +print sub_bytes("abcdefghij", a, b); +print sub_bytes("abcdefghij", 2, b); +print sub_bytes("abcdefghij", a, 4); +print sub_bytes("abcdefghij", 2, 4); diff --git a/testing/btest/language/vector-type-checking.zeek b/testing/btest/language/vector-type-checking.zeek index bdea76c4cd..24842efd44 100644 --- a/testing/btest/language/vector-type-checking.zeek +++ b/testing/btest/language/vector-type-checking.zeek @@ -44,3 +44,16 @@ event zeek_init() { local lea: MyVec = vector("thousand-three"); # type clash } + +# check operation that's okay as a scalar but not as a vector +event zeek_init() + { + if ( "foo" == /fo*/ ) + print "should not complain"; + } + +event zeek_init() + { + if ( vector("foo") == vector(/fo*/) ) + print "should complain"; + } diff --git a/testing/btest/opt/validate-ZAM.zeek b/testing/btest/opt/validate-ZAM.zeek new file mode 100644 index 0000000000..2640581f58 --- /dev/null +++ b/testing/btest/opt/validate-ZAM.zeek @@ -0,0 +1,5 @@ +# @TEST-DOC: ZAM maintenance script for validating synthesized operations. +# @TEST-REQUIRES: test "${ZEEK_ZAM}" == "1" +# +# @TEST-EXEC: zeek -b -O validate-ZAM %INPUT >output +# @TEST-EXEC: btest-diff output From 42bf164dc4fed4dac334385dc8834c6e523a3e48 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Tue, 6 Aug 2024 09:39:51 +0100 Subject: [PATCH 26/33] updates to typos allow-list reflecting ZAM regularization changes --- .typos.toml | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/.typos.toml b/.typos.toml index dfa60bd504..f8a512a8c3 100644 --- a/.typos.toml +++ b/.typos.toml @@ -9,6 +9,7 @@ extend-ignore-re = [ "Remove in v6.1.*SupressWeird", "max_repititions:.*Remove in v6.1", "mis-aliasing of", + "mis-indexing", # On purpose "\"THE NETBIOS NAM\"", # NFS stuff. 
@@ -20,6 +21,12 @@ extend-ignore-re = [ "ot->Tag\\(\\) == TYPE_.*", "auto.* ot =", "ot = OP_.*", + "ot\\[", + "ot.size", + "ot.empty", + "ot_i", + "ot.c_str", + "have_ot", "if \\( ot == OP_.*", "ot->Yield\\(\\)->InternalType\\(\\)", "switch \\( ot \\)", @@ -53,7 +60,7 @@ ND_REDIRECT = "ND_REDIRECT" NED_ACK = "NED_ACK" NFS3ERR_ACCES = "NFS3ERR_ACCES" NO_SEH = "NO_SEH" -OP_SWITCHS_VVV = "OP_SWITCHS_VVV" +OP_SWITCHS_Vii = "OP_SWITCHS_Vii" O_WRONLY = "O_WRONLY" RPC_NT_CALL_FAILED_DNE = "RPC_NT_CALL_FAILED_DNE" RpcAddPrintProvidor = "RpcAddPrintProvidor" From a947d9616027bd501a27f400a8435dd123ddcdc0 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Thu, 8 Aug 2024 08:42:11 -0700 Subject: [PATCH 27/33] script optimization fix for complex "in" expressions in conditionals --- src/script_opt/Expr.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/script_opt/Expr.cc b/src/script_opt/Expr.cc index 5e6500ae0d..a7c76ed72a 100644 --- a/src/script_opt/Expr.cc +++ b/src/script_opt/Expr.cc @@ -115,6 +115,9 @@ bool Expr::IsReducedConditional(Reducer* c) const { return NonReduced(this); if ( op1->Tag() == EXPR_LIST ) { + if ( ! 
op1->IsReduced(c) ) + return NonReduced(this); + auto l1 = op1->AsListExpr(); auto& l1_e = l1->Exprs(); From 3f52cbcbc7c0e830372def0a16a4df75af125954 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Thu, 8 Aug 2024 15:30:45 -0700 Subject: [PATCH 28/33] fix for script optimization of nested switch statements --- src/script_opt/ZAM/Stmt.cc | 60 ++++++++++++++++++-------------------- 1 file changed, 29 insertions(+), 31 deletions(-) diff --git a/src/script_opt/ZAM/Stmt.cc b/src/script_opt/ZAM/Stmt.cc index 375204887b..4753717d94 100644 --- a/src/script_opt/ZAM/Stmt.cc +++ b/src/script_opt/ZAM/Stmt.cc @@ -423,45 +423,27 @@ const ZAMStmt ZAMCompiler::ValueSwitch(const SwitchStmt* sw, const NameExpr* v, } const ZAMStmt ZAMCompiler::GenSwitch(const SwitchStmt* sw, int slot, InternalTypeTag it) { - int tbl = 0; ZOp op; switch ( it ) { - case TYPE_INTERNAL_INT: - op = OP_SWITCHI_Vii; - tbl = int_casesI.size(); - break; + case TYPE_INTERNAL_INT: op = OP_SWITCHI_Vii; break; - case TYPE_INTERNAL_UNSIGNED: - op = OP_SWITCHU_Vii; - tbl = uint_casesI.size(); - break; + case TYPE_INTERNAL_UNSIGNED: op = OP_SWITCHU_Vii; break; - case TYPE_INTERNAL_DOUBLE: - op = OP_SWITCHD_Vii; - tbl = double_casesI.size(); - break; + case TYPE_INTERNAL_DOUBLE: op = OP_SWITCHD_Vii; break; - case TYPE_INTERNAL_STRING: - op = OP_SWITCHS_Vii; - tbl = str_casesI.size(); - break; + case TYPE_INTERNAL_STRING: op = OP_SWITCHS_Vii; break; - case TYPE_INTERNAL_ADDR: - op = OP_SWITCHA_Vii; - tbl = str_casesI.size(); - break; + case TYPE_INTERNAL_ADDR: op = OP_SWITCHA_Vii; break; - case TYPE_INTERNAL_SUBNET: - op = OP_SWITCHN_Vii; - tbl = str_casesI.size(); - break; + case TYPE_INTERNAL_SUBNET: op = OP_SWITCHN_Vii; break; default: reporter->InternalError("bad switch type"); } - // Add the "head", i.e., the execution of the jump table. - auto sw_head_op = ZInstI(op, slot, tbl, 0); + // Add the "head", i.e., the execution of the jump table. At this point, + // we leave the table (v2) and default (v3) TBD. 
+ auto sw_head_op = ZInstI(op, slot, 0, 0); sw_head_op.op_type = OP_VVV_I2_I3; auto sw_head = AddInst(sw_head_op); @@ -549,20 +531,36 @@ const ZAMStmt ZAMCompiler::GenSwitch(const SwitchStmt* sw, int slot, InternalTyp // Now add the jump table to the set we're keeping for the // corresponding type. + size_t tbl; + switch ( it ) { - case TYPE_INTERNAL_INT: int_casesI.push_back(new_int_cases); break; + case TYPE_INTERNAL_INT: + tbl = int_casesI.size(); + int_casesI.push_back(new_int_cases); + break; - case TYPE_INTERNAL_UNSIGNED: uint_casesI.push_back(new_uint_cases); break; + case TYPE_INTERNAL_UNSIGNED: + tbl = uint_casesI.size(); + uint_casesI.push_back(new_uint_cases); + break; - case TYPE_INTERNAL_DOUBLE: double_casesI.push_back(new_double_cases); break; + case TYPE_INTERNAL_DOUBLE: + tbl = double_casesI.size(); + double_casesI.push_back(new_double_cases); + break; case TYPE_INTERNAL_STRING: case TYPE_INTERNAL_ADDR: - case TYPE_INTERNAL_SUBNET: str_casesI.push_back(new_str_cases); break; + case TYPE_INTERNAL_SUBNET: + tbl = str_casesI.size(); + str_casesI.push_back(new_str_cases); + break; default: reporter->InternalError("bad switch type"); } + insts1[sw_head.stmt_num]->v2 = int(tbl); + AddCFT(insts1[body_end.stmt_num], CFT_BLOCK_END); return body_end; From 6faad5e5ca0c284a79c85c026c8a9371b0c48848 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Fri, 9 Aug 2024 09:26:52 -0700 Subject: [PATCH 29/33] fix for script optimization of "opaque" values that are run-time constants --- src/script_opt/Expr.cc | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/script_opt/Expr.cc b/src/script_opt/Expr.cc index a7c76ed72a..5a04a3bef1 100644 --- a/src/script_opt/Expr.cc +++ b/src/script_opt/Expr.cc @@ -475,7 +475,8 @@ ExprPtr UnaryExpr::Reduce(Reducer* c, StmtPtr& red_stmt) { auto op_val = op->FoldVal(); if ( op_val ) { auto fold = Fold(op_val.get()); - return TransformMe(make_intrusive(fold), c, red_stmt); + if ( fold->GetType()->Tag() != TYPE_OPAQUE 
) + return TransformMe(make_intrusive(fold), c, red_stmt); } if ( c->Optimizing() ) @@ -523,7 +524,8 @@ ExprPtr BinaryExpr::Reduce(Reducer* c, StmtPtr& red_stmt) { auto op2_fold_val = op2->FoldVal(); if ( op1_fold_val && op2_fold_val ) { auto fold = Fold(op1_fold_val.get(), op2_fold_val.get()); - return TransformMe(make_intrusive(fold), c, red_stmt); + if ( fold->GetType()->Tag() != TYPE_OPAQUE ) + return TransformMe(make_intrusive(fold), c, red_stmt); } if ( c->Optimizing() ) From 202c405a1ee90cae5773ef9b667708974dce4429 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Fri, 9 Aug 2024 09:27:22 -0700 Subject: [PATCH 30/33] fix for -O C++ construction of variable names that use multiple module namespaces --- src/script_opt/CPP/Vars.cc | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/script_opt/CPP/Vars.cc b/src/script_opt/CPP/Vars.cc index 4aa967c3f8..0b9efb45d9 100644 --- a/src/script_opt/CPP/Vars.cc +++ b/src/script_opt/CPP/Vars.cc @@ -111,10 +111,12 @@ string CPPCompile::LocalName(const ID* l) const { auto n = l->Name(); auto without_module = strstr(n, "::"); - if ( without_module ) - return Canonicalize(without_module + 2); - else - return Canonicalize(n); + while ( without_module ) { + n = without_module + 2; + without_module = strstr(n, "::"); + } + + return Canonicalize(n); } string CPPCompile::Canonicalize(const char* name) const { From 910a2f6c5945c2da3073748ed797f778b079e364 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Fri, 9 Aug 2024 09:27:59 -0700 Subject: [PATCH 31/33] address some script optimization compiler warnings under Linux --- src/script_opt/ZAM/Stmt.cc | 2 +- src/script_opt/ZAM/ZOp.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/script_opt/ZAM/Stmt.cc b/src/script_opt/ZAM/Stmt.cc index 4753717d94..d353827b68 100644 --- a/src/script_opt/ZAM/Stmt.cc +++ b/src/script_opt/ZAM/Stmt.cc @@ -591,7 +591,7 @@ const ZAMStmt ZAMCompiler::TypeSwitch(const SwitchStmt* sw, const NameExpr* v, c int 
ntypes = type_map->size(); auto aux = new ZInstAux(ntypes); - for ( auto i = 0; i < type_map->size(); ++i ) { + for ( size_t i = 0; i < type_map->size(); ++i ) { auto& tm = (*type_map)[i]; auto id_i = tm.first; auto id_case = tm.second; diff --git a/src/script_opt/ZAM/ZOp.h b/src/script_opt/ZAM/ZOp.h index 72ae94ddd3..09e44f145d 100644 --- a/src/script_opt/ZAM/ZOp.h +++ b/src/script_opt/ZAM/ZOp.h @@ -5,6 +5,7 @@ #pragma once #include +#include namespace zeek::detail { From 691a4003b716acf650de021332ef43e4ae52c4d6 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Thu, 15 Aug 2024 10:32:51 -0700 Subject: [PATCH 32/33] fixes to limit AST traversal in the face of recursive types --- src/ScriptValidation.cc | 10 ++++++++++ src/script_opt/CSE.h | 10 ++++++++++ 2 files changed, 20 insertions(+) diff --git a/src/ScriptValidation.cc b/src/ScriptValidation.cc index 91c4d77c19..1510e1f53a 100644 --- a/src/ScriptValidation.cc +++ b/src/ScriptValidation.cc @@ -58,6 +58,15 @@ public: return TC_CONTINUE; } + TraversalCode PreType(const Type* t) override { + if ( types_seen.count(t) > 0 ) + return TC_ABORTSTMT; + + types_seen.insert(t); + + return TC_CONTINUE; + } + void SetHookDepth(int hd) { hook_depth = hd; } bool IsValid() const { return valid_script; } @@ -83,6 +92,7 @@ private: } std::unordered_map stmt_depths; + std::unordered_set types_seen; int hook_depth = 0; bool report; // whether to report problems via "reporter" bool valid_script = true; diff --git a/src/script_opt/CSE.h b/src/script_opt/CSE.h index 56d6572d9a..60b1ad45bc 100644 --- a/src/script_opt/CSE.h +++ b/src/script_opt/CSE.h @@ -24,6 +24,13 @@ public: TraversalCode PreExpr(const Expr*) override; TraversalCode PostExpr(const Expr*) override; + TraversalCode PreType(const Type* t) override { + if ( types_seen.count(t) > 0 ) + return TC_ABORTSTMT; + types_seen.insert(t); + return TC_CONTINUE; + } + // Returns the ultimate verdict re safety. bool IsValid() const { if ( ! 
is_valid ) @@ -105,6 +112,9 @@ protected: // // A count to allow for nesting. int in_aggr_mod_expr = 0; + + // Used to limit traversal of recursive types. + std::unordered_set types_seen; }; // Used for debugging, to communicate which expression wasn't From 2477213619c57b9560681d01e4a2d73a747eca10 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Thu, 15 Aug 2024 13:02:09 -0700 Subject: [PATCH 33/33] simpler and more robust identification of function parameters for AST profiling --- src/ID.cc | 11 +++------- src/ID.h | 10 ++++++--- src/script_opt/ProfileFunc.cc | 40 ++++++++++++----------------------- src/script_opt/ProfileFunc.h | 4 ++++ 4 files changed, 27 insertions(+), 38 deletions(-) diff --git a/src/ID.cc b/src/ID.cc index 72effdb4a3..8a0716cb19 100644 --- a/src/ID.cc +++ b/src/ID.cc @@ -100,20 +100,15 @@ ID::ID(const char* arg_name, IDScope arg_scope, bool arg_is_export) { name = util::copy_string(arg_name); scope = arg_scope; is_export = arg_is_export; - is_option = false; - is_blank = name && extract_var_name(name) == "_"; - is_const = false; - is_enum_const = false; - is_type = false; offset = 0; - if ( is_blank ) + if ( name && extract_var_name(name) == "_" ) { + is_blank = true; SetType(base_type(TYPE_ANY)); + } opt_info = new IDOptInfo(this); - infer_return_type = false; - SetLocationInfo(&start_location, &end_location); } diff --git a/src/ID.h b/src/ID.h index 75b849b5c1..ec27ba7720 100644 --- a/src/ID.h +++ b/src/ID.h @@ -81,7 +81,6 @@ public: } bool IsType() const { return is_type; } - void MakeType() { is_type = true; } void SetVal(ValPtr v); @@ -160,9 +159,14 @@ protected: const char* name; IDScope scope; bool is_export; - bool infer_return_type; TypePtr type; - bool is_const, is_enum_const, is_type, is_option, is_blank; + bool is_capture = false; + bool is_const = false; + bool is_enum_const = false; + bool is_type = false; + bool is_option = false; + bool is_blank = false; + bool infer_return_type = false; int offset; ValPtr val; AttributesPtr 
attrs; diff --git a/src/script_opt/ProfileFunc.cc b/src/script_opt/ProfileFunc.cc index 84f8cb16f7..d6d096f374 100644 --- a/src/script_opt/ProfileFunc.cc +++ b/src/script_opt/ProfileFunc.cc @@ -24,11 +24,12 @@ p_hash_type p_hash(const Obj* o) { ProfileFunc::ProfileFunc(const Func* func, const StmtPtr& body, bool _abs_rec_fields) { profiled_func = func; + profiled_scope = profiled_func->GetScope(); profiled_body = body.get(); abs_rec_fields = _abs_rec_fields; - auto ft = func->GetType()->AsFuncType(); - auto& fcaps = ft->GetCaptures(); + profiled_func_t = cast_intrusive(func->GetType()); + auto& fcaps = profiled_func_t->GetCaptures(); if ( fcaps ) { int offset = 0; @@ -40,7 +41,7 @@ ProfileFunc::ProfileFunc(const Func* func, const StmtPtr& body, bool _abs_rec_fi } } - Profile(ft, body); + Profile(profiled_func_t.get(), body); } ProfileFunc::ProfileFunc(const Stmt* s, bool _abs_rec_fields) { @@ -56,6 +57,9 @@ ProfileFunc::ProfileFunc(const Expr* e, bool _abs_rec_fields) { if ( e->Tag() == EXPR_LAMBDA ) { auto func = e->AsLambdaExpr(); + ASSERT(func->GetType()->Tag() == TYPE_FUNC); + profiled_scope = func->GetScope(); + profiled_func_t = cast_intrusive(func->GetType()); int offset = 0; @@ -75,6 +79,11 @@ ProfileFunc::ProfileFunc(const Expr* e, bool _abs_rec_fields) { void ProfileFunc::Profile(const FuncType* ft, const StmtPtr& body) { num_params = ft->Params()->NumFields(); + + auto& ov = profiled_scope->OrderedVars(); + for ( int i = 0; i < num_params; ++i ) + params.insert(ov[i].get()); + TrackType(ft); body->Traverse(this); } @@ -181,28 +190,10 @@ TraversalCode ProfileFunc::PreExpr(const Expr* e) { TrackType(id->GetType()); if ( id->IsGlobal() ) { - globals.insert(id); - all_globals.insert(id); - - const auto& t = id->GetType(); - if ( t->Tag() == TYPE_FUNC ) - if ( t->AsFuncType()->Flavor() == FUNC_FLAVOR_EVENT ) - events.insert(id->Name()); - + PreID(id); break; } - // This is a tad ugly. 
Unfortunately due to the weird way - // that Zeek function *declarations* work, there's no reliable - // way to get the list of parameters for a function *definition*, - // since they can have different names than what's present in the - // declaration. So we identify them directly, by knowing that - // they come at the beginning of the frame ... and being careful - // to avoid misconfusing a lambda capture with a low frame offset - // as a parameter. - if ( captures.count(id) == 0 && id->Offset() < num_params ) - params.insert(id); - locals.insert(id); break; @@ -426,11 +417,6 @@ TraversalCode ProfileFunc::PreExpr(const Expr* e) { for ( const auto& i : l->OuterIDs() ) { locals.insert(i); TrackID(i); - - // See above re EXPR_NAME regarding the following - // logic. - if ( captures.count(i) == 0 && i->Offset() < num_params ) - params.insert(i); } // In general, we don't want to recurse into the body. diff --git a/src/script_opt/ProfileFunc.h b/src/script_opt/ProfileFunc.h index 2ce4dfe562..8acd15729f 100644 --- a/src/script_opt/ProfileFunc.h +++ b/src/script_opt/ProfileFunc.h @@ -83,6 +83,7 @@ public: // Returns the function, body, or expression profiled. Each can be // null depending on the constructor used. const Func* ProfiledFunc() const { return profiled_func; } + const ScopePtr& ProfiledScope() const { return profiled_scope; } const Stmt* ProfiledBody() const { return profiled_body; } const Expr* ProfiledExpr() const { return profiled_expr; } @@ -139,6 +140,7 @@ protected: TraversalCode PreStmt(const Stmt*) override; TraversalCode PreExpr(const Expr*) override; TraversalCode PreID(const ID*) override; + TraversalCode PreType(const Type*) override; // Take note of the presence of a given type. void TrackType(const Type* t); @@ -157,6 +159,8 @@ protected: // The function, body, or expression profiled. Can be null // depending on which constructor was used. 
const Func* profiled_func = nullptr; + ScopePtr profiled_scope; // null when not in a full function context + FuncTypePtr profiled_func_t; // null when not in a full function context const Stmt* profiled_body = nullptr; const Expr* profiled_expr = nullptr;