diff --git a/CHANGES b/CHANGES index 3e4814be98..69ad9454a7 100644 --- a/CHANGES +++ b/CHANGES @@ -1,4 +1,24 @@ +4.1.0-dev.71 | 2021-01-11 18:03:25 -0800 + + * Add []-style variable-capture-list for Zeek lambda functions (Vern Paxson, Corelight) + + The previous behavior of automatically capturing references to variables + outside a lambda's scope is now deprecated. An explicit capture list which + also specifies the desired copy-semantics will be required when + writing lambda functions that refer to local variables of an outer scope. + + Examples of the new capture-list syntax are described at + https://docs.zeek.org/en/master/script-reference/types.html#type-function + + * nit: fixed some 0/1 values that should instead be false/true (Vern Paxson, Corelight) + + * factored some complexity of begin_func() into static functions for clarity (Vern Paxson, Corelight) + + * error propagation fix: don't complain about "unused" values that themselves are due to errors (Vern Paxson, Corelight) + + * corrected & reflowed some comments, plus a whitespace tweak (Vern Paxson, Corelight) + 4.1.0-dev.52 | 2021-01-11 11:11:13 -0800 * Remove unusable/broken RocksDB code and options (Jon Siwek, Corelight) diff --git a/NEWS b/NEWS index 2acd7c50b3..778918e60b 100644 --- a/NEWS +++ b/NEWS @@ -9,6 +9,12 @@ Zeek 4.1.0 New Functionality ----------------- +- Lambda functions can now use capture-list to help specify exactly which local + variables from outer scopes need to made available while evaluating the lambda + and also the method by which they're made available: deep vs. shallow copy. + + For examples, see: https://docs.zeek.org/en/master/script-reference/types.html#type-function + Changed Functionality --------------------- @@ -21,6 +27,13 @@ Removed Functionality Deprecated Functionality ------------------------ +- Lambda/closure support: automatic capturing of references to variables + outside a lambda's scope is now deprecated. An explicit capture + list which also specifies the desired copy-semantics is now required when + writing lambda functions that refer to local variables of an outer scope. + + For examples, see: https://docs.zeek.org/en/master/script-reference/types.html#type-function + Zeek 4.0.0 ========== diff --git a/VERSION b/VERSION index bd72a941d9..50cd3b2675 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -4.1.0-dev.52 +4.1.0-dev.71 diff --git a/doc b/doc index f4af5cbf59..a5b72de20a 160000 --- a/doc +++ b/doc @@ -1 +1 @@ -Subproject commit f4af5cbf59a7f5865a4721a47b7b9e81a28f270a +Subproject commit a5b72de20a6bafc738aab9027ad76f059c5a7601 diff --git a/src/Expr.cc b/src/Expr.cc index c9cbd10e34..c148c5538f 100644 --- a/src/Expr.cc +++ b/src/Expr.cc @@ -326,7 +326,8 @@ ValPtr NameExpr::Eval(Frame* f) const v = f->GetElementByID(id); else - // No frame - evaluating for Simplify() purposes + // No frame - evaluating for purposes of resolving a + // compile-time constant. return nullptr; if ( v ) @@ -4369,6 +4370,8 @@ LambdaExpr::LambdaExpr(std::unique_ptr arg_ing, SetType(ingredients->id->GetType()); + CheckCaptures(); + // Install a dummy version of the function globally for use only // when broker provides a closure. auto dummy_func = make_intrusive( @@ -4394,10 +4397,10 @@ LambdaExpr::LambdaExpr(std::unique_ptr arg_ing, const auto& id = global_scope()->Find(fullname); if ( id ) - // Just try again to make a unique lambda name. If two peer - // processes need to agree on the same lambda name, this assumes - // they're loading the same scripts and thus have the same hash - // collisions. + // Just try again to make a unique lambda name. + // If two peer processes need to agree on the same + // lambda name, this assumes they're loading the same + // scripts and thus have the same hash collisions. d.Add(" "); else break; @@ -4415,6 +4418,67 @@ LambdaExpr::LambdaExpr(std::unique_ptr arg_ing, id->SetConst(); } +void LambdaExpr::CheckCaptures() + { + auto ft = type->AsFuncType(); + const auto& captures = ft->GetCaptures(); + + capture_by_ref = false; + + if ( ! captures ) + { + if ( outer_ids.size() > 0 ) + { + // TODO: Remove in v5.1: these deprecated closure semantics + reporter->Warning("use of outer identifiers in lambdas without [] captures is deprecated: %s%s", + outer_ids.size() > 1 ? "e.g., " : "", + outer_ids[0]->Name()); + capture_by_ref = true; + } + + return; + } + + std::set outer_is_matched; + std::set capture_is_matched; + + for ( const auto& c : *captures ) + { + auto cid = c.id.get(); + + if ( ! cid ) + // This happens for undefined/inappropriate + // identifiers listed in captures. There's + // already been an error message. + continue; + + if ( capture_is_matched.count(cid) > 0 ) + { + ExprError(util::fmt("%s listed multiple times in capture", cid->Name())); + continue; + } + + for ( auto id : outer_ids ) + if ( cid == id ) + { + outer_is_matched.insert(id); + capture_is_matched.insert(cid); + break; + } + } + + for ( auto id : outer_ids ) + if ( outer_is_matched.count(id) == 0 ) + ExprError(util::fmt("%s is used inside lambda but not captured", id->Name())); + + for ( const auto& c : *captures ) + { + auto cid = c.id.get(); + if ( cid && capture_is_matched.count(cid) == 0 ) + ExprError(util::fmt("%s is captured but not used inside lambda", cid->Name())); + } + } + Scope* LambdaExpr::GetScope() const { return ingredients->scope.get(); @@ -4429,7 +4493,10 @@ ValPtr LambdaExpr::Eval(Frame* f) const ingredients->frame_size, ingredients->priority); - lamb->AddClosure(outer_ids, f); + if ( capture_by_ref ) + lamb->AddClosure(outer_ids, f); + else + lamb->CreateCaptures(f); // Set name to corresponding dummy func. // Allows for lookups by the receiver. @@ -5100,7 +5167,9 @@ ExprPtr check_and_promote_expr(Expr* const e, zeek::Type* t) IntrusivePtr{NewRef{}, e}, IntrusivePtr{NewRef{}, t->AsVectorType()}); - t->Error("type clash", e); + if ( t->Tag() != TYPE_ERROR && et->Tag() != TYPE_ERROR ) + t->Error("type clash", e); + return nullptr; } diff --git a/src/Expr.h b/src/Expr.h index 396b1107a4..f294df6098 100644 --- a/src/Expr.h +++ b/src/Expr.h @@ -1042,9 +1042,13 @@ protected: void ExprDescribe(ODesc* d) const override; private: + void CheckCaptures(); + std::unique_ptr ingredients; IDPList outer_ids; + bool capture_by_ref; // if true, use deprecated reference semantics + std::string my_name; }; diff --git a/src/Frame.cc b/src/Frame.cc index b0a106f54f..d4aa13e13b 100644 --- a/src/Frame.cc +++ b/src/Frame.cc @@ -31,6 +31,14 @@ Frame::Frame(int arg_size, const ScriptFunc* func, const zeek::Args* fn_args) closure = nullptr; + // We could Ref()/Unref() the captures frame, but there's really + // no need because by definition this current frame exists to + // enable execution of the function, and its captures frame won't + // go away until the function itself goes away, which can only be + // after this frame does. + captures = function ? function->GetCapturesFrame() : nullptr; + captures_offset_map = + function ? function->GetCapturesOffsetMap() : nullptr; current_offset = 0; } @@ -86,11 +94,18 @@ void Frame::SetElementWeak(int n, Val* v) void Frame::SetElement(const ID* id, ValPtr v) { - if ( closure ) + if ( closure && IsOuterID(id) ) { - if ( IsOuterID(id) ) + closure->SetElement(id, std::move(v)); + return; + } + + if ( captures ) + { + auto cap_off = captures_offset_map->find(id->Name()); + if ( cap_off != captures_offset_map->end() ) { - closure->SetElement(id, std::move(v)); + captures->SetElement(cap_off->second, std::move(v)); return; } } @@ -115,10 +130,14 @@ void Frame::SetElement(const ID* id, ValPtr v) const ValPtr& Frame::GetElementByID(const ID* id) const { - if ( closure ) + if ( closure && IsOuterID(id) ) + return closure->GetElementByID(id); + + if ( captures ) { - if ( IsOuterID(id) ) - return closure->GetElementByID(id); + auto cap_off = captures_offset_map->find(id->Name()); + if ( cap_off != captures_offset_map->end() ) + return captures->GetElement(cap_off->second); } // do we have an offset for it? @@ -191,6 +210,9 @@ Frame* Frame::Clone() const if ( frame[i].val ) other->frame[i].val = frame[i].val->Clone(); + // Note, there's no need to clone "captures" or "captures_offset_map" + // since those get created fresh when constructing "other". + return other; } @@ -269,7 +291,7 @@ Frame* Frame::SelectiveClone(const IDPList& selection, ScriptFunc* func) const // other->closure = closure->SelectiveClone(them); // other->outer_ids = outer_ids; - if( closure ) + if ( closure ) other->CaptureClosure(closure, outer_ids); if ( offset_map ) @@ -285,24 +307,25 @@ Frame* Frame::SelectiveClone(const IDPList& selection, ScriptFunc* func) const return other; } -broker::expected Frame::Serialize(const Frame* target, const IDPList& selection) +broker::expected Frame::SerializeClosureFrame(const IDPList& selection) { broker::vector rval; if ( selection.length() == 0 ) + // Easy - no captures, so frame is irrelvant. return {std::move(rval)}; IDPList us; // and IDPList them; - std::unordered_map new_map; - if ( target->offset_map ) - new_map = *(target->offset_map); + OffsetMap new_map; + if ( offset_map ) + new_map = *offset_map; - for (const auto& we : selection) + for ( const auto& we : selection ) { - if ( target->IsOuterID(we) ) + if ( IsOuterID(we) ) them.append(we); else { @@ -313,18 +336,18 @@ broker::expected Frame::Serialize(const Frame* target, const IDPLi if ( them.length() ) { - if ( ! target->closure ) + if ( ! closure ) reporter->InternalError("Attempting to serialize values from a frame that does not exist."); rval.emplace_back(std::string("ClosureFrame")); - auto ids = SerializeIDList(target->outer_ids); + auto ids = SerializeIDList(outer_ids); if ( ! ids ) return broker::ec::invalid_data; rval.emplace_back(*ids); - auto serialized = Frame::Serialize(target->closure, them); + auto serialized = closure->SerializeClosureFrame(them); if ( ! serialized ) return broker::ec::invalid_data; @@ -341,7 +364,7 @@ broker::expected Frame::Serialize(const Frame* target, const IDPLi broker::vector body; - for ( int i = 0; i < target->size; ++i ) + for ( int i = 0; i < size; ++i ) body.emplace_back(broker::none()); for ( const auto& id : us ) @@ -349,10 +372,10 @@ broker::expected Frame::Serialize(const Frame* target, const IDPLi int location = id->Offset(); auto where = new_map.find(std::string(id->Name())); - if (where != new_map.end()) + if ( where != new_map.end() ) location = where->second; - const auto& val = target->frame[location].val; + const auto& val = frame[location].val; TypeTag tag = val->GetType()->Tag(); @@ -369,15 +392,37 @@ broker::expected Frame::Serialize(const Frame* target, const IDPLi return {std::move(rval)}; } -std::pair Frame::Unserialize(const broker::vector& data) +broker::expected Frame::SerializeCopyFrame() + { + broker::vector rval; + rval.emplace_back(std::string("CopyFrame")); + + broker::vector body; + + for ( int i = 0; i < size; ++i ) + { + const auto& val = frame[i].val; + auto expected = Broker::detail::val_to_data(val.get()); + if ( ! expected ) + return broker::ec::invalid_data; + + TypeTag tag = val->GetType()->Tag(); + broker::vector val_tuple {std::move(*expected), + static_cast(tag)}; + body.emplace_back(std::move(val_tuple)); + } + + rval.emplace_back(std::move(body)); + + return {std::move(rval)}; + } + +std::pair Frame::Unserialize(const broker::vector& data, + const std::optional& captures) { if ( data.size() == 0 ) return std::make_pair(true, nullptr); - IDPList outer_ids; - OffsetMap offset_map; - FramePtr closure; - auto where = data.begin(); auto has_name = broker::get_if(*where); @@ -386,6 +431,54 @@ std::pair Frame::Unserialize(const broker::vector& data) std::advance(where, 1); + if ( captures || *has_name == "CopyFrame" ) + { + ASSERT(captures && *has_name == "CopyFrame"); + + auto has_body = broker::get_if(*where); + if ( ! has_body ) + return std::make_pair(false, nullptr); + + broker::vector body = *has_body; + int frame_size = body.size(); + auto rf = make_intrusive(frame_size, nullptr, nullptr); + + rf->closure = nullptr; + + for ( int i = 0; i < frame_size; ++i ) + { + auto has_vec = broker::get_if(body[i]); + if ( ! has_vec ) + continue; + + broker::vector val_tuple = *has_vec; + if ( val_tuple.size() != 2 ) + return std::make_pair(false, nullptr); + + auto has_type = broker::get_if(val_tuple[1]); + if ( ! has_type ) + return std::make_pair(false, nullptr); + + broker::integer g = *has_type; + Type t( static_cast(g) ); + + auto val = Broker::detail::data_to_val(std::move(val_tuple[0]), &t); + if ( ! val ) + return std::make_pair(false, nullptr); + + rf->frame[i].val = std::move(val); + } + + return std::make_pair(true, std::move(rf)); + } + + + // Code to support deprecated semantics: + + IDPList outer_ids; + OffsetMap offset_map; + FramePtr closure; + if ( *has_name == "ClosureFrame" ) { auto has_vec = broker::get_if(*where); @@ -411,7 +504,7 @@ std::pair Frame::Unserialize(const broker::vector& data) std::advance(where, 1); - auto closure_pair = Frame::Unserialize(*has_vec); + auto closure_pair = Frame::Unserialize(*has_vec, {}); if ( ! closure_pair.first ) { for ( auto& i : outer_ids ) @@ -566,7 +659,7 @@ broker::expected Frame::SerializeIDList(const IDPList& in) } broker::expected -Frame::SerializeOffsetMap(const std::unordered_map& in) +Frame::SerializeOffsetMap(const OffsetMap& in) { broker::vector rval; @@ -621,7 +714,7 @@ Frame::UnserializeIDList(const broker::vector& data) std::pair> Frame::UnserializeOffsetMap(const broker::vector& data) { - std::unordered_map rval; + OffsetMap rval; for ( broker::vector::size_type i = 0; i < data.size(); i += 2 ) { diff --git a/src/Frame.h b/src/Frame.h index 64f496b6b6..7413beacec 100644 --- a/src/Frame.h +++ b/src/Frame.h @@ -7,12 +7,14 @@ #include #include #include +#include #include #include #include "zeek/ZeekList.h" // for typedef val_list #include "zeek/Obj.h" +#include "zeek/Type.h" #include "zeek/IntrusivePtr.h" #include "zeek/ZeekArgs.h" @@ -192,29 +194,42 @@ public: Frame* SelectiveClone(const IDPList& selection, ScriptFunc* func) const; /** - * Serializes the Frame into a Broker representation. - * - * Serializing a frame can be fairly non-trivial. If the frame has no - * closure the serialized frame is just a vector: + * Serializes the frame in the context of supporting the (deprecated) + * reference semantics for closures. This can be fairly non-trivial. + * If the frame itself has no closure then the serialized frame + * is a vector: * * [ "Frame", [offset_map] [serialized_values] ] * - * Where serialized_values are two element vectors. A serialized_value + * where serialized_values are two-element vectors. A serialized_value * has the result of calling broker::data_to_val on the value in the * first index, and an integer representing that value's type in the * second index. offset_map is a serialized version of the frame's * offset_map. * - * A Frame with a closure needs to serialize a little more information. - * It is serialized as: + * A reference-semantics frame with its own closure needs to + * (recursively) serialize more information: * * [ "ClosureFrame", [outer_ids], Serialize(closure), [offset_map], * [serialized_values] ] * - * @return the broker representaton, or an error if the serialization + * @return the broker representation, or an error if the serialization * failed. */ - static broker::expected Serialize(const Frame* target, const IDPList& selection); + broker::expected SerializeClosureFrame(const IDPList& selection); + + /** + * Serializes the frame in the context of supporting copy semantics + * for lambdas: + * + * [ "CopyFrame", serialized_values ] + * + * where serialized_values are two-element vectors. A serialized_value + * has the result of calling broker::data_to_val on the value in the + * first index, and an integer representing that value's type in the + * second index. + */ + broker::expected SerializeCopyFrame(); /** * Instantiates a Frame from a serialized one. @@ -222,8 +237,13 @@ public: * @return a pair in which the first item is the status of the serialization; * and the second is the unserialized frame with reference count +1, or * null if the serialization wasn't successful. + * + * The *captures* argument, if non-nil, specifies that the frame + * reflects captures with copy-semantics rather than deprecated + * reference semantics. */ - static std::pair Unserialize(const broker::vector& data); + static std::pair Unserialize(const broker::vector& data, + const std::optional& captures); /** * Sets the IDs that the frame knows offsets for. These offsets will @@ -300,9 +320,12 @@ private: */ void ClearElement(int n); - /** Have we captured this id? */ + /** Have we captured this id? Version for deprecated semantics. */ bool IsOuterID(const ID* in) const; + /** Have we captured this id? Version for current semantics. */ + bool IsCaptureID(const ID* in) const; + /** Serializes an offset_map */ static broker::expected SerializeOffsetMap(const OffsetMap& in); @@ -337,10 +360,12 @@ private: */ int current_offset; - /** The enclosing frame of this frame. */ + /** The enclosing frame of this frame. Used for reference semantics. */ Frame* closure; - /** ID's used in this frame from the enclosing frame. */ + /** ID's used in this frame from the enclosing frame, when using + * reference semantics (closure != nullptr). + */ IDPList outer_ids; /** @@ -349,8 +374,22 @@ private: */ std::unique_ptr offset_map; + /** Frame used for captures (if any) with copy semantics. */ + Frame* captures; + + /** Maps IDs to offsets into the "captures" frame. If the ID + * isn't present, then it's not a capture. + * + * We keep this separate from offset_map to help ensure we don't + * confuse code from the deprecated semantics with the current + * semantics. + */ + const OffsetMap* captures_offset_map; + /** The function this frame is associated with. */ const ScriptFunc* function; + + // The following is only needed for the debugger. /** The arguments to the function that this Frame is associated with. */ const zeek::Args* func_args; diff --git a/src/Func.cc b/src/Func.cc index 7a120e0538..0206fc7485 100644 --- a/src/Func.cc +++ b/src/Func.cc @@ -320,6 +320,9 @@ ScriptFunc::~ScriptFunc() { if ( ! weak_closure_ref ) Unref(closure); + + delete captures_frame; + delete captures_offset_mapping; } bool ScriptFunc::IsPure() const @@ -472,6 +475,56 @@ ValPtr ScriptFunc::Invoke(zeek::Args* args, Frame* parent) const return result; } +void ScriptFunc::CreateCaptures(Frame* f) + { + const auto& captures = type->GetCaptures(); + + if ( ! captures ) + return; + + // Create a private Frame to hold the values of captured variables, + // and a mapping from those variables to their offsets in the Frame. + delete captures_frame; + delete captures_offset_mapping; + captures_frame = new Frame(captures->size(), this, nullptr); + captures_offset_mapping = new OffsetMap; + + int offset = 0; + for ( const auto& c : *captures ) + { + auto v = f->GetElementByID(c.id); + + if ( v ) + { + if ( c.deep_copy || ! v->Modifiable() ) + v = v->Clone(); + + captures_frame->SetElement(offset, std::move(v)); + } + + (*captures_offset_mapping)[c.id->Name()] = offset; + ++offset; + } + } + +void ScriptFunc::SetCaptures(Frame* f) + { + const auto& captures = type->GetCaptures(); + ASSERT(captures); + + delete captures_frame; + delete captures_offset_mapping; + captures_frame = f; + captures_offset_mapping = new OffsetMap; + + int offset = 0; + for ( const auto& c : *captures ) + { + (*captures_offset_mapping)[c.id->Name()] = offset; + ++offset; + } + } + void ScriptFunc::AddBody(StmtPtr new_body, const std::vector& new_inits, size_t new_frame_size, int priority) @@ -545,7 +598,7 @@ void ScriptFunc::SetClosureFrame(Frame* f) bool ScriptFunc::UpdateClosure(const broker::vector& data) { - auto result = Frame::Unserialize(data); + auto result = Frame::Unserialize(data, {}); if ( ! result.first ) return false; @@ -564,6 +617,17 @@ bool ScriptFunc::UpdateClosure(const broker::vector& data) return true; } +bool ScriptFunc::DeserializeCaptures(const broker::vector& data) + { + auto result = Frame::Unserialize(data, GetType()->GetCaptures()); + +ASSERT(result.first); + + SetCaptures(result.second.release()); + + return true; + } + FuncPtr ScriptFunc::DoClone() { @@ -578,12 +642,22 @@ FuncPtr ScriptFunc::DoClone() other->weak_closure_ref = false; other->outer_ids = outer_ids; + if ( captures_frame ) + { + other->captures_frame = captures_frame->Clone(); + other->captures_offset_mapping = new OffsetMap; + *other->captures_offset_mapping = *captures_offset_mapping; + } + return other; } broker::expected ScriptFunc::SerializeClosure() const { - return Frame::Serialize(closure, outer_ids); + if ( captures_frame ) + return captures_frame->SerializeCopyFrame(); + else + return closure->SerializeClosureFrame(outer_ids); } void ScriptFunc::Describe(ODesc* d) const diff --git a/src/Func.h b/src/Func.h index 191a766218..a1cf89ec95 100644 --- a/src/Func.h +++ b/src/Func.h @@ -164,7 +164,39 @@ public: ValPtr Invoke(zeek::Args* args, Frame* parent) const override; /** - * Adds adds a closure to the function. Closures are cloned and + * Creates a separate frame for captures and initializes its + * elements. The list of captures comes from the ScriptFunc's + * type, so doesn't need to be passed in, just the frame to + * use in evaluating the identifiers. + * + * @param f the frame used for evaluating the captured identifiers + */ + void CreateCaptures(Frame* f); + + /** + * Returns the frame associated with this function for tracking + * captures, or nil if there isn't one. + * + * @return internal frame kept by the function for persisting captures + */ + Frame* GetCapturesFrame() const { return captures_frame; } + + // Same definition as in Frame.h. + using OffsetMap = std::unordered_map; + + /** + * Returns the mapping of captures to slots in the captures frame. + * + * @return pointer to mapping of captures to slots + */ + const OffsetMap* GetCapturesOffsetMap() const + { return captures_offset_mapping; } + + // The following "Closure" methods implement the deprecated + // capture-by-reference functionality. + + /** + * Adds a closure to the function. Closures are cloned and * future calls to ScriptFunc methods will not modify *f*. * * @param ids IDs that are captured by the closure. @@ -186,12 +218,19 @@ public: bool StrengthenClosureReference(Frame* f); /** - * Serializes this function's closure. + * Serializes this function's closure or capture frame. * - * @return a serialized version of the function's closure. + * @return a serialized version of the function's closure/capture frame. */ broker::expected SerializeClosure() const; + /** + * Sets the captures frame to one built from *data*. + * + * @param data a serialized frame + */ + bool DeserializeCaptures(const broker::vector& data); + void AddBody(StmtPtr new_body, const std::vector& new_inits, size_t new_frame_size, int priority) override; @@ -237,15 +276,33 @@ protected: */ void SetClosureFrame(Frame* f); + /** + * Uses the given frame for captures, and generates the + * mapping from captured variables to offsets in the frame. + * + * @param f the frame holding the values of capture variables + */ + void SetCaptures(Frame* f); + private: size_t frame_size; // List of the outer IDs used in the function. IDPList outer_ids; + + // The following is used for deprecated capture-by-reference + // closures: // The frame the ScriptFunc was initialized in. Frame* closure = nullptr; bool weak_closure_ref = false; + // Used for capture-by-copy closures. These persist over the + // function's lifetime, providing quasi-globals that maintain + // state across individual calls to the function. + Frame* captures_frame = nullptr; + + OffsetMap* captures_offset_mapping = nullptr; + // The most recently added/updated body. StmtPtr current_body; }; diff --git a/src/Stmt.cc b/src/Stmt.cc index 6211ff507d..eaa65bd4da 100644 --- a/src/Stmt.cc +++ b/src/Stmt.cc @@ -340,7 +340,7 @@ ValPtr PrintStmt::DoExec(std::vector vals, ExprStmt::ExprStmt(ExprPtr arg_e) : Stmt(STMT_EXPR), e(std::move(arg_e)) { - if ( e && e->IsPure() ) + if ( e && e->IsPure() && e->GetType()->Tag() != TYPE_ERROR ) Warn("expression value ignored"); SetLocationInfo(e->GetLocationInfo()); diff --git a/src/Type.cc b/src/Type.cc index eac80264e8..7387368254 100644 --- a/src/Type.cc +++ b/src/Type.cc @@ -632,6 +632,7 @@ TypePtr FuncType::ShallowClone() f->yield = yield; f->flavor = flavor; f->prototypes = prototypes; + f->captures = captures; return f; } @@ -654,8 +655,6 @@ string FuncType::FlavorString() const } } -FuncType::~FuncType() = default; - int FuncType::MatchesIndex(detail::ListExpr* const index) const { return check_and_promote_args(index, args.get()) ? @@ -698,6 +697,11 @@ bool FuncType::CheckArgs(const std::vector& args, return success; } +void FuncType::SetCaptures(std::optional _captures) + { + captures = std::move(_captures); + } + void FuncType::Describe(ODesc* d) const { if ( d->IsReadable() ) diff --git a/src/Type.h b/src/Type.h index 68c33eca7b..cae2429719 100644 --- a/src/Type.h +++ b/src/Type.h @@ -10,6 +10,7 @@ #include #include "zeek/Obj.h" +#include "zeek/ID.h" #include "zeek/Attr.h" #include "zeek/ZeekList.h" #include "zeek/IntrusivePtr.h" @@ -491,8 +492,6 @@ public: TypePtr ShallowClone() override; - ~FuncType() override; - [[deprecated("Remove in v4.1. Use Params().")]] RecordType* Args() const { return args.get(); } @@ -540,6 +539,31 @@ public: const std::vector& Prototypes() const { return prototypes; } + /** + * A single lambda "capture" (outer variable used in a lambda's body). + */ + struct Capture { + detail::IDPtr id; + bool deep_copy; + }; + + using CaptureList = std::vector; + + /** + * Sets this function's set of captures. Only valid for lambdas. + * + * @param captures if non-nil, a list of the lambda's captures + */ + void SetCaptures(std::optional captures); + + /** + * Returns the captures declared for this function, or nil if none. + * + * @return a vector giving the captures + */ + const std::optional& GetCaptures() const + { return captures; } + protected: friend FuncTypePtr make_intrusive(); @@ -549,6 +573,8 @@ protected: TypePtr yield; FunctionFlavor flavor; std::vector prototypes; + + std::optional captures; // if nil then no captures specified }; class TypeType final : public Type { diff --git a/src/Var.cc b/src/Var.cc index e0962a9d01..001c0f7b36 100644 --- a/src/Var.cc +++ b/src/Var.cc @@ -488,6 +488,119 @@ static bool canonical_arg_types_match(const FuncType* decl, const FuncType* impl return true; } +static auto get_prototype(IDPtr id, FuncTypePtr t) + { + auto decl = id->GetType()->AsFuncType(); + auto prototype = func_type_check(decl, t.get()); + + if ( prototype ) + { + if ( decl->Flavor() == FUNC_FLAVOR_FUNCTION ) + { + // If a previous declaration of the function had + // &default params, automatically transfer any that + // are missing (convenience so that implementations + // don't need to specify the &default expression again). + transfer_arg_defaults(prototype->args.get(), t->Params().get()); + } + else + { + // Warn for trying to use &default parameters in + // hook/event handler body when it already has a + // declaration since only &default in the declaration + // has any effect. + const auto& args = t->Params(); + + for ( int i = 0; i < args->NumFields(); ++i ) + { + auto f = args->FieldDecl(i); + + if ( f->attrs && f->attrs->Find(ATTR_DEFAULT) ) + { + reporter->PushLocation(args->GetLocationInfo()); + reporter->Warning( + "&default on parameter '%s' has no effect (not a %s declaration)", + args->FieldName(i), t->FlavorString().data()); + reporter->PopLocation(); + } + } + } + + if ( prototype->deprecated ) + { + if ( prototype->deprecation_msg.empty() ) + t->Warn(util::fmt("use of deprecated '%s' prototype", id->Name()), + prototype->args.get(), true); + else + t->Warn(util::fmt("use of deprecated '%s' prototype: %s", + id->Name(), prototype->deprecation_msg.data()), + prototype->args.get(), true); + } + } + + else + { + // Allow renaming arguments, but only for the canonical + // prototypes of hooks/events. + if ( canonical_arg_types_match(decl, t.get()) ) + prototype = decl->Prototypes()[0]; + else + t->Error("use of undeclared alternate prototype", id.get()); + } + + return prototype; + } + +static bool check_params(int i, std::optional prototype, + const RecordTypePtr& args, + const RecordTypePtr& canon_args, + const char* module_name) + { + TypeDecl* arg_i; + bool hide = false; + + if ( prototype ) + { + auto it = prototype->offsets.find(i); + + if ( it == prototype->offsets.end() ) + { + // Alternate prototype hides this param + hide = true; + arg_i = canon_args->FieldDecl(i); + } + else + { + // Alternate prototype maps this param to another index + arg_i = args->FieldDecl(it->second); + } + } + else + { + if ( i < args->NumFields() ) + arg_i = args->FieldDecl(i); + else + return false; + } + + auto arg_id = lookup_ID(arg_i->id, module_name); + + if ( arg_id && ! arg_id->IsGlobal() ) + arg_id->Error("argument name used twice"); + + const char* local_name = arg_i->id; + + if ( hide ) + // Note the illegal '-' in hidden name implies we haven't + // clobbered any local variable names. + local_name = util::fmt("%s-hidden", local_name); + + arg_id = install_ID(local_name, module_name, false, false); + arg_id->SetType(arg_i->type); + + return true; + } + void begin_func(IDPtr id, const char* module_name, FunctionFlavor flavor, bool is_redef, FuncTypePtr t, @@ -506,63 +619,7 @@ void begin_func(IDPtr id, const char* module_name, std::optional prototype; if ( id->GetType() ) - { - auto decl = id->GetType()->AsFuncType(); - prototype = func_type_check(decl, t.get()); - - if ( prototype ) - { - if ( decl->Flavor() == FUNC_FLAVOR_FUNCTION ) - { - // If a previous declaration of the function had &default - // params, automatically transfer any that are missing - // (convenience so that implementations don't need to specify - // the &default expression again). - transfer_arg_defaults(prototype->args.get(), t->Params().get()); - } - else - { - // Warn for trying to use &default parameters in hook/event - // handler body when it already has a declaration since only - // &default in the declaration has any effect. - const auto& args = t->Params(); - - for ( int i = 0; i < args->NumFields(); ++i ) - { - auto f = args->FieldDecl(i); - - if ( f->attrs && f->attrs->Find(ATTR_DEFAULT) ) - { - reporter->PushLocation(args->GetLocationInfo()); - reporter->Warning( - "&default on parameter '%s' has no effect (not a %s declaration)", - args->FieldName(i), t->FlavorString().data()); - reporter->PopLocation(); - } - } - } - - if ( prototype->deprecated ) - { - if ( prototype->deprecation_msg.empty() ) - t->Warn(util::fmt("use of deprecated '%s' prototype", id->Name()), - prototype->args.get(), true); - else - t->Warn(util::fmt("use of deprecated '%s' prototype: %s", - id->Name(), prototype->deprecation_msg.data()), - prototype->args.get(), true); - } - } - else - { - // Allow renaming arguments, but only for the canonical - // prototypes of hooks/events. - if ( canonical_arg_types_match(decl, t.get()) ) - prototype = decl->Prototypes()[0]; - else - t->Error("use of undeclared alternate prototype", id.get()); - } - } + prototype = get_prototype(id, t); else if ( is_redef ) id->Error("redef of not-previously-declared value"); @@ -606,49 +663,8 @@ void begin_func(IDPtr id, const char* module_name, push_scope(std::move(id), std::move(attrs)); for ( int i = 0; i < canon_args->NumFields(); ++i ) - { - TypeDecl* arg_i; - bool hide = false; - - if ( prototype ) - { - auto it = prototype->offsets.find(i); - - if ( it == prototype->offsets.end() ) - { - // Alternate prototype hides this param - hide = true; - arg_i = canon_args->FieldDecl(i); - } - else - { - // Alternate prototype maps this param to another index - arg_i = args->FieldDecl(it->second); - } - } - else - { - if ( i < args->NumFields() ) - arg_i = args->FieldDecl(i); - else - break; - } - - auto arg_id = lookup_ID(arg_i->id, module_name); - - if ( arg_id && ! arg_id->IsGlobal() ) - arg_id->Error("argument name used twice"); - - const char* local_name = arg_i->id; - - if ( hide ) - // Note the illegal '-' in hidden name implies we haven't - // clobbered any local variable names. - local_name = util::fmt("%s-hidden", local_name); - - arg_id = install_ID(local_name, module_name, false, false); - arg_id->SetType(arg_i->type); - } + if ( ! check_params(i, prototype, args, canon_args, module_name) ) + break; if ( Attr* depr_attr = find_attr(current_scope()->Attrs().get(), ATTR_DEPRECATED) ) current_scope()->GetID()->MakeDeprecated(depr_attr->GetExpr()); @@ -665,7 +681,7 @@ public: TraversalCode PostExpr(const Expr*) override; std::vector scopes; - std::vector outer_id_references; + std::unordered_set outer_id_references; }; TraversalCode OuterIDBindingFinder::PreExpr(const Expr* expr) @@ -691,7 +707,7 @@ TraversalCode OuterIDBindingFinder::PreExpr(const Expr* expr) // not something we have to worry about also being at outer scope. return TC_CONTINUE; - outer_id_references.push_back(e); + outer_id_references.insert(e); return TC_CONTINUE; } @@ -754,17 +770,10 @@ IDPList gather_outer_ids(Scope* scope, Stmt* body) OuterIDBindingFinder cb(scope); body->Traverse(&cb); - IDPList idl ( cb.outer_id_references.size() ); + IDPList idl; - for ( size_t i = 0; i < cb.outer_id_references.size(); ++i ) - { - auto id = cb.outer_id_references[i]->Id(); - - if ( idl.is_member(id) ) - continue; - - idl.append(id); - } + for ( auto ne : cb.outer_id_references ) + idl.append(ne->Id()); return idl; } diff --git a/src/broker/Data.cc b/src/broker/Data.cc index ab65355ba4..e5f459b952 100644 --- a/src/broker/Data.cc +++ b/src/broker/Data.cc @@ -395,7 +395,7 @@ struct val_converter { if ( t->Tag() != TYPE_FUNC ) return nullptr; - if ( a.size() == 2 ) // We have a closure. + if ( a.size() == 2 ) // we have a closure/capture frame { auto frame = broker::get_if(a[1]); if ( ! frame ) @@ -405,8 +405,19 @@ struct val_converter { if ( ! b ) return nullptr; - if ( ! b->UpdateClosure(*frame) ) - return nullptr; + auto copy_semantics = b->GetType()->GetCaptures().has_value(); + + if ( copy_semantics ) + { + if ( ! b->DeserializeCaptures(*frame) ) + return nullptr; + } + else + { + // Support for deprecated serialization. + if ( ! b->UpdateClosure(*frame) ) + return nullptr; + } } return rval; diff --git a/src/parse.y b/src/parse.y index f5206b752e..8b1bcbfbfe 100644 --- a/src/parse.y +++ b/src/parse.y @@ -51,9 +51,9 @@ %left '$' '[' ']' '(' ')' TOK_HAS_FIELD TOK_HAS_ATTR %nonassoc TOK_AS TOK_IS -%type opt_no_test opt_no_test_block TOK_PATTERN_END +%type opt_no_test opt_no_test_block TOK_PATTERN_END opt_deep %type TOK_ID TOK_PATTERN_TEXT -%type local_id global_id def_global_id event_id global_or_event_id resolve_id begin_func case_type +%type local_id global_id def_global_id event_id global_or_event_id resolve_id begin_lambda case_type %type local_id_list case_type_list %type init_class %type opt_init @@ -72,6 +72,8 @@ %type case_list %type attr %type attr_list opt_attr +%type capture +%type capture_list opt_captures %{ #include @@ -254,6 +256,8 @@ static bool expr_is_table_type_name(const zeek::detail::Expr* expr) zeek::detail::Attr* attr; std::vector* attr_l; zeek::detail::AttrTag attrtag; + zeek::FuncType::Capture* capture; + std::vector* captures; } %% @@ -532,14 +536,10 @@ expr: $$ = new zeek::detail::FieldAssignExpr($2, {zeek::AdoptRef{}, $4}); } - | '$' TOK_ID func_params '=' + | '$' TOK_ID begin_lambda '=' { func_hdr_location = @1; - auto func_id = zeek::detail::current_scope()->GenerateTemporary("anonymous-function"); - func_id->SetInferReturnType(true); - zeek::detail::begin_func(std::move(func_id), zeek::detail::current_module.c_str(), - zeek::FUNC_FLAVOR_FUNCTION, false, - {zeek::AdoptRef{}, $3}); + $3->SetInferReturnType(true); } lambda_body { @@ -1214,7 +1214,7 @@ func_hdr: { zeek::IntrusivePtr id{zeek::AdoptRef{}, $2}; zeek::detail::begin_func(id, zeek::detail::current_module.c_str(), - zeek::FUNC_FLAVOR_FUNCTION, 0, {zeek::NewRef{}, $3}, + zeek::FUNC_FLAVOR_FUNCTION, false, {zeek::NewRef{}, $3}, std::unique_ptr>{$4}); $$ = $3; zeek::detail::zeekygen_mgr->Identifier(std::move(id)); @@ -1229,7 +1229,7 @@ func_hdr: } zeek::detail::begin_func({zeek::NewRef{}, $2}, zeek::detail::current_module.c_str(), - zeek::FUNC_FLAVOR_EVENT, 0, {zeek::NewRef{}, $3}, + zeek::FUNC_FLAVOR_EVENT, false, {zeek::NewRef{}, $3}, std::unique_ptr>{$4}); $$ = $3; } @@ -1238,14 +1238,14 @@ func_hdr: $3->ClearYieldType(zeek::FUNC_FLAVOR_HOOK); $3->SetYieldType(zeek::base_type(zeek::TYPE_BOOL)); zeek::detail::begin_func({zeek::NewRef{}, $2}, zeek::detail::current_module.c_str(), - zeek::FUNC_FLAVOR_HOOK, 0, {zeek::NewRef{}, $3}, + zeek::FUNC_FLAVOR_HOOK, false, {zeek::NewRef{}, $3}, std::unique_ptr>{$4}); $$ = $3; } | TOK_REDEF TOK_EVENT event_id func_params opt_attr { zeek::detail::begin_func({zeek::NewRef{}, $3}, zeek::detail::current_module.c_str(), - zeek::FUNC_FLAVOR_EVENT, 1, {zeek::NewRef{}, $4}, + zeek::FUNC_FLAVOR_EVENT, true, {zeek::NewRef{}, $4}, std::unique_ptr>{$5}); $$ = $4; } @@ -1288,11 +1288,14 @@ lambda_body: { zeek::detail::set_location(@1, @5); - // Code duplication here is sad but needed. end_func actually instantiates the function - // and associates it with an ID. We perform that association later and need to return - // a lambda expression. + // Code duplication here is sad but needed. + // end_func actually instantiates the function + // and associates it with an ID. We perform that + // association later and need to return a lambda + // expression. - // Gather the ingredients for a BroFunc from the current scope + // Gather the ingredients for a Func from the + // current scope. auto ingredients = std::make_unique( zeek::IntrusivePtr{zeek::NewRef{}, zeek::detail::current_scope()}, zeek::IntrusivePtr{zeek::AdoptRef{}, $3}); @@ -1303,19 +1306,88 @@ lambda_body: ; anonymous_function: - TOK_FUNCTION begin_func lambda_body + TOK_FUNCTION begin_lambda lambda_body { $$ = $3; } ; -begin_func: - func_params +begin_lambda: + opt_captures func_params { auto id = zeek::detail::current_scope()->GenerateTemporary("anonymous-function"); - zeek::detail::begin_func(id, zeek::detail::current_module.c_str(), zeek::FUNC_FLAVOR_FUNCTION, 0, {zeek::AdoptRef{}, $1}); + zeek::detail::begin_func(id, zeek::detail::current_module.c_str(), zeek::FUNC_FLAVOR_FUNCTION, false, {zeek::AdoptRef{}, $2}); + + std::optional captures; + + if ( $1 ) + { + captures = zeek::FuncType::CaptureList{}; + captures->reserve($1->size()); + + for ( auto c : *$1 ) + { + captures->emplace_back(*c); + delete c; + } + + delete $1; + } + + $2->SetCaptures(std::move(captures)); $$ = id.release(); } ; +opt_captures: + '[' capture_list ']' + { $$ = $2; } + | + { $$ = nullptr; } + ; + +capture_list: + capture_list ',' capture + { $1->push_back($3); } + | capture + { + $$ = new std::vector; + $$->push_back($1); + } + ; + +capture: + opt_deep TOK_ID + { + zeek::detail::set_location(@2); + auto id = zeek::detail::lookup_ID($2, + zeek::detail::current_module.c_str()); + + if ( ! id ) + zeek::reporter->Error("no such local identifier: %s", $2); + else if ( id->IsType() ) + { + zeek::reporter->Error("cannot specify type in capture: %s", $2); + id = nullptr; + } + else if ( id->IsGlobal() ) + { + zeek::reporter->Error("cannot specify global in capture: %s", $2); + id = nullptr; + } + + delete [] $2; + + $$ = new zeek::FuncType::Capture; + $$->id = id; + $$->deep_copy = $1; + } + ; + +opt_deep: TOK_COPY + { $$ = true; } + | + { $$ = false; } + ; + func_params: '(' formal_args ')' ':' type { $$ = new zeek::FuncType({zeek::AdoptRef{}, $2}, {zeek::AdoptRef{}, $5}, zeek::FUNC_FLAVOR_FUNCTION); } diff --git a/testing/btest/Baseline/language.closure-binding-errors/.stderr b/testing/btest/Baseline/language.closure-binding-errors/.stderr new file mode 100644 index 0000000000..1b82c68894 --- /dev/null +++ b/testing/btest/Baseline/language.closure-binding-errors/.stderr @@ -0,0 +1,10 @@ +### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. +error in <...>/closure-binding-errors.zeek, line 12: a is captured but not used inside lambda (function(){ print no a!}) +error in <...>/closure-binding-errors.zeek, line 13: no such local identifier: a2 +error in <...>/closure-binding-errors.zeek, line 14: b is used inside lambda but not captured (function(){ print b}) +error in <...>/closure-binding-errors.zeek, line 14: a is captured but not used inside lambda (function(){ print b}) +error in <...>/closure-binding-errors.zeek, line 15: a is captured but not used inside lambda (function(){ print b}) +error in <...>/closure-binding-errors.zeek, line 16: b listed multiple times in capture (function(){ print b}) +error in <...>/closure-binding-errors.zeek, line 18: cannot specify global in capture: c +error in <...>/closure-binding-errors.zeek, line 19: cannot specify type in capture: t +error in <...>/closure-binding-errors.zeek, line 9 and <...>/closure-binding-errors.zeek, line 20: already defined (a) diff --git a/testing/btest/Baseline/language.closure-binding-errors/out b/testing/btest/Baseline/language.closure-binding-errors/out new file mode 100644 index 0000000000..49d861c74c --- /dev/null +++ b/testing/btest/Baseline/language.closure-binding-errors/out @@ -0,0 +1 @@ +### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. diff --git a/testing/btest/Baseline/language.closure-binding/out b/testing/btest/Baseline/language.closure-binding/out new file mode 100644 index 0000000000..891492d324 --- /dev/null +++ b/testing/btest/Baseline/language.closure-binding/out @@ -0,0 +1,46 @@ +### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. +4, 10 +6, 8 +7, 7 +4, 10 +5, 8 +6, 7 +4, 10 +5, 9 +6, 8 +4, 10 +5, 8 +6, 7 +4, 10 +5, 9 +6, 8 +4 +2, 10, 47 +4 +2, 8, 47 +3 +1, 7, 47 +4 +2, 10, 47 +5 +3, 8, 47 +6 +4, 7, 47 +4 +2, 10, 47 +5 +3, 9, 47 +6 +4, 8, 47 +4 +2, 10, 91 +5 +3, 9, 91 +6 +4, 9, 91 +4 +2, 10, 91 +5 +3, 10, 91 +6 +4, 10, 91 diff --git a/testing/btest/Baseline/language.closure-sending/recv.recv.out b/testing/btest/Baseline/language.closure-sending/recv.recv.out index 4760591c3c..d48a41b96d 100644 --- a/testing/btest/Baseline/language.closure-sending/recv.recv.out +++ b/testing/btest/Baseline/language.closure-sending/recv.recv.out @@ -2,26 +2,26 @@ hello :-) peer added receiver got ping: function 2 -inside: 1 | outside: 11 | global: 100 +inside: 1 | outside: 12 | global: 100 77 receiver got ping: function 1 begin: 100 | base_step: 2 begin: 100 | base_step: 2 | step: 76 178 receiver got ping: function 2 -inside: 3 | outside: 11 | global: 100 +inside: 3 | outside: 12 | global: 100 79 receiver got ping: function 1 begin: 100 | base_step: 4 begin: 100 | base_step: 4 | step: 76 180 receiver got ping: function 2 -inside: 5 | outside: 11 | global: 100 +inside: 5 | outside: 12 | global: 100 81 receiver got ping: function 1 begin: 100 | base_step: 6 begin: 100 | base_step: 6 | step: 76 182 receiver got ping: function 2 -inside: 7 | outside: 11 | global: 100 +inside: 7 | outside: 12 | global: 100 83 diff --git a/testing/btest/Baseline/language.closure-sending/send.send.out b/testing/btest/Baseline/language.closure-sending/send.send.out index 11c6f46d91..39d70d48d5 100644 --- a/testing/btest/Baseline/language.closure-sending/send.send.out +++ b/testing/btest/Baseline/language.closure-sending/send.send.out @@ -3,7 +3,7 @@ hello :) peer added begin: 100 | base_step: 50 sender got pong: function 2 -inside: 1 | outside: 11 | global: 10 +inside: 1 | outside: 12 | global: 10 77 begin: 100 | base_step: 50 sender got pong: function 1 @@ -12,7 +12,7 @@ begin: 178 | base_step: 2 | step: 76 256 begin: 100 | base_step: 50 sender got pong: function 2 -inside: 3 | outside: 11 | global: 10 +inside: 3 | outside: 12 | global: 10 79 begin: 100 | base_step: 50 sender got pong: function 1 @@ -21,7 +21,7 @@ begin: 180 | base_step: 4 | step: 76 260 begin: 100 | base_step: 50 sender got pong: function 2 -inside: 5 | outside: 11 | global: 10 +inside: 5 | outside: 12 | global: 10 81 begin: 100 | base_step: 50 sender got pong: function 1 diff --git a/testing/btest/Baseline/language.closure-sending2/recv.recv.out b/testing/btest/Baseline/language.closure-sending2/recv.recv.out new file mode 100644 index 0000000000..4760591c3c --- /dev/null +++ b/testing/btest/Baseline/language.closure-sending2/recv.recv.out @@ -0,0 +1,27 @@ +### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. +hello :-) +peer added +receiver got ping: function 2 +inside: 1 | outside: 11 | global: 100 +77 +receiver got ping: function 1 +begin: 100 | base_step: 2 +begin: 100 | base_step: 2 | step: 76 +178 +receiver got ping: function 2 +inside: 3 | outside: 11 | global: 100 +79 +receiver got ping: function 1 +begin: 100 | base_step: 4 +begin: 100 | base_step: 4 | step: 76 +180 +receiver got ping: function 2 +inside: 5 | outside: 11 | global: 100 +81 +receiver got ping: function 1 +begin: 100 | base_step: 6 +begin: 100 | base_step: 6 | step: 76 +182 +receiver got ping: function 2 +inside: 7 | outside: 11 | global: 100 +83 diff --git a/testing/btest/Baseline/language.closure-sending2/send.send.out b/testing/btest/Baseline/language.closure-sending2/send.send.out new file mode 100644 index 0000000000..1d644231bc --- /dev/null +++ b/testing/btest/Baseline/language.closure-sending2/send.send.out @@ -0,0 +1,32 @@ +### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. +hello :) +peer added +begin: 100 | base_step: 50 +sender got pong: function 2 +inside: 1 | outside: 11 | global: 10 +77 +begin: 100 | base_step: 50 +sender got pong: function 1 +begin: 100 | base_step: 2 +begin: 100 | base_step: 2 | step: 76 +178 +begin: 100 | base_step: 50 +sender got pong: function 2 +inside: 3 | outside: 11 | global: 10 +79 +begin: 100 | base_step: 50 +sender got pong: function 1 +begin: 100 | base_step: 4 +begin: 100 | base_step: 4 | step: 76 +180 +begin: 100 | base_step: 50 +sender got pong: function 2 +inside: 5 | outside: 11 | global: 10 +81 +begin: 100 | base_step: 50 +sender got pong: function 1 +begin: 100 | base_step: 6 +begin: 100 | base_step: 6 | step: 76 +182 +begin: 100 | base_step: 50 +peer lost diff --git a/testing/btest/Baseline/language.function-closures/out b/testing/btest/Baseline/language.function-closures/out index 648170033b..13869d6338 100644 --- a/testing/btest/Baseline/language.function-closures/out +++ b/testing/btest/Baseline/language.function-closures/out @@ -19,8 +19,8 @@ expect: 160 160 expect: 225 225 -expect: 290 -290 +expect: 225 +225 tables: diff --git a/testing/btest/Baseline/language.hook_calls/invalid.out b/testing/btest/Baseline/language.hook_calls/invalid.out index 2ce20fc128..762168bcef 100644 --- a/testing/btest/Baseline/language.hook_calls/invalid.out +++ b/testing/btest/Baseline/language.hook_calls/invalid.out @@ -1,11 +1,9 @@ ### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. error in ./invalid.zeek, line 9: hook cannot be called directly, use hook operator (myhook) -warning in ./invalid.zeek, line 9: expression value ignored (myhook(3)) error in ./invalid.zeek, line 10: hook cannot be called directly, use hook operator (myhook) error in ./invalid.zeek, line 11: hook cannot be called directly, use hook operator (myhook) error in ./invalid.zeek, line 12: not a valid hook call expression (2 + 2) warning in ./invalid.zeek, line 12: expression value ignored (2 + 2) error in ./invalid.zeek, line 13: not a valid hook call expression (2 + 2) error in ./invalid.zeek, line 15: hook cannot be called directly, use hook operator (h) -warning in ./invalid.zeek, line 15: expression value ignored (h(3)) error in ./invalid.zeek, line 16: hook cannot be called directly, use hook operator (h) diff --git a/testing/btest/language/closure-binding-errors.zeek b/testing/btest/language/closure-binding-errors.zeek new file mode 100644 index 0000000000..694918dc7d --- /dev/null +++ b/testing/btest/language/closure-binding-errors.zeek @@ -0,0 +1,21 @@ +# @TEST-EXEC-FAIL: zeek -b %INPUT >out +# @TEST-EXEC: TEST_DIFF_CANONIFIER=$SCRIPTS/diff-remove-abspath btest-diff .stderr + +global c: string; +type t: addr; + +event zeek_init() + { + local a = 3; + local b = "hi there"; + + local f1 = function[a]() { print "no a!"; }; + local f2 = function[a2](a2: addr) { print a2; }; + local f3 = function[a]() { print b; }; + local f4 = function[a, b]() { print b; }; + local f5 = function[b, b]() { print b; }; + local f6 = function() { print c; }; # should be okay + local f7 = function[c]() { print c; }; + local f8 = function[t]() { local t2: t; }; + local f9 = function[a]() { local a = 4; }; # error due to shadowing + } diff --git a/testing/btest/language/closure-binding.zeek b/testing/btest/language/closure-binding.zeek new file mode 100644 index 0000000000..6e1596b6f9 --- /dev/null +++ b/testing/btest/language/closure-binding.zeek @@ -0,0 +1,199 @@ +# @TEST-EXEC: zeek -b %INPUT >out +# @TEST-EXEC: btest-diff out + +type mutable_aggregate: record { x: count; }; + +function reference_capture() : function() + { + local a = 3; + local b = mutable_aggregate($x=11); + local f = function() { print ++a, --b$x; }; + f(); + ++a; + --b$x; + f(); + + return f; + } + +function shallow_copy_capture() : function() + { + local a = 3; + local b = mutable_aggregate($x=11); + local f = function[a, b]() { print ++a, --b$x; }; + f(); + ++a; + --b$x; + f(); + + return f; + } + +function deep_copy_capture() : function() + { + local a = 3; + local b = mutable_aggregate($x=11); + local f = function[copy a, copy b]() { print ++a, --b$x; }; + f(); + ++a; + --b$x; + f(); + + return f; + } + +function mixed_copy_capture_a() : function() + { + local a = 3; + local b = mutable_aggregate($x=11); + local f = function[copy a, b]() { print ++a, --b$x; }; + f(); + ++a; + --b$x; + f(); + + return f; + } + +function mixed_copy_capture_b() : function() + { + local a = 3; + local b = mutable_aggregate($x=11); + local f = function[a, copy b]() { print ++a, --b$x; }; + f(); + ++a; + --b$x; + f(); + + return f; + } + +function reference_capture_double() : function() : function() + { + local a = 3; + local b = mutable_aggregate($x=11); + local f = function() : function() { + local c = mutable_aggregate($x=88); + print ++a; + local f2 = function() { print a -= 2, --b$x, c$x += 3; }; + c$x = c$x / 2; + return f2; + }; + f()(); + ++a; + --b$x; + f()(); + + return f; + } + +function shallow_copy_capture_double() : function() : function() + { + local a = 3; + local b = mutable_aggregate($x=11); + local f = function[a,b]() : function() { + local c = mutable_aggregate($x=88); + print ++a; + local f2 = function[a, b, c]() { print a -= 2, --b$x, c$x += 3; }; + c$x = c$x / 2; + return f2; + }; + f()(); + ++a; + --b$x; + f()(); + + return f; + } + +function deep_copy1_capture_double() : function() : function() + { + local a = 3; + local b = mutable_aggregate($x=11); + local f = function[copy a, copy b]() : function() { + local c = mutable_aggregate($x=88); + print ++a; + local f2 = function[a, b, c]() { print a -= 2, --b$x, c$x += 3; }; + c$x = c$x / 2; + return f2; + }; + f()(); + ++a; + --b$x; + f()(); + + return f; + } + +function deep_copy2_capture_double() : function() : function() + { + local a = 3; + local b = mutable_aggregate($x=11); + local f = function[a, b]() : function() { + local c = mutable_aggregate($x=88); + print ++a; + local f2 = function[copy a, copy b, copy c]() + { print a -= 2, --b$x, c$x += 3; }; + c$x = c$x / 2; + return f2; + }; + f()(); + ++a; + --b$x; + f()(); + + return f; + } + +function deep_copy3_capture_double() : function() : function() + { + local a = 3; + local b = mutable_aggregate($x=11); + local f = function[copy a, copy b]() : function() { + local c = mutable_aggregate($x=88); + print ++a; + local f2 = function[copy a, copy b, copy c]() + { print a -= 2, --b$x, c$x += 3; }; + c$x = c$x / 2; + return f2; + }; + f()(); + ++a; + --b$x; + f()(); + + return f; + } + +event zeek_init() + { + local rc = reference_capture(); + rc(); + + local scc = shallow_copy_capture(); + scc(); + + local dcc = deep_copy_capture(); + dcc(); + + local mcca = mixed_copy_capture_a(); + mcca(); + + local mccb = mixed_copy_capture_b(); + mccb(); + + local rc2 = reference_capture_double(); + rc2()(); + + local scc2 = shallow_copy_capture_double(); + scc2()(); + + local dcc2_1 = deep_copy1_capture_double(); + dcc2_1()(); + + local dcc2_2 = deep_copy2_capture_double(); + dcc2_2()(); + + local dcc2_3 = deep_copy3_capture_double(); + dcc2_3()(); + } diff --git a/testing/btest/language/closure-sending-naming.zeek b/testing/btest/language/closure-sending-naming.zeek index 4210c758cb..27bc367d6d 100644 --- a/testing/btest/language/closure-sending-naming.zeek +++ b/testing/btest/language/closure-sending-naming.zeek @@ -33,7 +33,7 @@ function send_event() local log : myfunctype = function(c: count) : function(d: count) : count { # print fmt("inside: %s | outside: %s | global: %s", c, event_count, global_with_same_name); - return function(d: count) : count { return d + c; }; + return function[c](d: count) : count { return d + c; }; }; local e2 = Broker::make_event(ping, "function 1", log); @@ -85,7 +85,7 @@ function my_funcs() local l : myfunctype = function(c: count) : function(d: count) : count { print fmt("dogs"); - return function(d: count) : count { return d + c; }; + return function[c](d: count) : count { return d + c; }; }; } diff --git a/testing/btest/language/closure-sending.zeek b/testing/btest/language/closure-sending.zeek index c6ff25464c..f5a3e44035 100644 --- a/testing/btest/language/closure-sending.zeek +++ b/testing/btest/language/closure-sending.zeek @@ -54,6 +54,7 @@ function send_event() local stepper = l(50); ++n; + ++event_count; if ( n % 2 == 0) { local e2 = Broker::make_event(ping, "function 1", l); diff --git a/testing/btest/language/closure-sending2.zeek b/testing/btest/language/closure-sending2.zeek new file mode 100644 index 0000000000..c6a421a708 --- /dev/null +++ b/testing/btest/language/closure-sending2.zeek @@ -0,0 +1,167 @@ +# @TEST-PORT: BROKER_PORT +# +# @TEST-EXEC: btest-bg-run recv "zeek -B broker -b ../recv.zeek >recv.out" +# @TEST-EXEC: btest-bg-run send "zeek -B broker -b ../send.zeek >send.out" +# +# @TEST-EXEC: btest-bg-wait 45 +# @TEST-EXEC: btest-diff recv/recv.out +# @TEST-EXEC: btest-diff send/send.out + +@TEST-START-FILE send.zeek + +redef exit_only_after_terminate = T; +type myfunctype: function(c: count) : function(d: count) : count; + +global global_with_same_name = 10; + +global ping: event(msg: string, f: myfunctype); + +event zeek_init() + { + print "hello :)"; + Broker::subscribe("zeek/event/my_topic"); + Broker::peer("127.0.0.1", to_port(getenv("BROKER_PORT"))); + } + +global n = 0; + +function send_event() + { + # in this frame event_count has an offset of three. + # in the receiving frame it has an offset of one. + # this tests to ensure that id lookups are being routed properly. + local dog = 0; + local not_dog = 1; + local event_count = 11; + + local log : myfunctype = function[event_count](c: count) : function(d: count) : count + { + print fmt("inside: %s | outside: %s | global: %s", c, event_count, global_with_same_name); + return function[c](d: count) : count { return d + c; }; + }; + + local two_part_adder_maker = function (begin : count) : function (base_step : count) : function ( step : count) : count + { + return function [begin](base_step : count) : function (step : count) : count + { + print fmt("begin: %s | base_step: %s", begin, base_step); + return function[begin, base_step] (step : count) : count + { + print fmt("begin: %s | base_step: %s | step: %s", begin, base_step, step); + return (begin += base_step + step); }; }; }; + + local l = two_part_adder_maker(100); + local stepper = l(50); + + ++n; + ++event_count; + if ( n % 2 == 0) + { + local e2 = Broker::make_event(ping, "function 1", l); + Broker::publish("zeek/event/my_topic", e2); + } + else + { + local e = Broker::make_event(ping, "function 2", log); + Broker::publish("zeek/event/my_topic", e); + } + } + +event Broker::peer_added(endpoint: Broker::EndpointInfo, msg: string) + { + print "peer added"; + send_event(); + } + +event Broker::peer_lost(endpoint: Broker::EndpointInfo, msg: string) + { + print "peer lost"; + terminate(); + } + +event pong(msg: string, f: myfunctype) + { + print fmt("sender got pong: %s", msg); + local adder = f(n); + print adder(76); + send_event(); + } + +@TEST-END-FILE + +@TEST-START-FILE recv.zeek + +redef exit_only_after_terminate = T; +const events_to_recv = 7; +type myfunctype: function(c: count) : function(d: count) : count; +# type myfunctype: function(c: count); + +global global_with_same_name = 100; + +global pong: event(msg: string, f: myfunctype); + +# This is one, of many, ways to declare your functions that you plan to receive. +# All you are doing is giving the parser a version of their body, so they can be +# anywhere. This seems to work quite nicely because it keeps them scoped and stops +# them from ever being evaluated. +function my_funcs() + { + return; + + local begin = 100; + local event_count = begin; + + local l : myfunctype = function[event_count](c: count) : function(d: count) : count + { + print fmt("inside: %s | outside: %s | global: %s", c, event_count, global_with_same_name); + return function[c](d: count) : count { return d + c; }; + }; + + local dog_fish = function [begin](base_step : count) : function (step : count) : count + { +# actual formatting doesn't matter for name resolution. +print fmt("begin: %s | base_step: %s", begin, base_step); + return function [begin, base_step](step : count) : count + { + print fmt("begin: %s | base_step: %s | step: %s", begin, base_step, step); + return (begin += base_step + step); }; }; + } + +event zeek_init() + { + print "hello :-)"; + Broker::subscribe("zeek/event/my_topic"); + Broker::listen("127.0.0.1", to_port(getenv("BROKER_PORT"))); + } + +event Broker::peer_added(endpoint: Broker::EndpointInfo, msg: string) + { + print "peer added"; + } + +event Broker::peer_lost(endpoint: Broker::EndpointInfo, msg: string) + { + print "peer lost"; + } + +global n = 0; + +event ping(msg: string, f: myfunctype) + { + print fmt("receiver got ping: %s", msg); + ++n; + local adder = f(n); + print adder(76); + + if ( n == events_to_recv ) + { + terminate(); + } + else + { + local e = Broker::make_event(pong, msg, f); + Broker::publish("zeek/event/my_topic", e); + } + } + +@TEST-END-FILE diff --git a/testing/btest/language/function-closures.zeek b/testing/btest/language/function-closures.zeek index 80112da0cf..c4e4505481 100644 --- a/testing/btest/language/function-closures.zeek +++ b/testing/btest/language/function-closures.zeek @@ -3,9 +3,11 @@ global numberone : count = 1; +type mutable_aggregate: record { x: count; }; + function make_count_upper (start : count) : function(step : count) : count { - return function(step : count) : count + return function[start](step : count) : count { return (start += (step + numberone)); }; } @@ -14,7 +16,7 @@ function dog_maker(name: string, weight: count) : function (action: string) local eat = function (lbs: count) { print fmt("eat i weigh %s", lbs); }; local bark = function (who: string) { print fmt("bark i am %s", who); }; - local dog = function (action: string) + local dog = function [eat, bark, name, weight](action: string) { switch action { @@ -72,14 +74,14 @@ event zeek_init() print c(1) == two(3); # a little more complicated ... - local cat_dog = 100; - local add_n_and_m = function(n: count) : function(m : count) : function(o : count) : count + local cat_dog = mutable_aggregate($x=100); + local add_n_and_m = function[cat_dog](n: count) : function(m : count) : function(o : count) : count { - cat_dog += 1; + cat_dog$x += 1; local can_we_make_variables_inside = 11; - return function(m : count) : function(o : count) : count - { return function(o : count) : count - { return n + m + o + cat_dog + can_we_make_variables_inside; }; }; + return function[can_we_make_variables_inside, cat_dog, n](m : count) : function(o : count) : count + { return function[cat_dog, can_we_make_variables_inside, m, n](o : count) : count + { return n + m + o + cat_dog$x + can_we_make_variables_inside; }; }; }; local add_m = add_n_and_m(2); @@ -95,14 +97,14 @@ event zeek_init() # can mutate closure: print "expect: 101"; - print cat_dog; + print cat_dog$x; # complicated - has state across calls local two_part_adder_maker = function (begin : count) : function (base_step : count) : function ( step : count) : count { - return function (base_step : count) : function (step : count) : count + return function [begin](base_step : count) : function (step : count) : count { - return function (step : count) : count + return function [base_step, begin](step : count) : count { return (begin += base_step + step); }; }; }; @@ -115,10 +117,17 @@ event zeek_init() print stepper(15); # another copy check - print "expect: 290"; + # + # under old reference capture semantics, this would print 290 because + # the twotwofive copy wouldn't have a copy of the "begin" variable but + # instead a reference to it; under copy capture semantics, though, + # those are separate values, so executing stepper() after the copy + # won't affect the copy + # + print "expect: 225"; print twotwofive(15); - local hamster : count = 3; + local hamster = mutable_aggregate($x=3); print ""; print "tables:"; @@ -128,10 +137,10 @@ event zeek_init() [1] = "symmetric active", [2] = "symmetric passive", [3] = "client", - } &default = function(i: count):string { return fmt("unknown-%d. outside-%d", i, hamster += 1); } &redef; + } &default = function[hamster](i: count):string { return fmt("unknown-%d. outside-%d", i, hamster$x += 1); } &redef; # changing the value here will show in the function. - hamster += hamster; + hamster$x += hamster$x; print "expect: unknown-11. outside-7"; print modes[11]; @@ -156,7 +165,7 @@ event zeek_init() [1] = "symmetric active", [2] = "symmetric passive", [3] = "client" - )&default = function(i: count):string { return fmt("unknown-%d. outside-%d", i, hamster_also += 1); } &redef; + )&default = function[hamster_also](i: count):string { return fmt("unknown-%d. outside-%d", i, hamster_also += 1); } &redef; print "expect: unknown-11. outside-4"; print modes_also[11]; diff --git a/testing/btest/language/lambda-escaping.zeek b/testing/btest/language/lambda-escaping.zeek index 7552d39cb2..8e2deb1af6 100644 --- a/testing/btest/language/lambda-escaping.zeek +++ b/testing/btest/language/lambda-escaping.zeek @@ -13,7 +13,7 @@ event zeek_init() &priority=+10 { local outer = 101; - local lambda = function() + local lambda = function[outer]() { print outer; }; lambda(); diff --git a/testing/btest/language/lambda-nested-copy.zeek b/testing/btest/language/lambda-nested-copy.zeek index 6cf0e6952a..ae4578fdcd 100644 --- a/testing/btest/language/lambda-nested-copy.zeek +++ b/testing/btest/language/lambda-nested-copy.zeek @@ -3,9 +3,9 @@ local outer = 100; -local lambda = function() +local lambda = function[outer]() { - local inner = function(a: count, b: count, c: count, d: count, e: count, f: count) + local inner = function[outer](a: count, b: count, c: count, d: count, e: count, f: count) { print outer + f; }; diff --git a/testing/btest/language/lambda-record-field.zeek b/testing/btest/language/lambda-record-field.zeek index bcb7e1bc75..fb63dbaab8 100644 --- a/testing/btest/language/lambda-record-field.zeek +++ b/testing/btest/language/lambda-record-field.zeek @@ -8,6 +8,6 @@ type myrec: record { event zeek_init() { local w = "world"; - local mr = myrec($foo(a: string) = { print a + w; }); + local mr = myrec($foo[w](a: string) = { print a + w; }); mr$foo("hello"); } diff --git a/testing/btest/language/lambda-zeek-init.zeek b/testing/btest/language/lambda-zeek-init.zeek index 2576831c22..cc596b431b 100644 --- a/testing/btest/language/lambda-zeek-init.zeek +++ b/testing/btest/language/lambda-zeek-init.zeek @@ -5,7 +5,7 @@ event zeek_init() &priority=+10 { local outer = 101; - local lambda = function() + local lambda = function[outer]() { print outer + 2; }; lambda(); diff --git a/testing/btest/language/more-closure-tests.zeek b/testing/btest/language/more-closure-tests.zeek index f8a01cd0a8..2f6d6fbd91 100644 --- a/testing/btest/language/more-closure-tests.zeek +++ b/testing/btest/language/more-closure-tests.zeek @@ -17,7 +17,7 @@ function map_1 (f: function(a: count): count, v: vector of count) : vector of co # stacks two functions function stacker (one : function(a: count): count, two: function (b: count): count): function(c: count): count { - return function (c: count): count + return function [one,two](c: count): count { return one(two(c)); }; @@ -25,7 +25,7 @@ function stacker (one : function(a: count): count, two: function (b: count): cou function make_dog(name: string, weight: count) : function(i: string, item: string) { - return function(i: string, item: string) + return function[name, weight](i: string, item: string) { switch i { @@ -69,7 +69,7 @@ event zeek_init() local make_laster = function(start: count) : function(i: count): count { - return function(i: count): count + return function[start](i: count): count { local temp = i; i += start; @@ -111,7 +111,7 @@ event zeek_init() local vs = vector("dog", "cat", "fish"); for (i in vs) { - mfs += function() { print i, vs[i]; }; + mfs += function[i, vs]() { print i, vs[i]; }; } for ( i in mfs) mfs[i](); diff --git a/testing/btest/language/outer_param_binding.zeek b/testing/btest/language/outer_param_binding.zeek index 1f3f32a1fc..f3a4686f24 100644 --- a/testing/btest/language/outer_param_binding.zeek +++ b/testing/btest/language/outer_param_binding.zeek @@ -9,7 +9,7 @@ function bar(b: string, c: string) { local f: Foo; local d = 8; - f = [$x=function(a: string) : string + f = [$x=function[b, c, d](a: string) : string { local x = 0; # Fail here: we've captured the closure.