diff --git a/CHANGES b/CHANGES index 206a804ab7..f8ca072f83 100644 --- a/CHANGES +++ b/CHANGES @@ -1,4 +1,24 @@ +2.5-710 | 2018-06-26 18:06:22 -0500 + + * Add memory leak unit test for pattern operations (Corelight) + + * fixed 3 leaks in creating pattern values (Vern Paxson) + + * add & and | operators for patterns (Vern Paxson) + + * deprecate merge_patterns() (Vern Paxson) + + * deprecate boolean scalar+vector operations (Vern Paxson) + + * deprecate mixing scalars and vectors (Vern Paxson) + + * deprecate && / || operators for patterns (Vern Paxson) + +2.5-690 | 2018-06-26 15:05:23 -0500 + + * Fix deprecated actor_system_config field usages (Corelight) + 2.5-689 | 2018-06-26 11:45:52 -0500 * Remove header self-inclusions (Corelight) diff --git a/NEWS b/NEWS index 93a28cb200..a4f233a116 100644 --- a/NEWS +++ b/NEWS @@ -16,7 +16,7 @@ New Functionality to the version in 2.5), and much of its implementation has been redone. There's a new script-level "broker" framework that supersedes the old "communication" framework, which is now - depracated. The "cluster" and "control" frameworks have been ported + deprecated. The "cluster" and "control" frameworks have been ported to Broker; same for BroControl. For more about the new Broker framework, see doc/frameworks/broker.rst (there's also guide there for porting existing Bro scripts to Broker). For more about Broker @@ -249,10 +249,16 @@ New Functionality '^' are binary "and", "or" and "xor" operators, and '~' is a unary ones-complement operator. +- The '&' and '|' operators can apply to patterns, too. p1 & p2 yields + a pattern that represents matching p1 followed by p2, and p1 | p2 yields + a pattern representing matching p1 or p2. The p1 | p2 functionality was + semi-present in previous versions of Bro, but required constants as + its operands; now you can use any pattern-valued expressions. + Changed Functionality --------------------- - - ALl communication is now handled through Broker, requiring changes + - All communication is now handled through Broker, requiring changes to existing scripts to port them over to the new API. The Broker framework documentation comes with a porting guide. @@ -356,6 +362,16 @@ Deprecated Functionality removal with the next Bro release. Bro's new configuration framework is taking its place. +- Mixing of scalars and vectors, such as "v + e" yielding a vector + corresponding to the vector v with the scalar e added to each of + its elements, has been deprecated. + +- The built-in function merge_pattern() has been deprecated. It will + be replaced by the '&' operator for patterns. + +- The undocumented feature of using "&&" and "||" operators for patterns + has been deprecated. + Bro 2.5.1 ========= diff --git a/VERSION b/VERSION index b34c5c7be1..ca1bc4e12b 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.5-689 +2.5-710 diff --git a/aux/broker b/aux/broker index 17fcb73a30..6d5b3c785c 160000 --- a/aux/broker +++ b/aux/broker @@ -1 +1 @@ -Subproject commit 17fcb73a30388862f0a921040a605ac38f47ff74 +Subproject commit 6d5b3c785c3c1e517c4c7596beda23e411edeff2 diff --git a/doc/script-reference/types.rst b/doc/script-reference/types.rst index de5fa689f9..8fb717a74d 100644 --- a/doc/script-reference/types.rst +++ b/doc/script-reference/types.rst @@ -237,13 +237,19 @@ Here is a more detailed description of each type: is false since "oob" does not appear at the start of "foobar". The ``!in`` operator would yield the negation of ``in``. - Finally, you can create a disjunction (either-or) of two literal patterns + You can create a disjunction (either-or) of two patterns using the ``|`` operator. For example:: /foo/ | /bar/ in "foobar" - yields true, like in the similar example above. (This does not presently - work for variables whose values are patterns, however.) + yields true, like in the similar example above. You can also + create the conjunction (concatenation) of patterns using the ``&`` + operator. For example: + + /foo/ & /bar/ in "foobar" + + will yield true because the pattern /(foo)(bar)/ appears in + the string "foobar". .. bro:type:: port diff --git a/src/3rdparty b/src/3rdparty index c78abc8454..0b10bb2943 160000 --- a/src/3rdparty +++ b/src/3rdparty @@ -1 +1 @@ -Subproject commit c78abc8454932019f030045340348560a8ac9b23 +Subproject commit 0b10bb2943beaf972ddc494ba34699ed37ef3dbf diff --git a/src/Expr.cc b/src/Expr.cc index 1ab82853c3..4c3c7a536a 100644 --- a/src/Expr.cc +++ b/src/Expr.cc @@ -663,6 +663,9 @@ Val* BinaryExpr::Fold(Val* v1, Val* v2) const if ( it == TYPE_INTERNAL_STRING ) return StringFold(v1, v2); + if ( v1->Type()->Tag() == TYPE_PATTERN ) + return PatternFold(v1, v2); + if ( it == TYPE_INTERNAL_ADDR ) return AddrFold(v1, v2); @@ -849,6 +852,21 @@ Val* BinaryExpr::StringFold(Val* v1, Val* v2) const return new Val(result, TYPE_BOOL); } +Val* BinaryExpr::PatternFold(Val* v1, Val* v2) const + { + const RE_Matcher* re1 = v1->AsPattern(); + const RE_Matcher* re2 = v2->AsPattern(); + + if ( tag != EXPR_AND && tag != EXPR_OR ) + BadTag("BinaryExpr::PatternFold"); + + RE_Matcher* res = tag == EXPR_AND ? + RE_Matcher_conjunction(re1, re2) : + RE_Matcher_disjunction(re1, re2); + + return new PatternVal(res); + } + Val* BinaryExpr::AddrFold(Val* v1, Val* v2) const { IPAddr a1 = v1->AsAddr(); @@ -909,11 +927,17 @@ void BinaryExpr::PromoteOps(TypeTag t) TypeTag bt1 = op1->Type()->Tag(); TypeTag bt2 = op2->Type()->Tag(); - if ( IsVector(bt1) ) + bool is_vec1 = IsVector(bt1); + bool is_vec2 = IsVector(bt2); + + if ( is_vec1 ) bt1 = op1->Type()->AsVectorType()->YieldType()->Tag(); - if ( IsVector(bt2) ) + if ( is_vec2 ) bt2 = op2->Type()->AsVectorType()->YieldType()->Tag(); + if ( (is_vec1 || is_vec2) && ! (is_vec1 && is_vec2) ) + reporter->Warning("mixing vector and scalar operands is deprecated"); + if ( bt1 != t ) op1 = new ArithCoerceExpr(op1, t); if ( bt2 != t ) @@ -1003,7 +1027,10 @@ IncrExpr::IncrExpr(BroExprTag arg_tag, Expr* arg_op) if ( ! IsIntegral(t->AsVectorType()->YieldType()->Tag()) ) ExprError("vector elements must be integral for increment operator"); else + { + reporter->Warning("increment/decrement operations for vectors deprecated"); SetType(t->Ref()); + } } else { @@ -1689,13 +1716,20 @@ BoolExpr::BoolExpr(BroExprTag arg_tag, Expr* arg_op1, Expr* arg_op2) if ( BothBool(bt1, bt2) ) { if ( is_vector(op1) || is_vector(op2) ) + { + if ( ! (is_vector(op1) && is_vector(op2)) ) + reporter->Warning("mixing vector and scalar operands is deprecated"); SetType(new VectorType(base_type(TYPE_BOOL))); + } else SetType(base_type(TYPE_BOOL)); } else if ( bt1 == TYPE_PATTERN && bt2 == bt1 ) + { + reporter->Warning("&& and || operators deprecated for pattern operands"); SetType(base_type(TYPE_PATTERN)); + } else ExprError("requires boolean operands"); @@ -1706,22 +1740,6 @@ Val* BoolExpr::DoSingleEval(Frame* f, Val* v1, Expr* op2) const if ( ! v1 ) return 0; - if ( Type()->Tag() == TYPE_PATTERN ) - { - Val* v2 = op2->Eval(f); - if ( ! v2 ) - return 0; - - RE_Matcher* re1 = v1->AsPattern(); - RE_Matcher* re2 = v2->AsPattern(); - - RE_Matcher* res = tag == EXPR_AND_AND ? - RE_Matcher_conjunction(re1, re2) : - RE_Matcher_disjunction(re1, re2); - - return new PatternVal(res); - } - if ( tag == EXPR_AND_AND ) { if ( v1->IsZero() ) @@ -1786,7 +1804,7 @@ Val* BoolExpr::Eval(Frame* f) const VectorVal* result = 0; - // It's either and EXPR_AND_AND or an EXPR_OR_OR. + // It's either an EXPR_AND_AND or an EXPR_OR_OR. bool is_and = (tag == EXPR_AND_AND); if ( scalar_v->IsZero() == is_and ) @@ -1883,6 +1901,16 @@ BitExpr::BitExpr(BroExprTag arg_tag, Expr* arg_op1, Expr* arg_op2) SetType(base_type(TYPE_COUNT)); } + else if ( bt1 == TYPE_PATTERN ) + { + if ( bt2 != TYPE_PATTERN ) + ExprError("cannot mix pattern and non-pattern operands"); + else if ( tag == EXPR_XOR ) + ExprError("'^' operator does not apply to patterns"); + else + SetType(base_type(TYPE_PATTERN)); + } + else ExprError("requires \"count\" operands"); } diff --git a/src/Expr.h b/src/Expr.h index 9fc9aa15ed..c21547d91c 100644 --- a/src/Expr.h +++ b/src/Expr.h @@ -329,6 +329,9 @@ protected: // Same for when the constants are strings. virtual Val* StringFold(Val* v1, Val* v2) const; + // Same for when the constants are patterns. + virtual Val* PatternFold(Val* v1, Val* v2) const; + // Same for when the constants are addresses or subnets. virtual Val* AddrFold(Val* v1, Val* v2) const; virtual Val* SubNetFold(Val* v1, Val* v2) const; diff --git a/src/NFA.cc b/src/NFA.cc index 8fb78a7131..c53aa4304b 100644 --- a/src/NFA.cc +++ b/src/NFA.cc @@ -12,6 +12,7 @@ NFA_State::NFA_State(int arg_sym, EquivClass* ec) sym = arg_sym; ccl = 0; accept = NO_ACCEPT; + first_trans_is_back_ref = false; mark = 0; epsclosure = 0; id = ++nfa_state_id; @@ -33,6 +34,7 @@ NFA_State::NFA_State(CCL* arg_ccl) sym = SYM_CCL; ccl = arg_ccl; accept = NO_ACCEPT; + first_trans_is_back_ref = false; mark = 0; id = ++nfa_state_id; epsclosure = 0; @@ -41,7 +43,8 @@ NFA_State::NFA_State(CCL* arg_ccl) NFA_State::~NFA_State() { for ( int i = 0; i < xtions.length(); ++i ) - Unref(xtions[i]); + if ( i > 0 || ! first_trans_is_back_ref ) + Unref(xtions[i]); delete epsclosure; } @@ -247,7 +250,10 @@ void NFA_Machine::MakePositiveClosure() { AppendEpsilon(); final_state->AddXtion(first_state); - Ref(first_state); + + // Don't Ref the state the final epsilon points to, otherwise we'll + // have reference cycles that lead to leaks. + final_state->SetFirstTransIsBackRef(); } void NFA_Machine::MakeRepl(int lower, int upper) @@ -307,6 +313,13 @@ NFA_Machine* make_alternate(NFA_Machine* m1, NFA_Machine* m2) m2->AppendState(last); Ref(last); + // Keep these around. + Ref(m1->FirstState()); + Ref(m2->FirstState()); + + Unref(m1); + Unref(m2); + return new NFA_Machine(first, last); } diff --git a/src/NFA.h b/src/NFA.h index 560f0930b4..79c3961dd5 100644 --- a/src/NFA.h +++ b/src/NFA.h @@ -46,6 +46,8 @@ public: NFA_State* Mark() const { return mark; } void ClearMarks(); + void SetFirstTransIsBackRef() { first_trans_is_back_ref = true; } + int TransSym() const { return sym; } CCL* TransCCL() const { return ccl; } int ID() const { return id; } @@ -62,7 +64,13 @@ protected: int sym; // if SYM_CCL, then use ccl CCL* ccl; // if nil, then use sym int accept; + + // Whether the first transition points backwards. Used + // to avoid reference-counting loops. + bool first_trans_is_back_ref; + int id; // number that uniquely identifies this state + NFA_state_list xtions; NFA_state_list* epsclosure; NFA_State* mark; diff --git a/src/RE.cc b/src/RE.cc index a4294e064d..4d26ce2423 100644 --- a/src/RE.cc +++ b/src/RE.cc @@ -19,6 +19,7 @@ int case_insensitive = 0; extern int RE_parse(void); extern void RE_set_input(const char* str); +extern void RE_done_with_scan(); Specific_RE_Matcher::Specific_RE_Matcher(match_type arg_mt, int arg_multiline) : equiv_class(NUM_SYM) @@ -108,9 +109,15 @@ int Specific_RE_Matcher::Compile(int lazy) rem = this; RE_set_input(pattern_text); - if ( RE_parse() ) + + int parse_status = RE_parse(); + RE_done_with_scan(); + + if ( parse_status ) { reporter->Error("error compiling pattern /%s/", pattern_text); + Unref(nfa); + nfa = 0; return 0; } @@ -139,9 +146,18 @@ int Specific_RE_Matcher::CompileSet(const string_list& set, const int_list& idx) loop_over_list(set, i) { RE_set_input(set[i]); - if ( RE_parse() ) + int parse_status = RE_parse(); + RE_done_with_scan(); + + if ( parse_status ) { reporter->Error("error compiling pattern /%s/", set[i]); + + if ( set_nfa != nfa ) + Unref(set_nfa); + Unref(nfa); + nfa = 0; + return 0; } diff --git a/src/Val.cc b/src/Val.cc index acf0b6f915..36269ad9c9 100644 --- a/src/Val.cc +++ b/src/Val.cc @@ -3228,7 +3228,7 @@ bool VectorVal::AssignRepeat(unsigned int index, unsigned int how_many, ResizeAtLeast(index + how_many); for ( unsigned int i = index; i < index + how_many; ++i ) - if ( ! Assign(i, element ) ) + if ( ! Assign(i, element->Ref() ) ) return false; return true; diff --git a/src/bro.bif b/src/bro.bif index 652b8cdd07..27020b2601 100644 --- a/src/bro.bif +++ b/src/bro.bif @@ -2958,7 +2958,7 @@ function uuid_to_string%(uuid: string%): string ## ## This function must be called at Bro startup time, e.g., in the event ## :bro:id:`bro_init`. -function merge_pattern%(p1: pattern, p2: pattern%): pattern +function merge_pattern%(p1: pattern, p2: pattern%): pattern &deprecated %{ if ( bro_start_network_time != 0.0 ) { diff --git a/src/broker/Manager.cc b/src/broker/Manager.cc index f31537f3b7..6f5e09e611 100644 --- a/src/broker/Manager.cc +++ b/src/broker/Manager.cc @@ -185,7 +185,7 @@ void Manager::InitPostScript() auto max_sleep = get_option("Broker::max_sleep")->AsCount(); if ( max_threads ) - config.scheduler_max_threads = max_threads; + config.set("scheduler.max-threads", max_threads); else { // On high-core-count systems, spawning one thread per core @@ -193,7 +193,7 @@ void Manager::InitPostScript() // threads are under-utilized. Related: // https://github.com/actor-framework/actor-framework/issues/699 if ( reading_pcaps ) - config.scheduler_max_threads = 2u; + config.set("scheduler.max-threads", 2u); else { auto hc = std::thread::hardware_concurrency(); @@ -203,18 +203,18 @@ void Manager::InitPostScript() else if ( hc < 4u) hc = 4u; - config.scheduler_max_threads = hc; + config.set("scheduler.max-threads", hc); } } if ( max_sleep ) - config.work_stealing_relaxed_sleep_duration_us = max_sleep; + config.set("work-stealing.relaxed-sleep-duration-us", max_sleep); else // 64ms is just an arbitrary amount derived from testing // the overhead of a unused CAF actor system on a 32-core system. // Performance was within 2% of baseline timings (w/o CAF) // when using this sleep duration. - config.work_stealing_relaxed_sleep_duration_us = 64000; + config.set("work-stealing.relaxed-sleep-duration-us", 64000); bstate = std::make_shared(std::move(config)); } diff --git a/src/parse.y b/src/parse.y index 8145b66809..34d6f31373 100644 --- a/src/parse.y +++ b/src/parse.y @@ -5,7 +5,7 @@ // Switching parser table type fixes ambiguity problems. %define lr.type ielr -%expect 142 +%expect 141 %token TOK_ADD TOK_ADD_TO TOK_ADDR TOK_ANY %token TOK_ATENDIF TOK_ATELSE TOK_ATIF TOK_ATIFDEF TOK_ATIFNDEF @@ -53,13 +53,12 @@ %nonassoc TOK_AS TOK_IS %type opt_no_test opt_no_test_block opt_deprecated -%type TOK_ID TOK_PATTERN_TEXT single_pattern +%type TOK_ID TOK_PATTERN_TEXT %type local_id global_id def_global_id event_id global_or_event_id resolve_id begin_func case_type %type local_id_list case_type_list %type init_class %type opt_init %type TOK_CONSTANT -%type pattern %type expr opt_expr init anonymous_function %type event %type stmt stmt_list func_body for_head @@ -724,11 +723,15 @@ expr: $$ = new ConstExpr($1); } - | pattern + | '/' { begin_RE(); } TOK_PATTERN_TEXT { end_RE(); } '/' { - set_location(@1); - $1->Compile(); - $$ = new ConstExpr(new PatternVal($1)); + set_location(@3); + + RE_Matcher* re = new RE_Matcher($3); + delete [] $3; + + re->Compile(); + $$ = new ConstExpr(new PatternVal(re)); } | '|' expr '|' %prec '(' @@ -770,25 +773,6 @@ opt_expr_list: { $$ = new ListExpr(); } ; -pattern: - pattern '|' single_pattern - { - $1->AddPat($3); - delete [] $3; - } - - | single_pattern - { - $$ = new RE_Matcher($1); - delete [] $1; - } - ; - -single_pattern: - '/' { begin_RE(); } TOK_PATTERN_TEXT { end_RE(); } '/' - { $$ = $3; } - ; - enum_body: enum_body_list { diff --git a/src/re-scan.l b/src/re-scan.l index 0d737f08a6..8bd00c8bba 100644 --- a/src/re-scan.l +++ b/src/re-scan.l @@ -196,8 +196,15 @@ CCL_EXPR ("[:"[[:alpha:]]+":]") %% +YY_BUFFER_STATE RE_buf; + void RE_set_input(const char* str) { RE_parse_input = str; - yy_scan_string(str); + RE_buf = yy_scan_string(str); + } + +void RE_done_with_scan() + { + yy_delete_buffer(RE_buf); } diff --git a/testing/btest/Baseline/language.pattern/out b/testing/btest/Baseline/language.pattern/out index 4a5b8de670..9c801eb60b 100644 --- a/testing/btest/Baseline/language.pattern/out +++ b/testing/btest/Baseline/language.pattern/out @@ -6,3 +6,7 @@ inequality operator (order of operands) (PASS) in operator (PASS) in operator (PASS) !in operator (PASS) +& operator (PASS) +& operator (FAIL) +| operator (PASS) +| operator (FAIL) diff --git a/testing/btest/core/leaks/pattern.bro b/testing/btest/core/leaks/pattern.bro new file mode 100644 index 0000000000..3119ad12af --- /dev/null +++ b/testing/btest/core/leaks/pattern.bro @@ -0,0 +1,38 @@ +# @TEST-GROUP: leaks +# @TEST-REQUIRES: bro --help 2>&1 | grep -q mem-leaks + +# @TEST-EXEC: HEAP_CHECK_DUMP_DIRECTORY=. HEAPCHECK=local btest-bg-run bro bro -m -b -r $TRACES/http/get.trace %INPUT +# @TEST-EXEC: btest-bg-wait 60 + +function test_case(msg: string, expect: bool) + { + print fmt("%s (%s)", msg, expect ? "PASS" : "FAIL"); + } + +event new_connection(c: connection) + { + print "new connection"; + + local p1: pattern = /foo|bar/; + local p2: pattern = /oob/; + local p3: pattern = /^oob/; + local p4 = /foo/; + + # Type inference tests + + test_case( "type inference", type_name(p4) == "pattern" ); + + # Operator tests + + test_case( "equality operator", "foo" == p1 ); + test_case( "equality operator (order of operands)", p1 == "foo" ); + test_case( "inequality operator", "foobar" != p1 ); + test_case( "inequality operator (order of operands)", p1 != "foobar" ); + test_case( "in operator", p1 in "foobar" ); + test_case( "in operator", p2 in "foobar" ); + test_case( "!in operator", p3 !in "foobar" ); + test_case( "& operator", p1 & p2 in "baroob" ); + test_case( "& operator", p2 & p1 in "baroob" ); + test_case( "| operator", p1 | p2 in "lazybarlazy" ); + test_case( "| operator", p3 | p4 in "xoob" ); + } diff --git a/testing/btest/language/pattern.bro b/testing/btest/language/pattern.bro index b904fe8737..1c137969eb 100644 --- a/testing/btest/language/pattern.bro +++ b/testing/btest/language/pattern.bro @@ -27,6 +27,10 @@ event bro_init() test_case( "in operator", p1 in "foobar" ); test_case( "in operator", p2 in "foobar" ); test_case( "!in operator", p3 !in "foobar" ); + test_case( "& operator", p1 & p2 in "baroob" ); + test_case( "& operator", p2 & p1 in "baroob" ); + test_case( "| operator", p1 | p2 in "lazybarlazy" ); + test_case( "| operator", p3 | p4 in "xoob" ); }