diff --git a/NEWS b/NEWS index bddb781f4e..0098c11e95 100644 --- a/NEWS +++ b/NEWS @@ -62,6 +62,12 @@ New Functionality * The DNS analyzer was extended to support TKEY RRs (RFC 2390). A corresponding ``dns_TKEY`` event was added. +* The ``signature_match()`` and custom signature events now receive the end of + match offset within the ``data`` parameter as an optional parameter named + ``end_of_match``. + + event signature_match(state: signature_state, msg: string, data: string, end_of_match: count); + Changed Functionality --------------------- diff --git a/scripts/base/init-bare.zeek b/scripts/base/init-bare.zeek index 671972c2e5..6dabb04d4d 100644 --- a/scripts/base/init-bare.zeek +++ b/scripts/base/init-bare.zeek @@ -4527,7 +4527,6 @@ type signature_state: record { conn: connection; ##< Matching connection. is_orig: bool; ##< True if matching endpoint is originator. payload_size: count; ##< Payload size of the first matching packet of current endpoint. - pattern_end_offset: count &optional; ##< 0-based offset of the last character of the matched payload in the last matching chunk of current endpoint. Non-existing for pure rules }; ## A BitTorrent peer. 
diff --git a/src/RuleAction.cc b/src/RuleAction.cc index e6b0f7f204..74e9a0a406 100644 --- a/src/RuleAction.cc +++ b/src/RuleAction.cc @@ -2,12 +2,17 @@ #include "zeek/zeek-config.h" +#include #include #include "zeek/Conn.h" +#include "zeek/Desc.h" #include "zeek/Event.h" +#include "zeek/Func.h" +#include "zeek/ID.h" #include "zeek/NetVar.h" #include "zeek/RuleMatcher.h" +#include "zeek/Type.h" #include "zeek/analyzer/Manager.h" #include "zeek/analyzer/protocol/pia/PIA.h" @@ -18,7 +23,7 @@ namespace zeek::detail { bool is_event(const char* id) { return zeek::event_registry->Lookup(id) != nullptr; } RuleActionEvent::RuleActionEvent(const char* arg_msg) - : msg(make_intrusive(arg_msg)), handler(signature_match) {} + : msg(make_intrusive(arg_msg)), handler(signature_match), want_end_of_match(true) {} RuleActionEvent::RuleActionEvent(const char* arg_msg, const char* event_name) { if ( arg_msg ) // Message can be null (not provided). @@ -36,33 +41,75 @@ RuleActionEvent::RuleActionEvent(const char* arg_msg, const char* event_name) { static const auto& signature_match_params = signature_match->GetFunc()->GetType()->ParamList()->GetTypes(); // Fabricated params for non-message event(state: signature_state, data: string) - static const std::vector signature_match2_params = {signature_match_params[0], - signature_match_params[2]}; + static const std::vector signature_match_no_msg2_params = {signature_match_params[0], + signature_match_params[2]}; + // Fabricated params for non-message event(state: signature_state, data: string, end_of_match: count) + static const std::vector signature_match_no_msg3_params = {signature_match_params[0], + signature_match_params[2], + signature_match_params[3]}; if ( msg ) { // If msg was provided, the function signature needs to agree with - // the signature_match event, even if it's a different event. - if ( ! 
handler->GetFunc()->GetType()->CheckArgs(signature_match_params, true, true) ) - zeek::reporter->Error("wrong event parameters for '%s'", event_name); + // one of the signature_match() events that take the message. + const auto& handler_args_rt = handler->GetType()->Params(); + auto prototype = signature_match->GetFunc()->GetType()->FindPrototype(*handler_args_rt); + + // No prototype matched, call CheckArgs() for those where at least + // the number of arguments matches for better error messaging (if any). + if ( ! prototype ) { + for ( const auto& p : signature_match->GetType()->Prototypes() ) { + if ( p.args->NumFields() != handler_args_rt->NumFields() ) + continue; + + std::vector tplist; + std::for_each(p.args->Types()->begin(), p.args->Types()->end(), + [&tplist](const auto* td) { tplist.push_back(td->type); }); + + (void)handler->GetType()->CheckArgs(tplist, true, true); + } + + zeek::reporter->Error("wrong event parameters for '%s' (%s)", event_name, + obj_desc_short(handler_args_rt.get()).c_str()); + return; + } + + // signature_match(state, msg, data, [end_of_match]) + want_end_of_match = prototype->args->NumFields() > 3; } else { // When no message is provided, use non-message parameters. - if ( ! handler->GetFunc()->GetType()->CheckArgs(signature_match2_params, true, true) ) + const auto& handler_args_rt = handler->GetType()->Params(); + want_end_of_match = handler_args_rt->NumFields() > 2; + + const auto& check_args = + handler_args_rt->NumFields() == 2 ? signature_match_no_msg2_params : signature_match_no_msg3_params; + + if ( ! handler->GetFunc()->GetType()->CheckArgs(check_args, true, true) ) zeek::reporter->Error("wrong event parameters for '%s'", event_name); } } + void RuleActionEvent::DoAction(const Rule* parent, RuleEndpointState* state, const u_char* data, int len) { if ( handler ) { zeek::Args args; args.reserve(msg ? 
3 : 2); args.push_back({AdoptRef{}, rule_matcher->BuildRuleStateValue(parent, state)}); + if ( msg ) args.push_back(msg); + if ( data ) args.push_back(make_intrusive(len, reinterpret_cast(data))); else args.push_back(zeek::val_mgr->EmptyString()); + if ( want_end_of_match ) { + // PList::member_pos() doesn't like const Rule*, need const_cast. + int rule_offset = state->matched_by_patterns.member_pos(const_cast(parent)); + MatchPos end_of_match = (rule_offset >= 0 && data) ? state->matched_text_end_of_match[rule_offset] : 0; + args.push_back(zeek::val_mgr->Count(end_of_match)); + } + event_mgr.Enqueue(handler, std::move(args)); } } diff --git a/src/RuleAction.h b/src/RuleAction.h index edbdf94c9d..ed6ed064a7 100644 --- a/src/RuleAction.h +++ b/src/RuleAction.h @@ -43,6 +43,7 @@ public: private: StringValPtr msg; EventHandlerPtr handler; + bool want_end_of_match; // Whether handler accepts end_of_match parameter. }; class RuleActionMIME : public RuleAction { diff --git a/src/RuleMatcher.cc b/src/RuleMatcher.cc index bffd9d4e3f..93722a4c88 100644 --- a/src/RuleMatcher.cc +++ b/src/RuleMatcher.cc @@ -91,10 +91,6 @@ Val* RuleMatcher::BuildRuleStateValue(const Rule* rule, const RuleEndpointState* val->Assign(1, state->GetAnalyzer()->ConnVal()); val->Assign(2, state->is_orig); val->Assign(3, state->payload_size); - - int rule_offset = state->matched_by_patterns.member_pos(const_cast(rule)); - if ( rule_offset >= 0 ) - val->Assign(4, state->match_offsets[rule_offset]); return val; } @@ -180,6 +176,7 @@ void RuleHdrTest::PrintDebug() const { RuleEndpointState::RuleEndpointState(analyzer::Analyzer* arg_analyzer, bool arg_is_orig, RuleEndpointState* arg_opposite, analyzer::pia::PIA* arg_PIA) { payload_size = -1; + current_pos = 0; analyzer = arg_analyzer; is_orig = arg_is_orig; @@ -654,8 +651,7 @@ RuleMatcher::MIME_Matches* RuleMatcher::Match(RuleFileMagicState* state, const u set rule_matches; for ( AcceptingMatchSet::const_iterator it = accepted_matches.begin(); it != 
accepted_matches.end(); ++it ) { - AcceptIdx aidx = it->first; - MatchPos mpos = it->second; + auto [aidx, mpos] = *it; Rule* r = Rule::rule_table[aidx - 1]; @@ -805,12 +801,19 @@ void RuleMatcher::Match(RuleEndpointState* state, Rule::PatternType type, const state->payload_size = 0; } + if ( clear ) + state->current_pos = 0; + + size_t pre_match_pos = state->current_pos; + // Feed data into all relevant matchers. for ( const auto& m : state->matchers ) { if ( m->type == type && m->state->Match((const u_char*)data, data_len, bol, eol, clear) ) newmatch = true; } + state->current_pos += data_len; + // If no new match found, we're already done. if ( ! newmatch ) return; @@ -844,8 +847,7 @@ void RuleMatcher::Match(RuleEndpointState* state, Rule::PatternType type, const // Check which of the matching rules really belong to any of our nodes. for ( set>::const_iterator it = rule_matches.begin(); it != rule_matches.end(); ++it ) { - Rule* r = it->first; - MatchPos match_end_offset = it->second; + auto [r, match_end_pos] = *it; DBG_LOG(DBG_RULES, "Accepted rule: %s", r->id); @@ -867,7 +869,7 @@ void RuleMatcher::Match(RuleEndpointState* state, Rule::PatternType type, const state->matched_by_patterns.push_back(r); String* s = new String(data, data_len, false); state->matched_text.push_back(s); - state->match_offsets.push_back(match_end_offset); + state->matched_text_end_of_match.push_back(match_end_pos - pre_match_pos); } DBG_LOG(DBG_RULES, "And has not already fired"); diff --git a/src/RuleMatcher.h b/src/RuleMatcher.h index 50da437b63..3d0de6a94f 100644 --- a/src/RuleMatcher.h +++ b/src/RuleMatcher.h @@ -161,6 +161,7 @@ public: private: friend class RuleMatcher; + friend class RuleActionEvent; // For access to match state. // Constructor is private; use RuleMatcher::InitEndpoint() // for creating an instance. 
@@ -184,13 +185,14 @@ private: // The follow tracks which rules for which all patterns have matched, // in a parallel list the (first instance of the) corresponding - // matched text, and in another parallel list the offset of the - // end of the last pattern match. + // matched text, and in another parallel list the offset of the + // end of the last pattern match. rule_list matched_by_patterns; bstr_list matched_text; - match_offset_list match_offsets; + match_offset_list matched_text_end_of_match; int payload_size; + size_t current_pos; // The number of bytes fed into state. bool is_orig; int_list matched_rules; // Rules for which all conditions have matched diff --git a/src/event.bif b/src/event.bif index 02e55c4f14..e8ffaa3ea1 100644 --- a/src/event.bif +++ b/src/event.bif @@ -575,6 +575,11 @@ event file_weird%(name: string, f: fa_file, addl: string%); ## specifics here are not well-defined as Zeek does not buffer any input. ## If a match is split across packet boundaries, only the last chunk ## triggering the match will be passed on to the event. +## end_of_match: Where within data the pattern match ended. 0 if not applicable or when *data* is empty. +event signature_match%(state: signature_state, msg: string, data: string, end_of_match: count%); + +# Same as above, but allow users to ignore the end_of_match parameter +# for backwards compatibility, but also convenience. event signature_match%(state: signature_state, msg: string, data: string%); ## Generated each time Zeek's internal profiling log is updated. The file is diff --git a/testing/btest/Baseline/signatures.custom-event-errors/.stderr b/testing/btest/Baseline/signatures.custom-event-errors/.stderr index 97f17fa2a5..2917f62cdf 100644 --- a/testing/btest/Baseline/signatures.custom-event-errors/.stderr +++ b/testing/btest/Baseline/signatures.custom-event-errors/.stderr @@ -1,10 +1,9 @@ ### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. 
-warning in <...>/custom-event-errors.zeek, line 7: Wrong number of arguments for function. Expected 3, got 2. (event(state:signature_state, data:string)) -error: wrong event parameters for 'wrong_signature2' -warning in <...>/custom-event-errors.zeek, line 9: Wrong number of arguments for function. Expected 2, got 3. (event(state:signature_state, msg:string, data:string)) +error: wrong event parameters for 'wrong_signature2' (record { state:signature_state; data:string; }) +warning in <...>/custom-event-errors.zeek, line 9: Type mismatch in function argument #2. Expected count, got string. (event(state:signature_state, msg:string, data:string)) error: wrong event parameters for 'wrong_signature3' warning in <...>/custom-event-errors.zeek, line 11: Type mismatch in function argument #1. Expected string, got count. (event(state:signature_state, msg:count, data:string)) -error: wrong event parameters for 'wrong_signature4' +error: wrong event parameters for 'wrong_signature4' (record { state:signature_state; msg:count; data:string; }) error: Error in signature (./id.sig:19): identifier is not an event (non_existing_event) error: Error in signature (./id.sig:24): identifier is not an event (cat) error: Error in signature (./id.sig:29): identifier is not an event (ignore_checksums) diff --git a/testing/btest/Baseline/signatures.custom-event/out b/testing/btest/Baseline/signatures.custom-event/out index fca040926f..cbea2775c4 100644 --- a/testing/btest/Baseline/signatures.custom-event/out +++ b/testing/btest/Baseline/signatures.custom-event/out @@ -1,3 +1,5 @@ ### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. 
signature_match2 [orig_h=127.0.0.1, orig_p=30000/udp, resp_h=127.0.0.1, resp_p=13000/udp] +signature_match5 [orig_h=127.0.0.1, orig_p=30000/udp, resp_h=127.0.0.1, resp_p=13000/udp] - end_of_match=0 +signature_match4 [orig_h=127.0.0.1, orig_p=30000/udp, resp_h=127.0.0.1, resp_p=13000/udp] - message end_of_match=0 signature_match3 [orig_h=127.0.0.1, orig_p=30000/udp, resp_h=127.0.0.1, resp_p=13000/udp] - message diff --git a/testing/btest/Baseline/signatures.custom-event/id.out b/testing/btest/Baseline/signatures.tcp-end-of-match-2/.stderr similarity index 100% rename from testing/btest/Baseline/signatures.custom-event/id.out rename to testing/btest/Baseline/signatures.tcp-end-of-match-2/.stderr diff --git a/testing/btest/Baseline/signatures.tcp-end-of-match-2/.stdout b/testing/btest/Baseline/signatures.tcp-end-of-match-2/.stdout new file mode 100644 index 0000000000..ff6bed7632 --- /dev/null +++ b/testing/btest/Baseline/signatures.tcp-end-of-match-2/.stdout @@ -0,0 +1,7 @@ +### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. +signature_match, message, 6 +signature_match with end_of_match, message, 6, end_of_match, 2, match, BX +custom_match with end_of_match, 6, end_of_match, 2, match, BX +custom_match, 6 +custom_match_with_msg with end_of_match, custom message, 6, end_of_match, 2, match, BX +custom_match_with_msg, custom message, 6 diff --git a/testing/btest/Baseline/signatures.tcp-end-of-match/.stderr b/testing/btest/Baseline/signatures.tcp-end-of-match/.stderr new file mode 100644 index 0000000000..49d861c74c --- /dev/null +++ b/testing/btest/Baseline/signatures.tcp-end-of-match/.stderr @@ -0,0 +1 @@ +### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. 
diff --git a/testing/btest/Baseline/signatures.tcp-end-of-match/.stdout b/testing/btest/Baseline/signatures.tcp-end-of-match/.stdout new file mode 100644 index 0000000000..e34d908ff0 --- /dev/null +++ b/testing/btest/Baseline/signatures.tcp-end-of-match/.stdout @@ -0,0 +1,7 @@ +### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. +signature_match, message, 1448 +signature_match with end_of_match, message, 1448, rather than all. (Robin Sommer)\x0a\x0a * Fix parallel make portability +portability_match with end_of_match, 1448, rather than all. (Robin Sommer)\x0a\x0a * Fix parallel make portability +portability_match, 1448 +portability_match_with_msg with end_of_match, custom message, 1448, 69, rather than all. (Robin Sommer)\x0a\x0a * Fix parallel make portability +portability_match_with_msg, custom message, 1448 diff --git a/testing/btest/Baseline/signatures.udp-end-of-match/.stderr b/testing/btest/Baseline/signatures.udp-end-of-match/.stderr new file mode 100644 index 0000000000..49d861c74c --- /dev/null +++ b/testing/btest/Baseline/signatures.udp-end-of-match/.stderr @@ -0,0 +1 @@ +### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. diff --git a/testing/btest/Baseline/signatures.udp-end-of-match/.stdout b/testing/btest/Baseline/signatures.udp-end-of-match/.stdout new file mode 100644 index 0000000000..d7658afbb0 --- /dev/null +++ b/testing/btest/Baseline/signatures.udp-end-of-match/.stdout @@ -0,0 +1,5 @@ +### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. +signature_match [orig_h=192.168.17.58, orig_p=58755/udp, resp_h=8.8.8.8, resp_p=53/udp] - my_sig_udp_orig - offset=19 +0000 35 5e 01 00 00 01 00 00 00 00 00 00 06 67 6f 6f 5^...... 
.....goo +0010 67 6c 65 gle + diff --git a/testing/btest/Traces/tcp-sig-match.pcap b/testing/btest/Traces/tcp-sig-match.pcap new file mode 100644 index 0000000000..0234ce3c5d Binary files /dev/null and b/testing/btest/Traces/tcp-sig-match.pcap differ diff --git a/testing/btest/signatures/custom-event.zeek b/testing/btest/signatures/custom-event.zeek index 2a824eba7f..30719237d6 100644 --- a/testing/btest/signatures/custom-event.zeek +++ b/testing/btest/signatures/custom-event.zeek @@ -10,6 +10,16 @@ signature udp-proto { event my_signature_match3 "message" } +signature udp-proto-with-offset { + ip-proto == 17 + event my_signature_match4 "message" +} + +signature udp-proto-with-offset-no-msg { + ip-proto == 17 + event my_signature_match5 +} + signature udp-stuff { dst-ip == mynets event my_signature_match2 @@ -37,3 +47,13 @@ event my_signature_match3(state: signature_state, msg: string, data: string) { print fmt("signature_match3 %s - %s", state$conn$id, msg); } + +event my_signature_match4(state: signature_state, msg: string, data: string, end_of_match: count) + { + print fmt("signature_match4 %s - %s end_of_match=%s", state$conn$id, msg, end_of_match); + } + +event my_signature_match5(state: signature_state, data: string, end_of_match: count) + { + print fmt("signature_match5 %s - end_of_match=%s", state$conn$id, end_of_match); + } diff --git a/testing/btest/signatures/tcp-end-of-match.zeek b/testing/btest/signatures/tcp-end-of-match.zeek new file mode 100644 index 0000000000..5e7bb7dac6 --- /dev/null +++ b/testing/btest/signatures/tcp-end-of-match.zeek @@ -0,0 +1,69 @@ +# @TEST-DOC: Check optional end_of_match parameter for signature_match() and custom events. +# @TEST-EXEC: zeek -b %INPUT -r $TRACES/http/get.trace +# @TEST-EXEC: btest-diff .stdout +# @TEST-EXEC: btest-diff .stderr + +# Default is 1024, so we don't even peek into the second packet. 
+redef dpd_buffer_size = 1024 * 1024; + +module DataEndOffset; + +export { + global portability_match: event(state: signature_state, data: string, end_of_match: count); + global portability_match: event(state: signature_state, data: string); + + global portability_match_with_msg: event(state: signature_state, msg: string, data: string, end_of_match: count); + global portability_match_with_msg: event(state: signature_state, msg: string, data: string); +} + +@load-sigs ./test.sig + +event signature_match(state: signature_state, msg: string, data: string) + { + print "signature_match", msg, |data|; + } + +event signature_match(state: signature_state, msg: string, data: string, end_of_match: count) + { + print "signature_match with end_of_match", msg, |data|, data[:end_of_match]; + } + +event portability_match(state: signature_state, data: string, end_of_match: count) + { + print "portability_match with end_of_match", |data|, data[:end_of_match]; + } + +event portability_match(state: signature_state, data: string) + { + print "portability_match", |data|; + } + +event portability_match_with_msg(state: signature_state, msg: string, data: string, end_of_match: count) + { + print "portability_match_with_msg with end_of_match", msg, |data|, end_of_match, data[:end_of_match]; + } + +event portability_match_with_msg(state: signature_state, msg: string, data: string) + { + print "portability_match_with_msg", msg, |data|; + } + +@TEST-START-FILE test.sig +signature with-msg { + ip-proto == tcp + payload /.*portability.*/ # this is in the second packet. + event "message" +} + +signature my-custom-event { + ip-proto == tcp + payload /.*portability.*/ # this is in the second packet. + event DataEndOffset::portability_match +} + +signature my-custom-event2 { + ip-proto == tcp + payload /.*portability.*/ # this is in the second packet. 
+ event DataEndOffset::portability_match_with_msg "custom message" +} +@TEST-END-FILE diff --git a/testing/btest/signatures/udp-end-of-match.zeek b/testing/btest/signatures/udp-end-of-match.zeek new file mode 100644 index 0000000000..de7113476d --- /dev/null +++ b/testing/btest/signatures/udp-end-of-match.zeek @@ -0,0 +1,22 @@ +# @TEST-DOC: Check optional end_of_match parameter for signature_match() +# @TEST-EXEC: zeek -b -r $TRACES/dns-caa.pcap %INPUT +# @TEST-EXEC: btest-diff .stdout +# @TEST-EXEC: btest-diff .stderr + +@load-sigs ./test.sig + +event signature_match(state: signature_state, msg: string, data: string, data_end_offset: count) + { + print fmt("signature_match %s - %s - offset=%s", state$conn$id, msg, data_end_offset); + local s = split_string(hexdump(data[:data_end_offset]), /\n/); + for ( i in s ) print s[i]; + } + +@TEST-START-FILE test.sig +signature my_sig_udp_orig { + ip-proto == udp + payload /.+google/ + udp-state originator + event "my_sig_udp_orig" +} +@TEST-END-FILE