signatures: Add end_of_match to signature_match() and custom events

This change tracks the current offset (number of bytes fed into matchers)
on the top-level RuleEndpointState such that we can compute the relative ending
for matched texts individually.

Additionally, it adds the end-of-match offset as a new optional parameter
named end_of_match to signature_match() and custom signature events.
This commit is contained in:
Arne Welzel 2024-10-28 18:59:58 +01:00
parent 5859e23198
commit 04872d4e78
19 changed files with 219 additions and 24 deletions

6
NEWS
View file

@ -62,6 +62,12 @@ New Functionality
* The DNS analyzer was extended to support TKEY RRs (RFC 2930). A corresponding
``dns_TKEY`` event was added.
* The ``signature_match()`` and custom signature events now accept an optional
parameter named ``end_of_match``. It holds the offset within the ``data``
parameter at which the pattern match ended.
event signature_match(state: signature_state, msg: string, data: string, end_of_match: count);
Changed Functionality
---------------------

View file

@ -4527,7 +4527,6 @@ type signature_state: record {
conn: connection; ##< Matching connection.
is_orig: bool; ##< True if matching endpoint is originator.
payload_size: count; ##< Payload size of the first matching packet of current endpoint.
pattern_end_offset: count &optional; ##< 0-based offset of the last character of the matched payload in the last matching chunk of current endpoint. Non-existing for pure rules
};
## A BitTorrent peer.

View file

@ -2,12 +2,17 @@
#include "zeek/zeek-config.h"
#include <algorithm>
#include <string>
#include "zeek/Conn.h"
#include "zeek/Desc.h"
#include "zeek/Event.h"
#include "zeek/Func.h"
#include "zeek/ID.h"
#include "zeek/NetVar.h"
#include "zeek/RuleMatcher.h"
#include "zeek/Type.h"
#include "zeek/analyzer/Manager.h"
#include "zeek/analyzer/protocol/pia/PIA.h"
@ -18,7 +23,7 @@ namespace zeek::detail {
// Returns true if an event handler is registered under the given name.
bool is_event(const char* id) {
    const auto registered = zeek::event_registry->Lookup(id);
    return registered != nullptr;
}
RuleActionEvent::RuleActionEvent(const char* arg_msg)
: msg(make_intrusive<StringVal>(arg_msg)), handler(signature_match) {}
: msg(make_intrusive<StringVal>(arg_msg)), handler(signature_match), want_end_of_match(true) {}
RuleActionEvent::RuleActionEvent(const char* arg_msg, const char* event_name) {
if ( arg_msg ) // Message can be null (not provided).
@ -36,33 +41,75 @@ RuleActionEvent::RuleActionEvent(const char* arg_msg, const char* event_name) {
static const auto& signature_match_params = signature_match->GetFunc()->GetType()->ParamList()->GetTypes();
// Fabricated params for non-message event(state: signature_state, data: string)
static const std::vector<zeek::TypePtr> signature_match2_params = {signature_match_params[0],
signature_match_params[2]};
static const std::vector<zeek::TypePtr> signature_match_no_msg2_params = {signature_match_params[0],
signature_match_params[2]};
// Fabricated params for non-message event(state: signature_state, data: string, end_of_match: count)
static const std::vector<zeek::TypePtr> signature_match_no_msg3_params = {signature_match_params[0],
signature_match_params[2],
signature_match_params[3]};
if ( msg ) {
// If msg was provided, the function signature needs to agree with
// the signature_match event, even if it's a different event.
if ( ! handler->GetFunc()->GetType()->CheckArgs(signature_match_params, true, true) )
zeek::reporter->Error("wrong event parameters for '%s'", event_name);
// one of the signature_match() events that take the message.
const auto& handler_args_rt = handler->GetType()->Params();
auto prototype = signature_match->GetFunc()->GetType()->FindPrototype(*handler_args_rt);
// No prototype matched, call CheckArgs() for those where at least
// the number of arguments matches for better error messaging (if any).
if ( ! prototype ) {
for ( const auto& p : signature_match->GetType()->Prototypes() ) {
if ( p.args->NumFields() != handler_args_rt->NumFields() )
continue;
std::vector<TypePtr> tplist;
std::for_each(p.args->Types()->begin(), p.args->Types()->end(),
[&tplist](const auto* td) { tplist.push_back(td->type); });
(void)handler->GetType()->CheckArgs(tplist, true, true);
}
zeek::reporter->Error("wrong event parameters for '%s' (%s)", event_name,
obj_desc_short(handler_args_rt.get()).c_str());
return;
}
// signature_match(state, msg, data, [end_of_match])
want_end_of_match = prototype->args->NumFields() > 3;
}
else {
// When no message is provided, use non-message parameters.
if ( ! handler->GetFunc()->GetType()->CheckArgs(signature_match2_params, true, true) )
const auto& handler_args_rt = handler->GetType()->Params();
want_end_of_match = handler_args_rt->NumFields() > 2;
const auto& check_args =
handler_args_rt->NumFields() == 2 ? signature_match_no_msg2_params : signature_match_no_msg3_params;
if ( ! handler->GetFunc()->GetType()->CheckArgs(check_args, true, true) )
zeek::reporter->Error("wrong event parameters for '%s'", event_name);
}
}
void RuleActionEvent::DoAction(const Rule* parent, RuleEndpointState* state, const u_char* data, int len) {
if ( handler ) {
zeek::Args args;
args.reserve(msg ? 3 : 2);
args.push_back({AdoptRef{}, rule_matcher->BuildRuleStateValue(parent, state)});
if ( msg )
args.push_back(msg);
if ( data )
args.push_back(make_intrusive<StringVal>(len, reinterpret_cast<const char*>(data)));
else
args.push_back(zeek::val_mgr->EmptyString());
if ( want_end_of_match ) {
// PList::member_pos() doesn't like const Rule*, need const_cast.
int rule_offset = state->matched_by_patterns.member_pos(const_cast<Rule*>(parent));
MatchPos end_of_match = (rule_offset >= 0 && data) ? state->matched_text_end_of_match[rule_offset] : 0;
args.push_back(zeek::val_mgr->Count(end_of_match));
}
event_mgr.Enqueue(handler, std::move(args));
}
}

View file

@ -43,6 +43,7 @@ public:
private:
StringValPtr msg;
EventHandlerPtr handler;
// Whether handler accepts the end_of_match parameter. Defaults to false so the
// flag is well-defined even if a constructor returns before assigning it.
bool want_end_of_match = false;
};
class RuleActionMIME : public RuleAction {

View file

@ -91,10 +91,6 @@ Val* RuleMatcher::BuildRuleStateValue(const Rule* rule, const RuleEndpointState*
val->Assign(1, state->GetAnalyzer()->ConnVal());
val->Assign(2, state->is_orig);
val->Assign(3, state->payload_size);
int rule_offset = state->matched_by_patterns.member_pos(const_cast<Rule*>(rule));
if ( rule_offset >= 0 )
val->Assign(4, state->match_offsets[rule_offset]);
return val;
}
@ -180,6 +176,7 @@ void RuleHdrTest::PrintDebug() const {
RuleEndpointState::RuleEndpointState(analyzer::Analyzer* arg_analyzer, bool arg_is_orig,
RuleEndpointState* arg_opposite, analyzer::pia::PIA* arg_PIA) {
payload_size = -1;
current_pos = 0;
analyzer = arg_analyzer;
is_orig = arg_is_orig;
@ -654,8 +651,7 @@ RuleMatcher::MIME_Matches* RuleMatcher::Match(RuleFileMagicState* state, const u
set<Rule*> rule_matches;
for ( AcceptingMatchSet::const_iterator it = accepted_matches.begin(); it != accepted_matches.end(); ++it ) {
AcceptIdx aidx = it->first;
MatchPos mpos = it->second;
auto [aidx, mpos] = *it;
Rule* r = Rule::rule_table[aidx - 1];
@ -805,12 +801,19 @@ void RuleMatcher::Match(RuleEndpointState* state, Rule::PatternType type, const
state->payload_size = 0;
}
if ( clear )
state->current_pos = 0;
size_t pre_match_pos = state->current_pos;
// Feed data into all relevant matchers.
for ( const auto& m : state->matchers ) {
if ( m->type == type && m->state->Match((const u_char*)data, data_len, bol, eol, clear) )
newmatch = true;
}
state->current_pos += data_len;
// If no new match found, we're already done.
if ( ! newmatch )
return;
@ -844,8 +847,7 @@ void RuleMatcher::Match(RuleEndpointState* state, Rule::PatternType type, const
// Check which of the matching rules really belong to any of our nodes.
for ( set<pair<Rule*, MatchPos>>::const_iterator it = rule_matches.begin(); it != rule_matches.end(); ++it ) {
Rule* r = it->first;
MatchPos match_end_offset = it->second;
auto [r, match_end_pos] = *it;
DBG_LOG(DBG_RULES, "Accepted rule: %s", r->id);
@ -867,7 +869,7 @@ void RuleMatcher::Match(RuleEndpointState* state, Rule::PatternType type, const
state->matched_by_patterns.push_back(r);
String* s = new String(data, data_len, false);
state->matched_text.push_back(s);
state->match_offsets.push_back(match_end_offset);
state->matched_text_end_of_match.push_back(match_end_pos - pre_match_pos);
}
DBG_LOG(DBG_RULES, "And has not already fired");

View file

@ -161,6 +161,7 @@ public:
private:
friend class RuleMatcher;
friend class RuleActionEvent; // For access to match state.
// Constructor is private; use RuleMatcher::InitEndpoint()
// for creating an instance.
@ -184,13 +185,14 @@ private:
// The following tracks the rules for which all patterns have matched,
// in a parallel list the (first instance of the) corresponding
// matched text, and in another parallel list the offset of the
// end of the last pattern match.
// matched text, and in another parallel list the offset of the
// end of the last pattern match.
rule_list matched_by_patterns;
bstr_list matched_text;
match_offset_list match_offsets;
match_offset_list matched_text_end_of_match;
int payload_size;
size_t current_pos; // The number of bytes fed into state.
bool is_orig;
int_list matched_rules; // Rules for which all conditions have matched

View file

@ -575,6 +575,11 @@ event file_weird%(name: string, f: fa_file, addl: string%);
## specifics here are not well-defined as Zeek does not buffer any input.
## If a match is split across packet boundaries, only the last chunk
## triggering the match will be passed on to the event.
## end_of_match: Where within data the pattern match ended. 0 if not applicable or when *data* is empty.
event signature_match%(state: signature_state, msg: string, data: string, end_of_match: count%);
# Same as above, but without the end_of_match parameter, so that handlers can
# omit it for backwards compatibility or simply for convenience.
event signature_match%(state: signature_state, msg: string, data: string%);
## Generated each time Zeek's internal profiling log is updated. The file is

View file

@ -1,10 +1,9 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
warning in <...>/custom-event-errors.zeek, line 7: Wrong number of arguments for function. Expected 3, got 2. (event(state:signature_state, data:string))
error: wrong event parameters for 'wrong_signature2'
warning in <...>/custom-event-errors.zeek, line 9: Wrong number of arguments for function. Expected 2, got 3. (event(state:signature_state, msg:string, data:string))
error: wrong event parameters for 'wrong_signature2' (record { state:signature_state; data:string; })
warning in <...>/custom-event-errors.zeek, line 9: Type mismatch in function argument #2. Expected count, got string. (event(state:signature_state, msg:string, data:string))
error: wrong event parameters for 'wrong_signature3'
warning in <...>/custom-event-errors.zeek, line 11: Type mismatch in function argument #1. Expected string, got count. (event(state:signature_state, msg:count, data:string))
error: wrong event parameters for 'wrong_signature4'
error: wrong event parameters for 'wrong_signature4' (record { state:signature_state; msg:count; data:string; })
error: Error in signature (./id.sig:19): identifier is not an event (non_existing_event)
error: Error in signature (./id.sig:24): identifier is not an event (cat)
error: Error in signature (./id.sig:29): identifier is not an event (ignore_checksums)

View file

@ -1,3 +1,5 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
signature_match2 [orig_h=127.0.0.1, orig_p=30000/udp, resp_h=127.0.0.1, resp_p=13000/udp]
signature_match5 [orig_h=127.0.0.1, orig_p=30000/udp, resp_h=127.0.0.1, resp_p=13000/udp] - end_of_match=0
signature_match4 [orig_h=127.0.0.1, orig_p=30000/udp, resp_h=127.0.0.1, resp_p=13000/udp] - message end_of_match=0
signature_match3 [orig_h=127.0.0.1, orig_p=30000/udp, resp_h=127.0.0.1, resp_p=13000/udp] - message

View file

@ -0,0 +1,7 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
signature_match, message, 6
signature_match with end_of_match, message, 6, end_of_match, 2, match, BX
custom_match with end_of_match, 6, end_of_match, 2, match, BX
custom_match, 6
custom_match_with_msg with end_of_match, custom message, 6, end_of_match, 2, match, BX
custom_match_with_msg, custom message, 6

View file

@ -0,0 +1 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.

View file

@ -0,0 +1,7 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
signature_match, message, 1448
signature_match with end_of_match, message, 1448, rather than all. (Robin Sommer)\x0a\x0a * Fix parallel make portability
portability_match with end_of_match, 1448, rather than all. (Robin Sommer)\x0a\x0a * Fix parallel make portability
portability_match, 1448
portability_match_with_msg with end_of_match, custom message, 1448, 69, rather than all. (Robin Sommer)\x0a\x0a * Fix parallel make portability
portability_match_with_msg, custom message, 1448

View file

@ -0,0 +1 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.

View file

@ -0,0 +1,5 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
signature_match [orig_h=192.168.17.58, orig_p=58755/udp, resp_h=8.8.8.8, resp_p=53/udp] - my_sig_udp_orig - offset=19
0000 35 5e 01 00 00 01 00 00 00 00 00 00 06 67 6f 6f 5^...... .....goo
0010 67 6c 65 gle

Binary file not shown.

View file

@ -10,6 +10,16 @@ signature udp-proto {
event my_signature_match3 "message"
}
signature udp-proto-with-offset {
ip-proto == 17
event my_signature_match4 "message"
}
signature udp-proto-with-offset-no-msg {
ip-proto == 17
event my_signature_match5
}
signature udp-stuff {
dst-ip == mynets
event my_signature_match2
@ -37,3 +47,13 @@ event my_signature_match3(state: signature_state, msg: string, data: string)
{
print fmt("signature_match3 %s - %s", state$conn$id, msg);
}
# Custom event with a message and the end_of_match parameter: prints the
# connection id, the message and the reported offset.
event my_signature_match4(state: signature_state, msg: string, data: string, end_of_match: count)
	{
	local line = fmt("signature_match4 %s - %s end_of_match=%s", state$conn$id, msg, end_of_match);
	print line;
	}
# Custom event without a message but with the end_of_match parameter: prints
# the connection id and the reported offset.
event my_signature_match5(state: signature_state, data: string, end_of_match: count)
	{
	local line = fmt("signature_match5 %s - end_of_match=%s", state$conn$id, end_of_match);
	print line;
	}

View file

@ -0,0 +1,69 @@
# @TEST-DOC: Check optional end_of_match parameter for signature_match() and custom events.
# @TEST-EXEC: zeek -b %INPUT -r $TRACES/http/get.trace
# @TEST-EXEC: btest-diff .stdout
# @TEST-EXEC: btest-diff .stderr
# Default is 1024, so we don't even peek into the second packet.
redef dpd_buffer_size = 1024 * 1024;
module DataEndOffset;
export {
global portability_match: event(state: signature_state, data: string, end_of_match: count);
global portability_match: event(state: signature_state, data: string);
global portability_match_with_msg: event(state: signature_state, msg: string, data: string, end_of_match: count);
global portability_match_with_msg: event(state: signature_state, msg: string, data: string);
}
@load-sigs ./test.sig
# Handler using the prototype without end_of_match: prints the message and
# the length of the data passed to the event.
event signature_match(state: signature_state, msg: string, data: string)
	{
	local data_len = |data|;
	print "signature_match", msg, data_len;
	}
# Handler using the prototype with end_of_match: prints the message, the
# length of the data and the data up to the end of the match.
event signature_match(state: signature_state, msg: string, data: string, end_of_match: count)
	{
	local data_len = |data|;
	local matched = data[:end_of_match];
	print "signature_match with end_of_match", msg, data_len, matched;
	}
# Custom event without a message, with end_of_match: prints the length of
# the data and the data up to the end of the match.
event portability_match(state: signature_state, data: string, end_of_match: count)
	{
	local data_len = |data|;
	local matched = data[:end_of_match];
	print "portability_match with end_of_match", data_len, matched;
	}
# Custom event without a message and without end_of_match: prints only the
# length of the data.
event portability_match(state: signature_state, data: string)
	{
	local data_len = |data|;
	print "portability_match", data_len;
	}
# Custom event with a message and end_of_match: prints the message, the
# length of the data, the offset and the data up to the end of the match.
event portability_match_with_msg(state: signature_state, msg: string, data: string, end_of_match: count)
	{
	local data_len = |data|;
	local matched = data[:end_of_match];
	print "portability_match_with_msg with end_of_match", msg, data_len, end_of_match, matched;
	}
# Custom event with a message but without end_of_match: prints the message
# and the length of the data.
event portability_match_with_msg(state: signature_state, msg: string, data: string)
	{
	local data_len = |data|;
	print "portability_match_with_msg", msg, data_len;
	}
@TEST-START-FILE test.sig
signature with-msg {
ip-proto == tcp
payload /.*portability.*/ # this is in the second packet.
event "message"
}
signature my-custom-event {
ip-proto == tcp
payload /.*portability.*/ # this is in the second packet.
event DataEndOffset::portability_match
}
signature my-custom-event2 {
ip-proto == tcp
payload /.*portability.*/ # this is in the second packet.
event DataEndOffset::portability_match_with_msg "custom message"
}
@TEST-END-FILE

View file

@ -0,0 +1,22 @@
# @TEST-DOC: Check optional data_end_offset parameter for signature_match()
# @TEST-EXEC: zeek -b -r $TRACES/dns-caa.pcap %INPUT
# @TEST-EXEC: btest-diff .stdout
# @TEST-EXEC: btest-diff .stderr
@load-sigs ./test.sig
# Prints the connection id, message and offset of the match, followed by a
# hexdump of the data up to that offset, one line at a time.
event signature_match(state: signature_state, msg: string, data: string, data_end_offset: count)
	{
	local header = fmt("signature_match %s - %s - offset=%s", state$conn$id, msg, data_end_offset);
	print header;

	local dump = hexdump(data[:data_end_offset]);
	local dump_lines = split_string(dump, /\n/);

	for ( idx in dump_lines )
		print dump_lines[idx];
	}
@TEST-START-FILE test.sig
signature my_sig_udp_orig {
ip-proto == udp
payload /.+google/
udp-state originator
event "my_sig_udp_orig"
}
@TEST-END-FILE