spicy-redis: Begin Spicy Redis analyzer

This commit is contained in:
Evan Typanski 2024-09-19 13:55:55 -04:00
parent 897b1546a2
commit 4210e62e57
15 changed files with 383 additions and 0 deletions

View file

@ -0,0 +1 @@
@load ./main

View file

@ -0,0 +1,97 @@
@load base/protocols/conn/removal-hooks
module RESP;

export {
    ## Identifier of the log stream that RESP records are written to.
    redef enum Log::ID += { LOG };

    ## Ports on which the RESP analyzer gets registered.
    const ports = { 6379/tcp } &redef;

    ## A single RESP value, flattened so that exactly the field matching
    ## the value's type is set. The field order mirrors the tuple that
    ## Zeek_RESP::create_data returns on the Spicy side.
    type RESPData: record {
        ## Content of a simple string value.
        simple_string: string &optional &log;
        ## Content of a simple error value.
        simple_error: string &optional &log;
        ## Value of an integer.
        i: int &optional &log;
        ## Content of a bulk string value.
        bulk_string: string &optional &log;
        # TODO: arrays are not exported yet.
        #array:
        ## True if the value is the null type.
        is_null: bool &log;
        ## Value of a boolean.
        boolean: bool &optional &log;
        ## Value of a double.
        double_: double &optional &log;
        ## Big number, kept as a string.
        big_num: string &optional &log;
        ## Content of a bulk error value.
        bulk_error: string &optional &log;
        ## Content of a verbatim string value.
        verbatim_string: string &optional &log;
    };

    ## The columns of the RESP log.
    type Info: record {
        ## Time at which the activity was seen.
        ts: time &log;
        ## Unique ID of the connection.
        uid: string &log;
        ## 4-tuple of endpoint addresses/ports of the connection.
        id: conn_id &log;
        ## The RESP value this log entry describes.
        resp_data: RESPData &log;
    };

    ## Default logging policy hook for the stream.
    global log_policy: Log::PolicyHook;

    ## Event raised for every record written to the RESP log.
    global log_resp: event(rec: Info);
}
# Per-connection RESP logging state. It is optional: set_session() creates it
# on demand and emit_log() deletes it again after writing the log entry.
redef record connection += {
redis_resp: Info &optional;
};
# Add the RESP ports to likely_server_ports.
redef likely_server_ports += { ports };
# TODO: If you're going to send file data into the file analysis framework, you
# need to provide a file handle function. This is a simple example that's
# sufficient if the protocol only transfers a single, complete file at a time.
#
# function get_file_handle(c: connection, is_orig: bool): string
# {
# return cat(Analyzer::ANALYZER_SPICY_RESP, c$start_time, c$id, is_orig);
# }
# Creates the RESP log stream and registers the analyzer for the configured ports.
event zeek_init() &priority=5
    {
    local stream = Log::Stream($columns=Info, $ev=log_resp, $path="resp", $policy=log_policy);
    Log::create_stream(RESP::LOG, stream);

    Analyzer::register_for_ports(Analyzer::ANALYZER_SPICY_RESP, ports);

    # TODO: To activate the file handle function above, uncomment this.
    # Files::register_protocol(Analyzer::ANALYZER_SPICY_RESP, [$get_file_handle=RESP::get_file_handle ]);
    }
# Creates the connection's RESP logging state if it does not exist yet;
# existing state is left untouched.
hook set_session(c: connection)
    {
    if ( ! c?$redis_resp )
        c$redis_resp = Info($ts=network_time(), $uid=c$uid, $id=c$id);
    }
# Writes the connection's pending RESP record to the log and then discards
# it. Does nothing when the connection has no RESP state.
function emit_log(c: connection)
    {
    if ( c?$redis_resp )
        {
        Log::write(RESP::LOG, c$redis_resp);
        delete c$redis_resp;
        }
    }
# Handles the RESP::data event defined in resp.evt: stores the payload in the
# connection's RESP state and logs it right away, one log line per value.
event RESP::data(c: connection, payload: RESPData)
    {
    hook set_session(c);
    c$redis_resp$resp_data = payload;
    emit_log(c);
    }

View file

@ -0,0 +1,6 @@
# Declares the RESP Spicy analyzer: the Spicy grammar, the EVT interface file
# and the Zeek-side glue module as sources, plus the Zeek scripts shipped
# with it.
spicy_add_analyzer(
NAME RESP
PACKAGE_NAME spicy-resp
SOURCES resp.spicy resp.evt zeek_resp.spicy
SCRIPTS __load__.zeek main.zeek
)

View file

@ -0,0 +1,8 @@
# Defines the TCP protocol analyzer: the RESP::Messages unit is the parsing
# entry point, and the analyzer is tied to port 6379/tcp.
protocol analyzer spicy::RESP over TCP:
parse with RESP::Messages,
port 6379/tcp;
import RESP;
import Zeek_RESP;
# Raise a Zeek event for every RESP::Data unit, passing the unit flattened
# into a tuple by Zeek_RESP::create_data.
on RESP::Data -> event RESP::data($conn, Zeek_RESP::create_data(self));

View file

@ -0,0 +1,107 @@
module RESP;
# Top-level unit: a sequence of RESP values, stored in an anonymous field.
public type Messages = unit {
: Data[];
};
# One RESP value. The first byte selects the type (see DataType); the switch
# then parses the matching sub-unit into the field named after that type, so
# only one of the fields below is set for any given value.
public type Data = unit {
ty: uint8 &convert=DataType($$);
switch ( self.ty ) {
DataType::SIMPLE_STRING -> simple_string: SimpleString(False);
DataType::SIMPLE_ERROR -> simple_error: SimpleString(True);
DataType::INTEGER -> integer: Integer;
DataType::BULK_STRING -> bulk_string: BulkString(False);
DataType::ARRAY -> array: Array;
DataType::NULL -> null: Null_;
DataType::BOOLEAN -> boolean: Boolean;
DataType::DOUBLE -> double: Double;
DataType::BIG_NUM -> big_num: BigNum;
DataType::BULK_ERROR -> bulk_error: BulkString(True);
# This can be a different type, but the docs also say:
# "Some client libraries may ignore the difference between this type and the string type"
# It just includes the encoding first in the content
DataType::VERBATIM_STRING -> verbatim_string: BulkString(False);
DataType::MAP -> map_: Map;
DataType::SET -> set_: Set;
# "Push events are encoded similarly to arrays, differing only in their
# first byte" - TODO: can probably make it more obvious, though
DataType::PUSH -> push: Array;
};
};
# RESP type tags. Each label's value is the character that Data reads as the
# first byte of a value of that type.
type DataType = enum {
SIMPLE_STRING = '+',
SIMPLE_ERROR = '-',
INTEGER = ':',
BULK_STRING = '$',
ARRAY = '*',
NULL = '_',
BOOLEAN = '#',
DOUBLE = ',',
BIG_NUM = '(',
BULK_ERROR = '!',
VERBATIM_STRING = '=',
MAP = '%',
SET = '~',
PUSH = '>'
};
# Simple string or simple error: everything up to the terminating CRLF.
# The is_error parameter is passed by Data but not used inside the unit.
type SimpleString = unit(is_error: bool) {
content: bytes &until=b"\x0d\x0a";
};
# Integer: the bytes up to the CRLF, converted as a base-10 signed integer.
type Integer = unit {
int: bytes &convert=$$.to_int(10) &until=b"\x0d\x0a";
};
# Bulk string, bulk error or verbatim string: a base-10 length line followed
# by exactly that many content bytes and a closing CRLF.
#
# A negative length denotes a null bulk string ("$-1\r\n"). It has neither
# content nor a closing CRLF, so both fields are skipped in that case;
# otherwise the parser would swallow the beginning of the next value.
#
# The is_error parameter is passed by Data but not used inside the unit.
type BulkString = unit(is_error: bool) {
    length: bytes &convert=$$.to_int(10) &until=b"\x0d\x0a";

    # NullBulkString is a BulkString with content unset
    content: bytes &size=uint64(self.length) if ( self.length >= 0 );

    # Consume the closing CRLF, which only exists when there is content
    # (possibly of length zero).
    : bytes &until=b"\x0d\x0a" if ( self.length >= 0 );
};
# Array (also used for push values): a base-10 element count line followed by
# that many nested Data values.
type Array = unit {
num_elements: bytes &convert=$$.to_int(10) &until=b"\x0d\x0a";
# Null array is an array with elements unset. This is different from an empty array
elements: Data[uint64(self.num_elements)] if ( self.num_elements >= 0 );
};
# Null value: carries no data beyond the line terminator.
type Null_ = unit {
# Still must consume CRLF
: bytes &until=b"\x0d\x0a";
};
# Boolean: one byte, True if it is 't' and False for any other byte,
# followed by the rest of the line up to the CRLF, which is discarded.
type Boolean = unit {
val: uint8 &convert=$$ == 't';
: bytes &until=b"\x0d\x0a";
};
# Double: the bytes up to the CRLF, converted with to_real().
type Double = unit {
val: bytes &convert=$$.to_real() &until=b"\x0d\x0a";
};
# Big number: the bytes up to the CRLF, kept as text rather than converted
# to a numeric type.
type BigNum = unit {
# Big num can be very big so put it in a UTF-8 decoded string
val: bytes &convert=$$.decode() &until=b"\x0d\x0a";
};
# Map: a base-10 count of key/value pairs followed by 2 * count Data values,
# alternating key, value, key, value, ...
#
# The values are parsed into the flat vector raw_data; once that is complete,
# the hook regroups them into key_val_pairs.
type Map = unit {
    var key_val_pairs: vector<tuple<Data, Data>>;

    num_elements: bytes &convert=$$.to_uint(10) &until=b"\x0d\x0a";

    # TODO: How can I make this into a map? Alternatively, how can I do this better?
    raw_data: Data[self.num_elements * 2] {
        # raw_data has num_elements * 2 entries, so the index must cover that
        # whole range; stopping at num_elements would drop half of the pairs.
        # The length is even, so i + 1 is always a valid index.
        while ( local i = 0; i < self.num_elements * 2 ) {
            self.key_val_pairs.push_back(($$[i], $$[i + 1]));
            i += 2;
        }
    }
};
# Set: a base-10 element count line followed by that many Data values.
type Set = unit {
num_elements: bytes &convert=$$.to_uint(10) &until=b"\x0d\x0a";
# TODO: This should be a set, but Data cannot be stored in the backing C++ set
elements: Data[self.num_elements];
};

View file

@ -0,0 +1,72 @@
# Set up protocol confirmation/rejection for analyzers, as well as any further
# Zeek-specific analysis.
module Zeek_RESP;
import RESP;
import zeek;
# Protocol confirmation/rejection, attached to RESP::Data (not to the
# top-level RESP::Messages unit): each Data unit that finishes parsing
# confirms the protocol, and a parse error in a Data unit rejects it.
# NOTE(review): this also fires for Data units nested inside arrays, maps and
# sets - confirm that is intended.
on RESP::Data::%done {
zeek::confirm_protocol();
}
on RESP::Data::%error {
zeek::reject_protocol("error while parsing RESP data");
}
# Flat representation of one RESP value as returned by create_data. Element
# order as built there: simple_string, simple_error, integer, bulk_string,
# null flag, boolean, double, big_num, bulk_error, verbatim_string.
type ZeekData = tuple<
optional<bytes>,
optional<bytes>,
optional<int64>,
optional<bytes>,
#optional<vector<ZeekData>>, # TODO: This segfaults because recursive type :(
bool,
optional<bool>,
optional<real>,
optional<string>,
optional<bytes>,
optional<bytes>,
>;
# Flattens a parsed RESP::Data unit into a ZeekData tuple.
#
# Each tuple element is set only if the corresponding field of `data` was
# parsed; all others stay unset. The null flag is the exception: it is always
# present and tells whether `data` is the null type. Arrays, maps, sets and
# push values are not exported, so they yield a tuple with everything unset
# and the null flag False.
public function create_data(data: RESP::Data): ZeekData {
    local simple_string: optional<bytes>;
    local simple_error: optional<bytes>;
    local i: optional<int64>;
    local bulk_string: optional<bytes>;
    #local array: optional<vector<ZeekData>>;
    local null: bool;
    local boolean: optional<bool>;
    local double: optional<real>;
    local big_num: optional<string>;
    local bulk_error: optional<bytes>;
    local verbatim_string: optional<bytes>;

    if (data?.simple_string)
        simple_string = data.simple_string.content;

    if (data?.simple_error)
        simple_error = data.simple_error.content;

    if (data?.integer)
        i = data.integer.int;

    # BulkString leaves `content` unset for a null bulk string (negative
    # length). Reading an unset unit field raises an exception, so test the
    # field itself as well and leave the result unset in that case.
    if (data?.bulk_string && data.bulk_string?.content)
        bulk_string = data.bulk_string.content;

    #if (data?.array) {
    #    for ( data in data.array.elements ) {
    #        array.push_back(data);
    #    }
    #}

    if (data?.null)
        null = True;
    else
        null = False;

    if (data?.boolean)
        boolean = data.boolean.val;

    if (data?.double)
        double = data.double.val;

    if (data?.big_num)
        big_num = data.big_num.val;

    # Same null handling as for bulk_string: both are BulkString units.
    if (data?.bulk_error && data.bulk_error?.content)
        bulk_error = data.bulk_error.content;

    if (data?.verbatim_string && data.verbatim_string?.content)
        verbatim_string = data.verbatim_string.content;

    return (simple_string, simple_error, i, bulk_string, null, boolean, double, big_num, bulk_error, verbatim_string);
}

View file

@ -0,0 +1,2 @@
ts uid id.orig_h id.orig_p id.resp_h id.resp_p proto service duration orig_bytes resp_bytes conn_state missed_bytes history orig_pkts orig_ip_bytes resp_pkts resp_ip_bytes tunnel_parents
XXXXXXXXXX.XXXXXX CHhAvVGS1DHFjwGM9 10.1.9.63 63526 54.175.222.246 80 tcp http 0.755677 207 489 SF 0 ShADTadFf 7 790 4 705 -

View file

@ -0,0 +1,2 @@
Hello world!
Goodbye world!

View file

@ -0,0 +1,25 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
RESP::Data {
ty: SIMPLE_STRING
simple_string: RESP::SimpleString {
content: OK
}
}
RESP::Data {
ty: INTEGER
integer: RESP::Integer {
int_: 1000
}
}
RESP::Data {
ty: INTEGER
integer: RESP::Integer {
int_: -1000
}
}
RESP::Data {
ty: INTEGER
integer: RESP::Integer {
int_: 1000
}
}

View file

@ -0,0 +1,15 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
Testing RESP: [simple_string=<uninitialized>, simple_error=<uninitialized>, i=<uninitialized>, bulk_string=SET, is_null=F, boolean=<uninitialized>, double_=<uninitialized>, big_num=<uninitialized>, bulk_error=<uninitialized>, verbatim_string=<uninitialized>]
Testing RESP: [simple_string=<uninitialized>, simple_error=<uninitialized>, i=<uninitialized>, bulk_string=hi:2, is_null=F, boolean=<uninitialized>, double_=<uninitialized>, big_num=<uninitialized>, bulk_error=<uninitialized>, verbatim_string=<uninitialized>]
Testing RESP: [simple_string=<uninitialized>, simple_error=<uninitialized>, i=<uninitialized>, bulk_string=2, is_null=F, boolean=<uninitialized>, double_=<uninitialized>, big_num=<uninitialized>, bulk_error=<uninitialized>, verbatim_string=<uninitialized>]
Testing RESP: [simple_string=<uninitialized>, simple_error=<uninitialized>, i=<uninitialized>, bulk_string=<uninitialized>, is_null=F, boolean=<uninitialized>, double_=<uninitialized>, big_num=<uninitialized>, bulk_error=<uninitialized>, verbatim_string=<uninitialized>]
Testing RESP: [simple_string=OK, simple_error=<uninitialized>, i=<uninitialized>, bulk_string=<uninitialized>, is_null=F, boolean=<uninitialized>, double_=<uninitialized>, big_num=<uninitialized>, bulk_error=<uninitialized>, verbatim_string=<uninitialized>]
Testing RESP: [simple_string=<uninitialized>, simple_error=<uninitialized>, i=<uninitialized>, bulk_string=SET, is_null=F, boolean=<uninitialized>, double_=<uninitialized>, big_num=<uninitialized>, bulk_error=<uninitialized>, verbatim_string=<uninitialized>]
Testing RESP: [simple_string=<uninitialized>, simple_error=<uninitialized>, i=<uninitialized>, bulk_string=hi:3, is_null=F, boolean=<uninitialized>, double_=<uninitialized>, big_num=<uninitialized>, bulk_error=<uninitialized>, verbatim_string=<uninitialized>]
Testing RESP: [simple_string=<uninitialized>, simple_error=<uninitialized>, i=<uninitialized>, bulk_string=sup, is_null=F, boolean=<uninitialized>, double_=<uninitialized>, big_num=<uninitialized>, bulk_error=<uninitialized>, verbatim_string=<uninitialized>]
Testing RESP: [simple_string=<uninitialized>, simple_error=<uninitialized>, i=<uninitialized>, bulk_string=<uninitialized>, is_null=F, boolean=<uninitialized>, double_=<uninitialized>, big_num=<uninitialized>, bulk_error=<uninitialized>, verbatim_string=<uninitialized>]
Testing RESP: [simple_string=OK, simple_error=<uninitialized>, i=<uninitialized>, bulk_string=<uninitialized>, is_null=F, boolean=<uninitialized>, double_=<uninitialized>, big_num=<uninitialized>, bulk_error=<uninitialized>, verbatim_string=<uninitialized>]
Testing RESP: [simple_string=<uninitialized>, simple_error=<uninitialized>, i=<uninitialized>, bulk_string=GET, is_null=F, boolean=<uninitialized>, double_=<uninitialized>, big_num=<uninitialized>, bulk_error=<uninitialized>, verbatim_string=<uninitialized>]
Testing RESP: [simple_string=<uninitialized>, simple_error=<uninitialized>, i=<uninitialized>, bulk_string=hi:3, is_null=F, boolean=<uninitialized>, double_=<uninitialized>, big_num=<uninitialized>, bulk_error=<uninitialized>, verbatim_string=<uninitialized>]
Testing RESP: [simple_string=<uninitialized>, simple_error=<uninitialized>, i=<uninitialized>, bulk_string=<uninitialized>, is_null=F, boolean=<uninitialized>, double_=<uninitialized>, big_num=<uninitialized>, bulk_error=<uninitialized>, verbatim_string=<uninitialized>]
Testing RESP: [simple_string=<uninitialized>, simple_error=<uninitialized>, i=<uninitialized>, bulk_string=sup, is_null=F, boolean=<uninitialized>, double_=<uninitialized>, big_num=<uninitialized>, bulk_error=<uninitialized>, verbatim_string=<uninitialized>]

View file

@ -0,0 +1,24 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
#separator \x09
#set_separator ,
#empty_field (empty)
#unset_field -
#path resp
#open XXXX-XX-XX-XX-XX-XX
#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p resp_data.simple_string resp_data.simple_error resp_data.i resp_data.bulk_string resp_data.is_null resp_data.boolean resp_data.double_ resp_data.big_num resp_data.bulk_error resp_data.verbatim_string
#types time string addr port addr port string string int string bool bool double string string string
XXXXXXXXXX.XXXXXX C4J4Th3PJpwUYZZ6gc 127.0.0.1 58972 127.0.0.1 6379 - - - SET F - - - - -
XXXXXXXXXX.XXXXXX C4J4Th3PJpwUYZZ6gc 127.0.0.1 58972 127.0.0.1 6379 - - - hi:2 F - - - - -
XXXXXXXXXX.XXXXXX C4J4Th3PJpwUYZZ6gc 127.0.0.1 58972 127.0.0.1 6379 - - - 2 F - - - - -
XXXXXXXXXX.XXXXXX C4J4Th3PJpwUYZZ6gc 127.0.0.1 58972 127.0.0.1 6379 - - - - F - - - - -
XXXXXXXXXX.XXXXXX C4J4Th3PJpwUYZZ6gc 127.0.0.1 58972 127.0.0.1 6379 OK - - - F - - - - -
XXXXXXXXXX.XXXXXX C4J4Th3PJpwUYZZ6gc 127.0.0.1 58972 127.0.0.1 6379 - - - SET F - - - - -
XXXXXXXXXX.XXXXXX C4J4Th3PJpwUYZZ6gc 127.0.0.1 58972 127.0.0.1 6379 - - - hi:3 F - - - - -
XXXXXXXXXX.XXXXXX C4J4Th3PJpwUYZZ6gc 127.0.0.1 58972 127.0.0.1 6379 - - - sup F - - - - -
XXXXXXXXXX.XXXXXX C4J4Th3PJpwUYZZ6gc 127.0.0.1 58972 127.0.0.1 6379 - - - - F - - - - -
XXXXXXXXXX.XXXXXX C4J4Th3PJpwUYZZ6gc 127.0.0.1 58972 127.0.0.1 6379 OK - - - F - - - - -
XXXXXXXXXX.XXXXXX C4J4Th3PJpwUYZZ6gc 127.0.0.1 58972 127.0.0.1 6379 - - - GET F - - - - -
XXXXXXXXXX.XXXXXX C4J4Th3PJpwUYZZ6gc 127.0.0.1 58972 127.0.0.1 6379 - - - hi:3 F - - - - -
XXXXXXXXXX.XXXXXX C4J4Th3PJpwUYZZ6gc 127.0.0.1 58972 127.0.0.1 6379 - - - - F - - - - -
XXXXXXXXXX.XXXXXX C4J4Th3PJpwUYZZ6gc 127.0.0.1 58972 127.0.0.1 6379 - - - sup F - - - - -
#close XXXX-XX-XX-XX-XX-XX

Binary file not shown.

View file

@ -0,0 +1,3 @@
# @TEST-DOC: Check that the RESP analyzer is available.
#
# @TEST-EXEC: zeek -NN | grep -Eqi 'ANALYZER_SPICY_RESP'

View file

@ -0,0 +1,11 @@
# @TEST-DOC: Test parsing behavior of RESP.
#
# @TEST-EXEC: spicyc ${DIST}/analyzer/resp.spicy -j -d -o resp.hlto
#
# TODO: A lot of tests are possible from the docs and having them would be nice.
# But, a lot of characters ($, -, etc.) cause problems with TEST_EXEC. ugh.
# @TEST-EXEC: printf "+OK\x0d\x0a" | spicy-dump -p RESP::Data resp.hlto >>output 2>&1
# @TEST-EXEC: printf ":1000\x0d\x0a" | spicy-dump -p RESP::Data resp.hlto >>output 2>&1
# @TEST-EXEC: printf ":-1000\x0d\x0a" | spicy-dump -p RESP::Data resp.hlto >>output 2>&1
# @TEST-EXEC: printf ":+1000\x0d\x0a" | spicy-dump -p RESP::Data resp.hlto >>output 2>&1
# @TEST-EXEC: TEST_DIFF_CANONIFIER= btest-diff output

View file

@ -0,0 +1,10 @@
# @TEST-DOC: Test Zeek parsing a trace file through the RESP analyzer.
#
# @TEST-EXEC: zeek -Cr $TRACES/redis/loop-redis.trace base/protocols/redis %INPUT >output
# @TEST-EXEC: btest-diff output
# @TEST-EXEC: btest-diff resp.log
# Print every RESP value the analyzer reports so that btest-diff can compare
# the output against the baseline.
event RESP::data(c: connection, payload: RESP::RESPData)
{
print fmt("Testing RESP: %s", payload);
}