websocket: Add Spicy parser version, too.

The Spicy analyzer is added as a child analyzer when enabled and the
WebSocket.cc logic dispatches between the BinPac and Spicy version.

It substantially slower when tested against a somewhat artificial
2.4GB PCAP. The first flamegraph indicates that the unmask() function
stands out with 35% of all samples, and above it shared_ptr samples.
This commit is contained in:
Arne Welzel 2024-01-24 20:00:40 +01:00
parent 160ccda45f
commit c1a685a05d
9 changed files with 280 additions and 14 deletions

View file

@ -470,6 +470,12 @@ export {
## Whether to enable DPD on WebSocket frame payload by default.
const use_dpd_default = T &redef;
## Whether to use the Spicy WebSocket protocol analyzer.
##
## As of now, the BinPac version has better performance, but
## we may change the default in the future.
const use_spicy_analyzer = F &redef;
## Record type that is passed to :zeek:see:`WebSocket::configure_analyzer`.
##
## This record allows to configure the WebSocket analyzer given

View file

@ -13,3 +13,7 @@ zeek_add_plugin(
websocket.pac
websocket-analyzer.pac
websocket-protocol.pac)
if (USE_SPICY_ANALYZERS)
spicy_add_analyzer(NAME WebSocket SOURCES websocket.spicy websocket.evt)
endif ()

View file

@ -10,14 +10,7 @@
namespace zeek::analyzer::websocket {
WebSocket_Analyzer::WebSocket_Analyzer(Connection* conn) : analyzer::tcp::TCP_ApplicationAnalyzer("WebSocket", conn) {
// TODO: Consider approaches dispatching to optionally use a
// Spicy analyzer here instead of the BinPac interpreter.
//
// E.g. we could instantiate a SPICY_WEBSOCKET analyzer and pass it the necessary
// information and call DeliverStream() directly on it.
interp = std::make_unique<binpac::WebSocket::WebSocket_Conn>(this);
}
WebSocket_Analyzer::WebSocket_Analyzer(Connection* conn) : analyzer::tcp::TCP_ApplicationAnalyzer("WebSocket", conn) {}
void WebSocket_Analyzer::Init() {
tcp::TCP_ApplicationAnalyzer::Init();
@ -42,6 +35,25 @@ bool WebSocket_Analyzer::Configure(zeek::RecordValPtr config) {
static const auto& config_type = id::find_type<zeek::RecordType>("WebSocket::AnalyzerConfig");
static int analyzer_idx = config_type->FieldOffset("analyzer");
static int use_dpd_idx = config_type->FieldOffset("use_dpd");
static const bool use_spicy_analyzer = id::find_val<BoolVal>("WebSocket::use_spicy_analyzer")->AsBool();
analyzer::Analyzer* effective_analyzer = nullptr;
if ( use_spicy_analyzer ) {
static const auto* component = zeek::analyzer_mgr->Lookup("SPICY_WEBSOCKET");
if ( ! component ) {
reporter->FatalError("SPICY_WEBSOCKET analyzer tag not available");
return false;
}
effective_analyzer = zeek::analyzer_mgr->InstantiateAnalyzer(component->Tag(), Conn());
if ( ! AddChildAnalyzer(effective_analyzer) )
return false;
}
else {
interp = std::make_unique<binpac::WebSocket::WebSocket_Conn>(this);
effective_analyzer = this;
}
if ( config->HasField(analyzer_idx) ) {
const auto& analyzer_tag_val = config->GetField(analyzer_idx);
@ -60,13 +72,13 @@ bool WebSocket_Analyzer::Configure(zeek::RecordValPtr config) {
if ( ! analyzer )
return false;
return AddChildAnalyzer(analyzer);
return effective_analyzer->AddChildAnalyzer(analyzer);
}
else if ( config->GetField(use_dpd_idx)->AsBool() ) {
DBG_LOG(DBG_ANALYZER, "%s Configure() enables DPD via PIA_TCP", fmt_analyzer(this).c_str());
auto* pia = new analyzer::pia::PIA_TCP(Conn());
if ( AddChildAnalyzer(pia) ) {
if ( effective_analyzer->AddChildAnalyzer(pia) ) {
pia->FirstPacket(true, nullptr);
pia->FirstPacket(false, nullptr);
return true;
@ -87,10 +99,16 @@ void WebSocket_Analyzer::DeliverStream(int len, const u_char* data, bool orig) {
return;
}
try {
interp->NewData(orig, data, data + len);
} catch ( const binpac::Exception& e ) {
AnalyzerViolation(e.c_msg(), reinterpret_cast<const char*>(data), len);
if ( interp ) {
try {
interp->NewData(orig, data, data + len);
} catch ( const binpac::Exception& e ) {
AnalyzerViolation(e.c_msg(), reinterpret_cast<const char*>(data), len);
}
}
else {
// The Spicy analyzer was attached as a child directly.
ForwardStream(len, data, orig);
}
}

View file

@ -0,0 +1,18 @@
# Copyright (c) 2024 by the Zeek Project. See LICENSE for details.
%doc-id= Zeek::WebSocket;
%doc-description = "webSocket analyzer";
protocol analyzer spicy::WebSocket over TCP:
parse originator with WebSocket::Messages,
parse responder with WebSocket::Messages;
import WebSocket;
on WebSocket::Message -> event websocket_message($conn, $is_orig, self.opcode);
on WebSocket::Frame -> event websocket_frame($conn, $is_orig, self.fin, self.rsv, self.opcode, self.payload_len);
on WebSocket::CloseFrame -> event websocket_close($conn, $is_orig, self.status, self.reason);
on WebSocket::Frame::chunk -> event websocket_frame_data($conn, $is_orig, self.chunk);

View file

@ -0,0 +1,128 @@
# Copyright (c) 2024 by the Zeek Project. See LICENSE for details.
module WebSocket;
import spicy;
import zeek;
const OPCODE_CONTINUATION = 0x00;
const OPCODE_TEXT = 0x01;
const OPCODE_BINARY = 0x02;
const OPCODE_CLOSE = 0x08;
const OPCODE_PING = 0x09;
const OPCODE_PONG = 0x0a;
# Are bytes copied like a string in C++, or is that ref counted and cheap
# like it would be in Python?
function unmask(inout f: Frame, chunk: bytes): bytes {
if ( ! f.mask )
return chunk;
local unmasked_chunk: bytes;
local i = 0;
# This is very very expensive :-( Creates a new shared_ptr for
# each byte we iterate over it seems. Visible in the first
# flamegraph I looked at.
for ( c in chunk ) {
unmasked_chunk += c ^ f.masking_key[f.masking_key_idx % 4];
i++;
f.masking_key_idx++;
}
return unmasked_chunk;
}
type Frame = unit(m: Message) {
var payload_len: uint64;
var masking_key_idx: uint64;
var close_data: bytes;
var effective_opcode: uint8;
: bitfield(16) {
fin: 0 &convert=cast<bool>($$);
rsv: 1..3;
opcode: 4..7 &convert=cast<uint8>($$);
mask: 8 &convert=cast<bool>($$);
payload_len1: 9..15;
} &bit-order=spicy::BitOrder::MSB0;
# Verify that this is either a continuation frame, or the Message does not
# yet have a non-continuation opcode, but this frame does.
: void &requires=(m.opcode != OPCODE_CONTINUATION && self.opcode == OPCODE_CONTINUATION || m.opcode == OPCODE_CONTINUATION && self.opcode != OPCODE_CONTINUATION);
# Type/opcode to decide what to do with individual chunks.
: void {
self.effective_opcode = m.opcode != OPCODE_CONTINUATION ? m.opcode : self.opcode;
}
payload_len2: uint16 if (self.payload_len1 == 126);
payload_len8: uint64 if (self.payload_len1 == 127);
: void {
self.payload_len = self.payload_len1;
if ( self?.payload_len2 )
self.payload_len = self.payload_len2;
else if ( self?.payload_len8 )
self.payload_len = self.payload_len8;
}
# This being an uint8[] allows masking_key[x] indexing, while a bytes
# object would require *masking_key.at(i) which took roughly 20% more
# runtime when I tested it.
masking_key: uint8[] &size=4 if (self.mask);
chunk: bytes &size=self.payload_len &chunked &convert=unmask(self, $$) {
self.chunk = $$; # workaround to trigger websocket_frame_data
# which has no access to $$ (???)
# Forward TEXT and BINARY data to dowstream analyzers.
if ( self.effective_opcode == OPCODE_TEXT || self.effective_opcode == OPCODE_BINARY )
zeek::protocol_data_in(zeek::is_orig(), $$);
# Accumulate the unmasked data in close_data if this a close frame
# so it can be parsed by the outer Message. It's a bit of a hack.
if ( self.effective_opcode == OPCODE_CLOSE )
self.close_data += $$;
}
};
type CloseFrame = unit {
var status: uint16;
var reason: bytes;
: bytes &eod {
if ( |$$| > 0 ) {
self.status = cast<uint16>($$.sub(0, 2).to_uint(spicy::ByteOrder::Network));
self.reason = $$.sub(2, 0);
}
}
};
public type Message = unit {
# transient trickery
var done: bool = False;
var opcode: uint8;
# Keep the first one persistent to have access
# to the payload if it's a close frame.
first_frame: Frame(self) {
self.opcode = $$.opcode;
self.done = $$.fin;
}
: Frame(self)[] &until=(self.done) if (!self.done) foreach {
self.done = $$.fin;
}
: CloseFrame &parse-from=self.first_frame.close_data if (self.opcode == OPCODE_CLOSE);
on %done {
spicy::accept_input();
}
};
public type Messages = unit {
: Message[];
};

View file

@ -0,0 +1,4 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
ts uid history service
XXXXXXXXXX.XXXXXX CHhAvVGS1DHFjwGM9 ShADadR ssh,websocket,http
XXXXXXXXXX.XXXXXX ClEkJM2Vm5giqnMf4h ShADadR ssh,websocket,http

View file

@ -0,0 +1,4 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
ts uid client server auth_success auth_attempts kex_alg host_key_alg
XXXXXXXXXX.XXXXXX CHhAvVGS1DHFjwGM9 SSH-2.0-OpenSSH_8.9p1 Ubuntu-3ubuntu0.6 SSH-2.0-OpenSSH_8.2p1 Ubuntu-4ubuntu0.5 F 4 curve25519-sha256 ssh-ed25519
XXXXXXXXXX.XXXXXX ClEkJM2Vm5giqnMf4h SSH-2.0-OpenSSH_8.9p1 Ubuntu-3ubuntu0.6 SSH-2.0-OpenSSH_8.2p1 Ubuntu-4ubuntu0.5 T 5 curve25519-sha256 ssh-ed25519

View file

@ -0,0 +1,50 @@
# @TEST-DOC: Test WebSocket events of BinPac and Spicy analyzer versions
#
# @TEST-REQUIRES: ${SCRIPTS}/have-spicy
#
# @TEST-EXEC: echo "jupyter-websocket.pcap" >>out
# @TEST-EXEC: zeek -b -r $TRACES/websocket/jupyter-websocket.pcap %INPUT >>out
# @TEST-EXEC: echo "message-too-big-status.pcap" >>out
# @TEST-EXEC: zeek -b -r $TRACES//websocket/message-too-big-status.pcap %INPUT >>out
#
# @TEST-EXEC: echo "jupyter-websocket.pcap" >>out.spicy
# @TEST-EXEC: zeek -b -r $TRACES/websocket/jupyter-websocket.pcap %INPUT WebSocket::use_spicy_analyzer=T >>out.spicy
# @TEST-EXEC: echo "message-too-big-status.pcap" >>out.spicy
# @TEST-EXEC: zeek -b -r $TRACES//websocket/message-too-big-status.pcap %INPUT WebSocket::use_spicy_analyzer=T >>out.spicy
# @TEST-EXEC: diff -u out.spicy out >&2
# @TEST-EXEC: test ! -f analyzer.log
# @TEST-EXEC: test ! -f weird.log
@load base/protocols/websocket
redef record connection += {
ws_data_len: count &default=0;
};
event websocket_established(c: connection, aid: count)
{
print "websocket_established", c$uid, aid, c$websocket;
}
event websocket_message(c: connection, is_orig: bool, opcode: count)
{
print "websocket_message", c$uid, is_orig, "opcode", WebSocket::opcodes[opcode], "data_len", c$ws_data_len;
c$ws_data_len = 0;
}
event websocket_frame(c: connection, is_orig: bool, fin: bool, rsv: count, opcode: count, payload_len: count)
{
print "websocket_frame", c$uid, is_orig, "fin", fin, "rsv", rsv, "opcode", WebSocket::opcodes[opcode], "payload_len", payload_len;
}
event websocket_frame_data(c: connection, is_orig: bool, data: string)
{
# Spicy and binpac differ for data events, just ensure they end up having the same total data length.
# print "websocket_frame_data", c$uid, is_orig, "len", |data|, "data", data[:120];
c$ws_data_len += |data|;
}
event websocket_close(c: connection, is_orig: bool, status: count, reason: string)
{
print "websocket_close", c$uid, is_orig, "status", status, "reason", reason;
}

View file

@ -0,0 +1,34 @@
# @TEST-DOC: Test SSH connection tunneled within WebSocket using wstunnel, comparing BinPac and Spicy.
#
# @TEST-REQUIRES: ${SCRIPTS}/have-spicy
#
# @TEST-EXEC: zeek -b -r $TRACES/websocket/wstunnel-ssh.pcap %INPUT
# @TEST-EXEC: zeek-cut -m ts uid history service < conn.log > conn.log.cut
# @TEST-EXEC: zeek-cut -m ts uid client server auth_success auth_attempts kex_alg host_key_alg < ssh.log > ssh.log.cut
# @TEST-EXEC: rm -v *log
# @TEST-EXEC: zeek -b -r $TRACES/websocket/wstunnel-ssh.pcap WebSocket::use_spicy_analyzer=T %INPUT
# @TEST-EXEC: zeek-cut -m ts uid history service < conn.log > conn.log.cut.spicy
# @TEST-EXEC: zeek-cut -m ts uid client server auth_success auth_attempts kex_alg host_key_alg < ssh.log > ssh.log.cut.spicy
#
# @TEST-EXEC: diff -u conn.log.cut.spicy conn.log.cut >&2
# @TEST-EXEC: diff -u ssh.log.cut.spicy ssh.log.cut >&2
# @TEST-EXEC: btest-diff conn.log.cut.spicy
# @TEST-EXEC: btest-diff ssh.log.cut.spicy
# @TEST-EXEC: test ! -f analyzer.log
# @TEST-EXEC: test ! -f weird.log
@load base/protocols/conn
@load base/protocols/ssh
@load base/protocols/websocket
# Make conn.log compatible, the spicy version uses SPICY_ANALYZER, so need
# to normalize the c$service entry (and do it in either case to keep determinism).
event connection_state_remove(c: connection) &priority=10
{
if ( "SPICY_WEBSOCKET" in c$service || "WEBSOCKET" in c$service )
{
delete c$service["SPICY_WEBSOCKET"];
delete c$service["WEBSOCKET"];
add c$service["WEBSOCKET"];
}
}