From c1a685a05dc2ad94cb52053dd4624b3255b26a0e Mon Sep 17 00:00:00 2001 From: Arne Welzel Date: Wed, 24 Jan 2024 20:00:40 +0100 Subject: [PATCH] websocket: Add Spicy parser version, too. The Spicy analyzer is added as a child analyzer when enabled and the WebSocket.cc logic dispatches between the BinPac and Spicy version. It substantially slower when tested against a somewhat artificial 2.4GB PCAP. The first flamegraph indicates that the unmask() function stands out with 35% of all samples, and above it shared_ptr samples. --- scripts/base/init-bare.zeek | 6 + .../protocol/websocket/CMakeLists.txt | 4 + src/analyzer/protocol/websocket/WebSocket.cc | 46 +++++-- src/analyzer/protocol/websocket/websocket.evt | 18 +++ .../protocol/websocket/websocket.spicy | 128 ++++++++++++++++++ .../conn.log.cut.spicy | 4 + .../ssh.log.cut.spicy | 4 + .../protocols/websocket/events-spicy.zeek | 50 +++++++ .../websocket/wstunnel-ssh-spicy.zeek | 34 +++++ 9 files changed, 280 insertions(+), 14 deletions(-) create mode 100644 src/analyzer/protocol/websocket/websocket.evt create mode 100644 src/analyzer/protocol/websocket/websocket.spicy create mode 100644 testing/btest/Baseline/scripts.base.protocols.websocket.wstunnel-ssh-spicy/conn.log.cut.spicy create mode 100644 testing/btest/Baseline/scripts.base.protocols.websocket.wstunnel-ssh-spicy/ssh.log.cut.spicy create mode 100644 testing/btest/scripts/base/protocols/websocket/events-spicy.zeek create mode 100644 testing/btest/scripts/base/protocols/websocket/wstunnel-ssh-spicy.zeek diff --git a/scripts/base/init-bare.zeek b/scripts/base/init-bare.zeek index 8345d71290..dd0371ab09 100644 --- a/scripts/base/init-bare.zeek +++ b/scripts/base/init-bare.zeek @@ -470,6 +470,12 @@ export { ## Whether to enable DPD on WebSocket frame payload by default. const use_dpd_default = T &redef; + ## Whether to use the Spicy WebSocket protocol analyzer. + ## + ## As of now, the BinPac version has better performance, but + ## we may change the default in the future. + const use_spicy_analyzer = F &redef; + ## Record type that is passed to :zeek:see:`WebSocket::configure_analyzer`. ## ## This record allows to configure the WebSocket analyzer given diff --git a/src/analyzer/protocol/websocket/CMakeLists.txt b/src/analyzer/protocol/websocket/CMakeLists.txt index d522c58821..20688f61db 100644 --- a/src/analyzer/protocol/websocket/CMakeLists.txt +++ b/src/analyzer/protocol/websocket/CMakeLists.txt @@ -13,3 +13,7 @@ zeek_add_plugin( websocket.pac websocket-analyzer.pac websocket-protocol.pac) + +if (USE_SPICY_ANALYZERS) + spicy_add_analyzer(NAME WebSocket SOURCES websocket.spicy websocket.evt) +endif () diff --git a/src/analyzer/protocol/websocket/WebSocket.cc b/src/analyzer/protocol/websocket/WebSocket.cc index de098e472c..81318f6963 100644 --- a/src/analyzer/protocol/websocket/WebSocket.cc +++ b/src/analyzer/protocol/websocket/WebSocket.cc @@ -10,14 +10,7 @@ namespace zeek::analyzer::websocket { -WebSocket_Analyzer::WebSocket_Analyzer(Connection* conn) : analyzer::tcp::TCP_ApplicationAnalyzer("WebSocket", conn) { - // TODO: Consider approaches dispatching to optionally use a - // Spicy analyzer here instead of the BinPac interpreter. - // - // E.g. we could instantiate a SPICY_WEBSOCKET analyzer and pass it the necessary - // information and call DeliverStream() directly on it. - interp = std::make_unique(this); -} +WebSocket_Analyzer::WebSocket_Analyzer(Connection* conn) : analyzer::tcp::TCP_ApplicationAnalyzer("WebSocket", conn) {} void WebSocket_Analyzer::Init() { tcp::TCP_ApplicationAnalyzer::Init(); @@ -42,6 +35,25 @@ bool WebSocket_Analyzer::Configure(zeek::RecordValPtr config) { static const auto& config_type = id::find_type("WebSocket::AnalyzerConfig"); static int analyzer_idx = config_type->FieldOffset("analyzer"); static int use_dpd_idx = config_type->FieldOffset("use_dpd"); + static const bool use_spicy_analyzer = id::find_val("WebSocket::use_spicy_analyzer")->AsBool(); + + analyzer::Analyzer* effective_analyzer = nullptr; + + if ( use_spicy_analyzer ) { + static const auto* component = zeek::analyzer_mgr->Lookup("SPICY_WEBSOCKET"); + if ( ! component ) { + reporter->FatalError("SPICY_WEBSOCKET analyzer tag not available"); + return false; + } + + effective_analyzer = zeek::analyzer_mgr->InstantiateAnalyzer(component->Tag(), Conn()); + if ( ! AddChildAnalyzer(effective_analyzer) ) + return false; + } + else { + interp = std::make_unique(this); + effective_analyzer = this; + } if ( config->HasField(analyzer_idx) ) { const auto& analyzer_tag_val = config->GetField(analyzer_idx); @@ -60,13 +72,13 @@ bool WebSocket_Analyzer::Configure(zeek::RecordValPtr config) { if ( ! analyzer ) return false; - return AddChildAnalyzer(analyzer); + return effective_analyzer->AddChildAnalyzer(analyzer); } else if ( config->GetField(use_dpd_idx)->AsBool() ) { DBG_LOG(DBG_ANALYZER, "%s Configure() enables DPD via PIA_TCP", fmt_analyzer(this).c_str()); auto* pia = new analyzer::pia::PIA_TCP(Conn()); - if ( AddChildAnalyzer(pia) ) { + if ( effective_analyzer->AddChildAnalyzer(pia) ) { pia->FirstPacket(true, nullptr); pia->FirstPacket(false, nullptr); return true; @@ -87,10 +99,16 @@ void WebSocket_Analyzer::DeliverStream(int len, const u_char* data, bool orig) { return; } - try { - interp->NewData(orig, data, data + len); - } catch ( const binpac::Exception& e ) { - AnalyzerViolation(e.c_msg(), reinterpret_cast(data), len); + if ( interp ) { + try { + interp->NewData(orig, data, data + len); + } catch ( const binpac::Exception& e ) { + AnalyzerViolation(e.c_msg(), reinterpret_cast(data), len); + } + } + else { + // The Spicy analyzer was attached as a child directly. + ForwardStream(len, data, orig); } } diff --git a/src/analyzer/protocol/websocket/websocket.evt b/src/analyzer/protocol/websocket/websocket.evt new file mode 100644 index 0000000000..ff182a5156 --- /dev/null +++ b/src/analyzer/protocol/websocket/websocket.evt @@ -0,0 +1,18 @@ +# Copyright (c) 2024 by the Zeek Project. See LICENSE for details. + +%doc-id= Zeek::WebSocket; +%doc-description = "webSocket analyzer"; + +protocol analyzer spicy::WebSocket over TCP: + parse originator with WebSocket::Messages, + parse responder with WebSocket::Messages; + +import WebSocket; + +on WebSocket::Message -> event websocket_message($conn, $is_orig, self.opcode); + +on WebSocket::Frame -> event websocket_frame($conn, $is_orig, self.fin, self.rsv, self.opcode, self.payload_len); + +on WebSocket::CloseFrame -> event websocket_close($conn, $is_orig, self.status, self.reason); + +on WebSocket::Frame::chunk -> event websocket_frame_data($conn, $is_orig, self.chunk); diff --git a/src/analyzer/protocol/websocket/websocket.spicy b/src/analyzer/protocol/websocket/websocket.spicy new file mode 100644 index 0000000000..9fbc74fdf4 --- /dev/null +++ b/src/analyzer/protocol/websocket/websocket.spicy @@ -0,0 +1,128 @@ +# Copyright (c) 2024 by the Zeek Project. See LICENSE for details. + +module WebSocket; + +import spicy; +import zeek; + +const OPCODE_CONTINUATION = 0x00; +const OPCODE_TEXT = 0x01; +const OPCODE_BINARY = 0x02; +const OPCODE_CLOSE = 0x08; +const OPCODE_PING = 0x09; +const OPCODE_PONG = 0x0a; + +# Are bytes copied like a string in C++, or is that ref counted and cheap +# like it would be in Python? +function unmask(inout f: Frame, chunk: bytes): bytes { + if ( ! f.mask ) + return chunk; + + local unmasked_chunk: bytes; + local i = 0; + # This is very very expensive :-( Creates a new shared_ptr for + # each byte we iterate over it seems. Visible in the first + # flamegraph I looked at. + for ( c in chunk ) { + unmasked_chunk += c ^ f.masking_key[f.masking_key_idx % 4]; + i++; + f.masking_key_idx++; + } + + return unmasked_chunk; +} + +type Frame = unit(m: Message) { + var payload_len: uint64; + var masking_key_idx: uint64; + var close_data: bytes; + var effective_opcode: uint8; + + : bitfield(16) { + fin: 0 &convert=cast($$); + rsv: 1..3; + opcode: 4..7 &convert=cast($$); + mask: 8 &convert=cast($$); + payload_len1: 9..15; + } &bit-order=spicy::BitOrder::MSB0; + + # Verify that this is either a continuation frame, or the Message does not + # yet have a non-continuation opcode, but this frame does. + : void &requires=(m.opcode != OPCODE_CONTINUATION && self.opcode == OPCODE_CONTINUATION || m.opcode == OPCODE_CONTINUATION && self.opcode != OPCODE_CONTINUATION); + + # Type/opcode to decide what to do with individual chunks. + : void { + self.effective_opcode = m.opcode != OPCODE_CONTINUATION ? m.opcode : self.opcode; + } + + payload_len2: uint16 if (self.payload_len1 == 126); + payload_len8: uint64 if (self.payload_len1 == 127); + + : void { + self.payload_len = self.payload_len1; + if ( self?.payload_len2 ) + self.payload_len = self.payload_len2; + else if ( self?.payload_len8 ) + self.payload_len = self.payload_len8; + } + + # This being an uint8[] allows masking_key[x] indexing, while a bytes + # object would require *masking_key.at(i) which took roughly 20% more + # runtime when I tested it. + masking_key: uint8[] &size=4 if (self.mask); + + chunk: bytes &size=self.payload_len &chunked &convert=unmask(self, $$) { + self.chunk = $$; # workaround to trigger websocket_frame_data + # which has no access to $$ (???) + + # Forward TEXT and BINARY data to dowstream analyzers. + if ( self.effective_opcode == OPCODE_TEXT || self.effective_opcode == OPCODE_BINARY ) + zeek::protocol_data_in(zeek::is_orig(), $$); + + # Accumulate the unmasked data in close_data if this a close frame + # so it can be parsed by the outer Message. It's a bit of a hack. + if ( self.effective_opcode == OPCODE_CLOSE ) + self.close_data += $$; + } +}; + +type CloseFrame = unit { + var status: uint16; + var reason: bytes; + + : bytes &eod { + if ( |$$| > 0 ) { + self.status = cast($$.sub(0, 2).to_uint(spicy::ByteOrder::Network)); + self.reason = $$.sub(2, 0); + } + } +}; + + +public type Message = unit { + # transient trickery + var done: bool = False; + + var opcode: uint8; + + # Keep the first one persistent to have access + # to the payload if it's a close frame. + first_frame: Frame(self) { + self.opcode = $$.opcode; + self.done = $$.fin; + } + + : Frame(self)[] &until=(self.done) if (!self.done) foreach { + self.done = $$.fin; + } + + : CloseFrame &parse-from=self.first_frame.close_data if (self.opcode == OPCODE_CLOSE); + + on %done { + spicy::accept_input(); + } +}; + +public type Messages = unit { + : Message[]; +}; diff --git a/testing/btest/Baseline/scripts.base.protocols.websocket.wstunnel-ssh-spicy/conn.log.cut.spicy b/testing/btest/Baseline/scripts.base.protocols.websocket.wstunnel-ssh-spicy/conn.log.cut.spicy new file mode 100644 index 0000000000..3e8944eb50 --- /dev/null +++ b/testing/btest/Baseline/scripts.base.protocols.websocket.wstunnel-ssh-spicy/conn.log.cut.spicy @@ -0,0 +1,4 @@ +### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. +ts uid history service +XXXXXXXXXX.XXXXXX CHhAvVGS1DHFjwGM9 ShADadR ssh,websocket,http +XXXXXXXXXX.XXXXXX ClEkJM2Vm5giqnMf4h ShADadR ssh,websocket,http diff --git a/testing/btest/Baseline/scripts.base.protocols.websocket.wstunnel-ssh-spicy/ssh.log.cut.spicy b/testing/btest/Baseline/scripts.base.protocols.websocket.wstunnel-ssh-spicy/ssh.log.cut.spicy new file mode 100644 index 0000000000..0780213a89 --- /dev/null +++ b/testing/btest/Baseline/scripts.base.protocols.websocket.wstunnel-ssh-spicy/ssh.log.cut.spicy @@ -0,0 +1,4 @@ +### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. +ts uid client server auth_success auth_attempts kex_alg host_key_alg +XXXXXXXXXX.XXXXXX CHhAvVGS1DHFjwGM9 SSH-2.0-OpenSSH_8.9p1 Ubuntu-3ubuntu0.6 SSH-2.0-OpenSSH_8.2p1 Ubuntu-4ubuntu0.5 F 4 curve25519-sha256 ssh-ed25519 +XXXXXXXXXX.XXXXXX ClEkJM2Vm5giqnMf4h SSH-2.0-OpenSSH_8.9p1 Ubuntu-3ubuntu0.6 SSH-2.0-OpenSSH_8.2p1 Ubuntu-4ubuntu0.5 T 5 curve25519-sha256 ssh-ed25519 diff --git a/testing/btest/scripts/base/protocols/websocket/events-spicy.zeek b/testing/btest/scripts/base/protocols/websocket/events-spicy.zeek new file mode 100644 index 0000000000..87c27dbd73 --- /dev/null +++ b/testing/btest/scripts/base/protocols/websocket/events-spicy.zeek @@ -0,0 +1,50 @@ +# @TEST-DOC: Test WebSocket events of BinPac and Spicy analyzer versions +# +# @TEST-REQUIRES: ${SCRIPTS}/have-spicy +# +# @TEST-EXEC: echo "jupyter-websocket.pcap" >>out +# @TEST-EXEC: zeek -b -r $TRACES/websocket/jupyter-websocket.pcap %INPUT >>out +# @TEST-EXEC: echo "message-too-big-status.pcap" >>out +# @TEST-EXEC: zeek -b -r $TRACES//websocket/message-too-big-status.pcap %INPUT >>out +# +# @TEST-EXEC: echo "jupyter-websocket.pcap" >>out.spicy +# @TEST-EXEC: zeek -b -r $TRACES/websocket/jupyter-websocket.pcap %INPUT WebSocket::use_spicy_analyzer=T >>out.spicy +# @TEST-EXEC: echo "message-too-big-status.pcap" >>out.spicy +# @TEST-EXEC: zeek -b -r $TRACES//websocket/message-too-big-status.pcap %INPUT WebSocket::use_spicy_analyzer=T >>out.spicy +# @TEST-EXEC: diff -u out.spicy out >&2 +# @TEST-EXEC: test ! -f analyzer.log +# @TEST-EXEC: test ! -f weird.log + +@load base/protocols/websocket + +redef record connection += { + ws_data_len: count &default=0; +}; + +event websocket_established(c: connection, aid: count) + { + print "websocket_established", c$uid, aid, c$websocket; + } + +event websocket_message(c: connection, is_orig: bool, opcode: count) + { + print "websocket_message", c$uid, is_orig, "opcode", WebSocket::opcodes[opcode], "data_len", c$ws_data_len; + c$ws_data_len = 0; + } + +event websocket_frame(c: connection, is_orig: bool, fin: bool, rsv: count, opcode: count, payload_len: count) + { + print "websocket_frame", c$uid, is_orig, "fin", fin, "rsv", rsv, "opcode", WebSocket::opcodes[opcode], "payload_len", payload_len; + } + +event websocket_frame_data(c: connection, is_orig: bool, data: string) + { + # Spicy and binpac differ for data events, just ensure they end up having the same total data length. + # print "websocket_frame_data", c$uid, is_orig, "len", |data|, "data", data[:120]; + c$ws_data_len += |data|; + } + +event websocket_close(c: connection, is_orig: bool, status: count, reason: string) + { + print "websocket_close", c$uid, is_orig, "status", status, "reason", reason; + } diff --git a/testing/btest/scripts/base/protocols/websocket/wstunnel-ssh-spicy.zeek b/testing/btest/scripts/base/protocols/websocket/wstunnel-ssh-spicy.zeek new file mode 100644 index 0000000000..c4d7a659be --- /dev/null +++ b/testing/btest/scripts/base/protocols/websocket/wstunnel-ssh-spicy.zeek @@ -0,0 +1,34 @@ +# @TEST-DOC: Test SSH connection tunneled within WebSocket using wstunnel, comparing BinPac and Spicy. +# +# @TEST-REQUIRES: ${SCRIPTS}/have-spicy +# +# @TEST-EXEC: zeek -b -r $TRACES/websocket/wstunnel-ssh.pcap %INPUT +# @TEST-EXEC: zeek-cut -m ts uid history service < conn.log > conn.log.cut +# @TEST-EXEC: zeek-cut -m ts uid client server auth_success auth_attempts kex_alg host_key_alg < ssh.log > ssh.log.cut +# @TEST-EXEC: rm -v *log +# @TEST-EXEC: zeek -b -r $TRACES/websocket/wstunnel-ssh.pcap WebSocket::use_spicy_analyzer=T %INPUT +# @TEST-EXEC: zeek-cut -m ts uid history service < conn.log > conn.log.cut.spicy +# @TEST-EXEC: zeek-cut -m ts uid client server auth_success auth_attempts kex_alg host_key_alg < ssh.log > ssh.log.cut.spicy +# +# @TEST-EXEC: diff -u conn.log.cut.spicy conn.log.cut >&2 +# @TEST-EXEC: diff -u ssh.log.cut.spicy ssh.log.cut >&2 +# @TEST-EXEC: btest-diff conn.log.cut.spicy +# @TEST-EXEC: btest-diff ssh.log.cut.spicy +# @TEST-EXEC: test ! -f analyzer.log +# @TEST-EXEC: test ! -f weird.log + +@load base/protocols/conn +@load base/protocols/ssh +@load base/protocols/websocket + +# Make conn.log compatible, the spicy version uses SPICY_ANALYZER, so need +# to normalize the c$service entry (and do it in either case to keep determinism). +event connection_state_remove(c: connection) &priority=10 + { + if ( "SPICY_WEBSOCKET" in c$service || "WEBSOCKET" in c$service ) + { + delete c$service["SPICY_WEBSOCKET"]; + delete c$service["WEBSOCKET"]; + add c$service["WEBSOCKET"]; + } + }