mirror of
https://github.com/zeek/zeek.git
synced 2025-10-02 14:48:21 +00:00
websocket: Add Spicy parser version, too.
The Spicy analyzer is added as a child analyzer when enabled and the WebSocket.cc logic dispatches between the BinPac and Spicy version. It substantially slower when tested against a somewhat artificial 2.4GB PCAP. The first flamegraph indicates that the unmask() function stands out with 35% of all samples, and above it shared_ptr samples.
This commit is contained in:
parent
160ccda45f
commit
c1a685a05d
9 changed files with 280 additions and 14 deletions
|
@ -470,6 +470,12 @@ export {
|
||||||
## Whether to enable DPD on WebSocket frame payload by default.
|
## Whether to enable DPD on WebSocket frame payload by default.
|
||||||
const use_dpd_default = T &redef;
|
const use_dpd_default = T &redef;
|
||||||
|
|
||||||
|
## Whether to use the Spicy WebSocket protocol analyzer.
|
||||||
|
##
|
||||||
|
## As of now, the BinPac version has better performance, but
|
||||||
|
## we may change the default in the future.
|
||||||
|
const use_spicy_analyzer = F &redef;
|
||||||
|
|
||||||
## Record type that is passed to :zeek:see:`WebSocket::configure_analyzer`.
|
## Record type that is passed to :zeek:see:`WebSocket::configure_analyzer`.
|
||||||
##
|
##
|
||||||
## This record allows to configure the WebSocket analyzer given
|
## This record allows to configure the WebSocket analyzer given
|
||||||
|
|
|
@ -13,3 +13,7 @@ zeek_add_plugin(
|
||||||
websocket.pac
|
websocket.pac
|
||||||
websocket-analyzer.pac
|
websocket-analyzer.pac
|
||||||
websocket-protocol.pac)
|
websocket-protocol.pac)
|
||||||
|
|
||||||
|
if (USE_SPICY_ANALYZERS)
|
||||||
|
spicy_add_analyzer(NAME WebSocket SOURCES websocket.spicy websocket.evt)
|
||||||
|
endif ()
|
||||||
|
|
|
@ -10,14 +10,7 @@
|
||||||
|
|
||||||
namespace zeek::analyzer::websocket {
|
namespace zeek::analyzer::websocket {
|
||||||
|
|
||||||
WebSocket_Analyzer::WebSocket_Analyzer(Connection* conn) : analyzer::tcp::TCP_ApplicationAnalyzer("WebSocket", conn) {
|
WebSocket_Analyzer::WebSocket_Analyzer(Connection* conn) : analyzer::tcp::TCP_ApplicationAnalyzer("WebSocket", conn) {}
|
||||||
// TODO: Consider approaches dispatching to optionally use a
|
|
||||||
// Spicy analyzer here instead of the BinPac interpreter.
|
|
||||||
//
|
|
||||||
// E.g. we could instantiate a SPICY_WEBSOCKET analyzer and pass it the necessary
|
|
||||||
// information and call DeliverStream() directly on it.
|
|
||||||
interp = std::make_unique<binpac::WebSocket::WebSocket_Conn>(this);
|
|
||||||
}
|
|
||||||
|
|
||||||
void WebSocket_Analyzer::Init() {
|
void WebSocket_Analyzer::Init() {
|
||||||
tcp::TCP_ApplicationAnalyzer::Init();
|
tcp::TCP_ApplicationAnalyzer::Init();
|
||||||
|
@ -42,6 +35,25 @@ bool WebSocket_Analyzer::Configure(zeek::RecordValPtr config) {
|
||||||
static const auto& config_type = id::find_type<zeek::RecordType>("WebSocket::AnalyzerConfig");
|
static const auto& config_type = id::find_type<zeek::RecordType>("WebSocket::AnalyzerConfig");
|
||||||
static int analyzer_idx = config_type->FieldOffset("analyzer");
|
static int analyzer_idx = config_type->FieldOffset("analyzer");
|
||||||
static int use_dpd_idx = config_type->FieldOffset("use_dpd");
|
static int use_dpd_idx = config_type->FieldOffset("use_dpd");
|
||||||
|
static const bool use_spicy_analyzer = id::find_val<BoolVal>("WebSocket::use_spicy_analyzer")->AsBool();
|
||||||
|
|
||||||
|
analyzer::Analyzer* effective_analyzer = nullptr;
|
||||||
|
|
||||||
|
if ( use_spicy_analyzer ) {
|
||||||
|
static const auto* component = zeek::analyzer_mgr->Lookup("SPICY_WEBSOCKET");
|
||||||
|
if ( ! component ) {
|
||||||
|
reporter->FatalError("SPICY_WEBSOCKET analyzer tag not available");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
effective_analyzer = zeek::analyzer_mgr->InstantiateAnalyzer(component->Tag(), Conn());
|
||||||
|
if ( ! AddChildAnalyzer(effective_analyzer) )
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
interp = std::make_unique<binpac::WebSocket::WebSocket_Conn>(this);
|
||||||
|
effective_analyzer = this;
|
||||||
|
}
|
||||||
|
|
||||||
if ( config->HasField(analyzer_idx) ) {
|
if ( config->HasField(analyzer_idx) ) {
|
||||||
const auto& analyzer_tag_val = config->GetField(analyzer_idx);
|
const auto& analyzer_tag_val = config->GetField(analyzer_idx);
|
||||||
|
@ -60,13 +72,13 @@ bool WebSocket_Analyzer::Configure(zeek::RecordValPtr config) {
|
||||||
if ( ! analyzer )
|
if ( ! analyzer )
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
return AddChildAnalyzer(analyzer);
|
return effective_analyzer->AddChildAnalyzer(analyzer);
|
||||||
}
|
}
|
||||||
else if ( config->GetField(use_dpd_idx)->AsBool() ) {
|
else if ( config->GetField(use_dpd_idx)->AsBool() ) {
|
||||||
DBG_LOG(DBG_ANALYZER, "%s Configure() enables DPD via PIA_TCP", fmt_analyzer(this).c_str());
|
DBG_LOG(DBG_ANALYZER, "%s Configure() enables DPD via PIA_TCP", fmt_analyzer(this).c_str());
|
||||||
|
|
||||||
auto* pia = new analyzer::pia::PIA_TCP(Conn());
|
auto* pia = new analyzer::pia::PIA_TCP(Conn());
|
||||||
if ( AddChildAnalyzer(pia) ) {
|
if ( effective_analyzer->AddChildAnalyzer(pia) ) {
|
||||||
pia->FirstPacket(true, nullptr);
|
pia->FirstPacket(true, nullptr);
|
||||||
pia->FirstPacket(false, nullptr);
|
pia->FirstPacket(false, nullptr);
|
||||||
return true;
|
return true;
|
||||||
|
@ -87,12 +99,18 @@ void WebSocket_Analyzer::DeliverStream(int len, const u_char* data, bool orig) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ( interp ) {
|
||||||
try {
|
try {
|
||||||
interp->NewData(orig, data, data + len);
|
interp->NewData(orig, data, data + len);
|
||||||
} catch ( const binpac::Exception& e ) {
|
} catch ( const binpac::Exception& e ) {
|
||||||
AnalyzerViolation(e.c_msg(), reinterpret_cast<const char*>(data), len);
|
AnalyzerViolation(e.c_msg(), reinterpret_cast<const char*>(data), len);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
else {
|
||||||
|
// The Spicy analyzer was attached as a child directly.
|
||||||
|
ForwardStream(len, data, orig);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void WebSocket_Analyzer::Undelivered(uint64_t seq, int len, bool orig) {
|
void WebSocket_Analyzer::Undelivered(uint64_t seq, int len, bool orig) {
|
||||||
analyzer::tcp::TCP_ApplicationAnalyzer::Undelivered(seq, len, orig);
|
analyzer::tcp::TCP_ApplicationAnalyzer::Undelivered(seq, len, orig);
|
||||||
|
|
18
src/analyzer/protocol/websocket/websocket.evt
Normal file
18
src/analyzer/protocol/websocket/websocket.evt
Normal file
|
@ -0,0 +1,18 @@
|
||||||
|
# Copyright (c) 2024 by the Zeek Project. See LICENSE for details.
|
||||||
|
|
||||||
|
%doc-id= Zeek::WebSocket;
|
||||||
|
%doc-description = "webSocket analyzer";
|
||||||
|
|
||||||
|
protocol analyzer spicy::WebSocket over TCP:
|
||||||
|
parse originator with WebSocket::Messages,
|
||||||
|
parse responder with WebSocket::Messages;
|
||||||
|
|
||||||
|
import WebSocket;
|
||||||
|
|
||||||
|
on WebSocket::Message -> event websocket_message($conn, $is_orig, self.opcode);
|
||||||
|
|
||||||
|
on WebSocket::Frame -> event websocket_frame($conn, $is_orig, self.fin, self.rsv, self.opcode, self.payload_len);
|
||||||
|
|
||||||
|
on WebSocket::CloseFrame -> event websocket_close($conn, $is_orig, self.status, self.reason);
|
||||||
|
|
||||||
|
on WebSocket::Frame::chunk -> event websocket_frame_data($conn, $is_orig, self.chunk);
|
128
src/analyzer/protocol/websocket/websocket.spicy
Normal file
128
src/analyzer/protocol/websocket/websocket.spicy
Normal file
|
@ -0,0 +1,128 @@
|
||||||
|
# Copyright (c) 2024 by the Zeek Project. See LICENSE for details.
|
||||||
|
|
||||||
|
module WebSocket;
|
||||||
|
|
||||||
|
import spicy;
|
||||||
|
import zeek;
|
||||||
|
|
||||||
|
const OPCODE_CONTINUATION = 0x00;
|
||||||
|
const OPCODE_TEXT = 0x01;
|
||||||
|
const OPCODE_BINARY = 0x02;
|
||||||
|
const OPCODE_CLOSE = 0x08;
|
||||||
|
const OPCODE_PING = 0x09;
|
||||||
|
const OPCODE_PONG = 0x0a;
|
||||||
|
|
||||||
|
# Are bytes copied like a string in C++, or is that ref counted and cheap
|
||||||
|
# like it would be in Python?
|
||||||
|
function unmask(inout f: Frame, chunk: bytes): bytes {
|
||||||
|
if ( ! f.mask )
|
||||||
|
return chunk;
|
||||||
|
|
||||||
|
local unmasked_chunk: bytes;
|
||||||
|
local i = 0;
|
||||||
|
# This is very very expensive :-( Creates a new shared_ptr for
|
||||||
|
# each byte we iterate over it seems. Visible in the first
|
||||||
|
# flamegraph I looked at.
|
||||||
|
for ( c in chunk ) {
|
||||||
|
unmasked_chunk += c ^ f.masking_key[f.masking_key_idx % 4];
|
||||||
|
i++;
|
||||||
|
f.masking_key_idx++;
|
||||||
|
}
|
||||||
|
|
||||||
|
return unmasked_chunk;
|
||||||
|
}
|
||||||
|
|
||||||
|
type Frame = unit(m: Message) {
|
||||||
|
var payload_len: uint64;
|
||||||
|
var masking_key_idx: uint64;
|
||||||
|
var close_data: bytes;
|
||||||
|
var effective_opcode: uint8;
|
||||||
|
|
||||||
|
: bitfield(16) {
|
||||||
|
fin: 0 &convert=cast<bool>($$);
|
||||||
|
rsv: 1..3;
|
||||||
|
opcode: 4..7 &convert=cast<uint8>($$);
|
||||||
|
mask: 8 &convert=cast<bool>($$);
|
||||||
|
payload_len1: 9..15;
|
||||||
|
} &bit-order=spicy::BitOrder::MSB0;
|
||||||
|
|
||||||
|
# Verify that this is either a continuation frame, or the Message does not
|
||||||
|
# yet have a non-continuation opcode, but this frame does.
|
||||||
|
: void &requires=(m.opcode != OPCODE_CONTINUATION && self.opcode == OPCODE_CONTINUATION || m.opcode == OPCODE_CONTINUATION && self.opcode != OPCODE_CONTINUATION);
|
||||||
|
|
||||||
|
# Type/opcode to decide what to do with individual chunks.
|
||||||
|
: void {
|
||||||
|
self.effective_opcode = m.opcode != OPCODE_CONTINUATION ? m.opcode : self.opcode;
|
||||||
|
}
|
||||||
|
|
||||||
|
payload_len2: uint16 if (self.payload_len1 == 126);
|
||||||
|
payload_len8: uint64 if (self.payload_len1 == 127);
|
||||||
|
|
||||||
|
: void {
|
||||||
|
self.payload_len = self.payload_len1;
|
||||||
|
if ( self?.payload_len2 )
|
||||||
|
self.payload_len = self.payload_len2;
|
||||||
|
else if ( self?.payload_len8 )
|
||||||
|
self.payload_len = self.payload_len8;
|
||||||
|
}
|
||||||
|
|
||||||
|
# This being an uint8[] allows masking_key[x] indexing, while a bytes
|
||||||
|
# object would require *masking_key.at(i) which took roughly 20% more
|
||||||
|
# runtime when I tested it.
|
||||||
|
masking_key: uint8[] &size=4 if (self.mask);
|
||||||
|
|
||||||
|
chunk: bytes &size=self.payload_len &chunked &convert=unmask(self, $$) {
|
||||||
|
self.chunk = $$; # workaround to trigger websocket_frame_data
|
||||||
|
# which has no access to $$ (???)
|
||||||
|
|
||||||
|
# Forward TEXT and BINARY data to dowstream analyzers.
|
||||||
|
if ( self.effective_opcode == OPCODE_TEXT || self.effective_opcode == OPCODE_BINARY )
|
||||||
|
zeek::protocol_data_in(zeek::is_orig(), $$);
|
||||||
|
|
||||||
|
# Accumulate the unmasked data in close_data if this a close frame
|
||||||
|
# so it can be parsed by the outer Message. It's a bit of a hack.
|
||||||
|
if ( self.effective_opcode == OPCODE_CLOSE )
|
||||||
|
self.close_data += $$;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
type CloseFrame = unit {
|
||||||
|
var status: uint16;
|
||||||
|
var reason: bytes;
|
||||||
|
|
||||||
|
: bytes &eod {
|
||||||
|
if ( |$$| > 0 ) {
|
||||||
|
self.status = cast<uint16>($$.sub(0, 2).to_uint(spicy::ByteOrder::Network));
|
||||||
|
self.reason = $$.sub(2, 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
public type Message = unit {
|
||||||
|
# transient trickery
|
||||||
|
var done: bool = False;
|
||||||
|
|
||||||
|
var opcode: uint8;
|
||||||
|
|
||||||
|
# Keep the first one persistent to have access
|
||||||
|
# to the payload if it's a close frame.
|
||||||
|
first_frame: Frame(self) {
|
||||||
|
self.opcode = $$.opcode;
|
||||||
|
self.done = $$.fin;
|
||||||
|
}
|
||||||
|
|
||||||
|
: Frame(self)[] &until=(self.done) if (!self.done) foreach {
|
||||||
|
self.done = $$.fin;
|
||||||
|
}
|
||||||
|
|
||||||
|
: CloseFrame &parse-from=self.first_frame.close_data if (self.opcode == OPCODE_CLOSE);
|
||||||
|
|
||||||
|
on %done {
|
||||||
|
spicy::accept_input();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
public type Messages = unit {
|
||||||
|
: Message[];
|
||||||
|
};
|
|
@ -0,0 +1,4 @@
|
||||||
|
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
|
||||||
|
ts uid history service
|
||||||
|
XXXXXXXXXX.XXXXXX CHhAvVGS1DHFjwGM9 ShADadR ssh,websocket,http
|
||||||
|
XXXXXXXXXX.XXXXXX ClEkJM2Vm5giqnMf4h ShADadR ssh,websocket,http
|
|
@ -0,0 +1,4 @@
|
||||||
|
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
|
||||||
|
ts uid client server auth_success auth_attempts kex_alg host_key_alg
|
||||||
|
XXXXXXXXXX.XXXXXX CHhAvVGS1DHFjwGM9 SSH-2.0-OpenSSH_8.9p1 Ubuntu-3ubuntu0.6 SSH-2.0-OpenSSH_8.2p1 Ubuntu-4ubuntu0.5 F 4 curve25519-sha256 ssh-ed25519
|
||||||
|
XXXXXXXXXX.XXXXXX ClEkJM2Vm5giqnMf4h SSH-2.0-OpenSSH_8.9p1 Ubuntu-3ubuntu0.6 SSH-2.0-OpenSSH_8.2p1 Ubuntu-4ubuntu0.5 T 5 curve25519-sha256 ssh-ed25519
|
|
@ -0,0 +1,50 @@
|
||||||
|
# @TEST-DOC: Test WebSocket events of BinPac and Spicy analyzer versions
|
||||||
|
#
|
||||||
|
# @TEST-REQUIRES: ${SCRIPTS}/have-spicy
|
||||||
|
#
|
||||||
|
# @TEST-EXEC: echo "jupyter-websocket.pcap" >>out
|
||||||
|
# @TEST-EXEC: zeek -b -r $TRACES/websocket/jupyter-websocket.pcap %INPUT >>out
|
||||||
|
# @TEST-EXEC: echo "message-too-big-status.pcap" >>out
|
||||||
|
# @TEST-EXEC: zeek -b -r $TRACES//websocket/message-too-big-status.pcap %INPUT >>out
|
||||||
|
#
|
||||||
|
# @TEST-EXEC: echo "jupyter-websocket.pcap" >>out.spicy
|
||||||
|
# @TEST-EXEC: zeek -b -r $TRACES/websocket/jupyter-websocket.pcap %INPUT WebSocket::use_spicy_analyzer=T >>out.spicy
|
||||||
|
# @TEST-EXEC: echo "message-too-big-status.pcap" >>out.spicy
|
||||||
|
# @TEST-EXEC: zeek -b -r $TRACES//websocket/message-too-big-status.pcap %INPUT WebSocket::use_spicy_analyzer=T >>out.spicy
|
||||||
|
# @TEST-EXEC: diff -u out.spicy out >&2
|
||||||
|
# @TEST-EXEC: test ! -f analyzer.log
|
||||||
|
# @TEST-EXEC: test ! -f weird.log
|
||||||
|
|
||||||
|
@load base/protocols/websocket
|
||||||
|
|
||||||
|
redef record connection += {
|
||||||
|
ws_data_len: count &default=0;
|
||||||
|
};
|
||||||
|
|
||||||
|
event websocket_established(c: connection, aid: count)
|
||||||
|
{
|
||||||
|
print "websocket_established", c$uid, aid, c$websocket;
|
||||||
|
}
|
||||||
|
|
||||||
|
event websocket_message(c: connection, is_orig: bool, opcode: count)
|
||||||
|
{
|
||||||
|
print "websocket_message", c$uid, is_orig, "opcode", WebSocket::opcodes[opcode], "data_len", c$ws_data_len;
|
||||||
|
c$ws_data_len = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
event websocket_frame(c: connection, is_orig: bool, fin: bool, rsv: count, opcode: count, payload_len: count)
|
||||||
|
{
|
||||||
|
print "websocket_frame", c$uid, is_orig, "fin", fin, "rsv", rsv, "opcode", WebSocket::opcodes[opcode], "payload_len", payload_len;
|
||||||
|
}
|
||||||
|
|
||||||
|
event websocket_frame_data(c: connection, is_orig: bool, data: string)
|
||||||
|
{
|
||||||
|
# Spicy and binpac differ for data events, just ensure they end up having the same total data length.
|
||||||
|
# print "websocket_frame_data", c$uid, is_orig, "len", |data|, "data", data[:120];
|
||||||
|
c$ws_data_len += |data|;
|
||||||
|
}
|
||||||
|
|
||||||
|
event websocket_close(c: connection, is_orig: bool, status: count, reason: string)
|
||||||
|
{
|
||||||
|
print "websocket_close", c$uid, is_orig, "status", status, "reason", reason;
|
||||||
|
}
|
|
@ -0,0 +1,34 @@
|
||||||
|
# @TEST-DOC: Test SSH connection tunneled within WebSocket using wstunnel, comparing BinPac and Spicy.
|
||||||
|
#
|
||||||
|
# @TEST-REQUIRES: ${SCRIPTS}/have-spicy
|
||||||
|
#
|
||||||
|
# @TEST-EXEC: zeek -b -r $TRACES/websocket/wstunnel-ssh.pcap %INPUT
|
||||||
|
# @TEST-EXEC: zeek-cut -m ts uid history service < conn.log > conn.log.cut
|
||||||
|
# @TEST-EXEC: zeek-cut -m ts uid client server auth_success auth_attempts kex_alg host_key_alg < ssh.log > ssh.log.cut
|
||||||
|
# @TEST-EXEC: rm -v *log
|
||||||
|
# @TEST-EXEC: zeek -b -r $TRACES/websocket/wstunnel-ssh.pcap WebSocket::use_spicy_analyzer=T %INPUT
|
||||||
|
# @TEST-EXEC: zeek-cut -m ts uid history service < conn.log > conn.log.cut.spicy
|
||||||
|
# @TEST-EXEC: zeek-cut -m ts uid client server auth_success auth_attempts kex_alg host_key_alg < ssh.log > ssh.log.cut.spicy
|
||||||
|
#
|
||||||
|
# @TEST-EXEC: diff -u conn.log.cut.spicy conn.log.cut >&2
|
||||||
|
# @TEST-EXEC: diff -u ssh.log.cut.spicy ssh.log.cut >&2
|
||||||
|
# @TEST-EXEC: btest-diff conn.log.cut.spicy
|
||||||
|
# @TEST-EXEC: btest-diff ssh.log.cut.spicy
|
||||||
|
# @TEST-EXEC: test ! -f analyzer.log
|
||||||
|
# @TEST-EXEC: test ! -f weird.log
|
||||||
|
|
||||||
|
@load base/protocols/conn
|
||||||
|
@load base/protocols/ssh
|
||||||
|
@load base/protocols/websocket
|
||||||
|
|
||||||
|
# Make conn.log compatible, the spicy version uses SPICY_ANALYZER, so need
|
||||||
|
# to normalize the c$service entry (and do it in either case to keep determinism).
|
||||||
|
event connection_state_remove(c: connection) &priority=10
|
||||||
|
{
|
||||||
|
if ( "SPICY_WEBSOCKET" in c$service || "WEBSOCKET" in c$service )
|
||||||
|
{
|
||||||
|
delete c$service["SPICY_WEBSOCKET"];
|
||||||
|
delete c$service["WEBSOCKET"];
|
||||||
|
add c$service["WEBSOCKET"];
|
||||||
|
}
|
||||||
|
}
|
Loading…
Add table
Add a link
Reference in a new issue