Merge remote-tracking branch 'origin/topic/awelzel/4440-websocket-pong-timeout'

* origin/topic/awelzel/4440-websocket-pong-timeout:
  btest/cluster/websocket: Update tests for new event signature
  cluster/websocket: Propagate code and reason to websocket_client_lost()
  cluster/websocket: Support configurable ping interval
  IXWebSocket: Bump to improve O^2 rxbuf erase()
This commit is contained in:
Arne Welzel 2025-05-13 18:26:22 +02:00
commit 8f60d37ad0
32 changed files with 186 additions and 30 deletions

17
CHANGES
View file

@ -1,3 +1,20 @@
8.0.0-dev.92 | 2025-05-13 18:26:22 +0200
* btest/cluster/websocket: Update tests for new event signature (Arne Welzel, Corelight)
* GH-4440: cluster/websocket: Propagate code and reason to websocket_client_lost() (Arne Welzel, Corelight)
This allows to get visibility into the reason why ixwebsocket or the
client decided to disconnect.
* cluster/websocket: Support configurable ping interval (Arne Welzel, Corelight)
Primarily for testing purposes and maybe the hard-coded 5 seconds is too
aggressive for some deployments, so makes sense for it to be
configurable.
* GH-4440: IXWebSocket: Bump to improve O^2 rxbuf erase() (Arne Welzel, Corelight)
8.0.0-dev.87 | 2025-05-12 11:33:38 -0700
* Fix clang-tidy `bugprone-inc-dec-in-conditions` report in Spicy plugins glue compiler (Benjamin Bannier, Corelight)

View file

@ -1 +1 @@
8.0.0-dev.87
8.0.0-dev.92

View file

@ -85,6 +85,9 @@ export {
## is incremented when the maximum queue size is reached.
const default_websocket_max_event_queue_size = 32 &redef;
## The default ping interval for WebSocket clients.
const default_websocket_ping_interval = 5 sec &redef;
## Setting a default dir will, for persistent backends that have not
## been given an explicit file path via :zeek:see:`Cluster::stores`,
## automatically create a path within this dir that is based on the name of
@ -365,6 +368,10 @@ export {
listen_port: port;
## The maximum event queue size for this server.
max_event_queue_size: count &default=default_websocket_max_event_queue_size;
## Ping interval to use. A WebSocket client not responding to
## the pings will be disconnected. Set to a negative value to
## disable pings. Subsecond intervals are currently not supported.
ping_interval: interval &default=default_websocket_ping_interval;
## The TLS options used for this WebSocket server. By default,
## TLS is disabled. See also :zeek:see:`Cluster::WebSocketTLSOptions`.
tls_options: WebSocketTLSOptions &default=WebSocketTLSOptions();
@ -673,10 +680,11 @@ event websocket_client_added(endpoint: EndpointInfo, subscriptions: string_vec)
Cluster::log(msg);
}
event websocket_client_lost(endpoint: EndpointInfo)
event websocket_client_lost(endpoint: EndpointInfo, code: count, reason: string)
{
local msg = fmt("WebSocket client '%s' (%s:%d) gone",
endpoint$id, endpoint$network$address, endpoint$network$bound_port);
local msg = fmt("WebSocket client '%s' (%s:%d) gone with code %d%s",
endpoint$id, endpoint$network$address, endpoint$network$bound_port, code,
|reason| > 0 ? fmt(" and reason '%s'", reason) : "");
Cluster::log(msg);
}

View file

@ -198,6 +198,13 @@ function Cluster::__listen_websocket%(options: WebSocketServerOptions%): bool
};
server_options.max_event_queue_size = options_rec->GetField<zeek::CountVal>("max_event_queue_size")->Get();
double ping_interval = options_rec->GetField<zeek::IntervalVal>("ping_interval")->Get();
if ( ping_interval < 0.0 )
server_options.ping_interval_seconds = -1;
else
server_options.ping_interval_seconds = static_cast<int>(ping_interval);
server_options.tls_options = std::move(tls_options);
auto result = zeek::cluster::manager->ListenWebSocket(server_options);

View file

@ -135,7 +135,8 @@ std::unique_ptr<WebSocketServer> StartServer(std::unique_ptr<WebSocketEventDispa
dispatcher->QueueForProcessing(WebSocketMessage{id, msg->str});
}
else if ( msg->type == ix::WebSocketMessageType::Close ) {
dispatcher->QueueForProcessing(WebSocketClose{id});
auto& ci = msg->closeInfo;
dispatcher->QueueForProcessing(WebSocketClose{id, ci.code, std::move(ci.reason)});
}
else if ( msg->type == ix::WebSocketMessageType::Error ) {
dispatcher->QueueForProcessing(WebSocketClose{id});

View file

@ -356,7 +356,8 @@ void WebSocketEventDispatcher::Process(const WebSocketClose& close) {
// should be the last event related to this WebSocket client.
auto rec = zeek::cluster::detail::bif::make_endpoint_info(backend->NodeId(), wsc->getRemoteIp(),
wsc->getRemotePort(), TRANSPORT_TCP);
zeek::event_mgr.Enqueue(Cluster::websocket_client_lost, std::move(rec));
zeek::event_mgr.Enqueue(Cluster::websocket_client_lost, std::move(rec), zeek::val_mgr->Count(close.code),
zeek::make_intrusive<zeek::StringVal>(close.reason));
}
clients.erase(it);

View file

@ -131,6 +131,8 @@ struct WebSocketOpen {
// A WebSocket client disconnected.
struct WebSocketClose {
std::string id;
uint16_t code;
std::string reason;
};
// A WebSocket client send a message.

@ -1 +1 @@
Subproject commit 1babe1d8e13573c073fa739fd2597dd0a6828b6b
Subproject commit abb632fb0f778258e9a8c8c1b3a262f804755fdb

View file

@ -10,4 +10,8 @@ event websocket_client_added%(endpoint: EndpointInfo, subscriptions: string_vec%
## Generated when a WebSocket client was lost.
##
## endpoint: Various information about the WebSocket client.
event websocket_client_lost%(endpoint: EndpointInfo%);
## code: The code sent by the client in its CLOSE frame, or a code generated
## internally if the server disconnected the client.
## reason: The reason sent by the client in its CLOSE frame, or a reason generated
## internally if the server disconnected the client.
event websocket_client_lost%(endpoint: EndpointInfo, code: count, reason: string%);

View file

@ -1,5 +1,5 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
error in <...>/main.zeek, line 666: Already listening on 127.0.0.1:<port> (Cluster::__listen_websocket(ws_opts_x))
error in <...>/main.zeek, line 666: Already listening on 127.0.0.1:<port> (Cluster::__listen_websocket(ws_opts_wss_port))
error in <...>/main.zeek, line 666: Already listening on 127.0.0.1:<port> (Cluster::__listen_websocket(ws_opts_qs))
error in <...>/main.zeek, line 673: Already listening on 127.0.0.1:<port> (Cluster::__listen_websocket(ws_opts_x))
error in <...>/main.zeek, line 673: Already listening on 127.0.0.1:<port> (Cluster::__listen_websocket(ws_opts_wss_port))
error in <...>/main.zeek, line 673: Already listening on 127.0.0.1:<port> (Cluster::__listen_websocket(ws_opts_qs))
received termination signal

View file

@ -1,3 +1,3 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
error in <...>/main.zeek, line 666: Invalid tls_options: No key_file field (Cluster::__listen_websocket(Cluster::options.0))
error in <...>/main.zeek, line 666: Invalid tls_options: No cert_file field (Cluster::__listen_websocket(Cluster::options.3))
error in <...>/main.zeek, line 673: Invalid tls_options: No key_file field (Cluster::__listen_websocket(Cluster::options.0))
error in <...>/main.zeek, line 673: Invalid tls_options: No cert_file field (Cluster::__listen_websocket(Cluster::options.3))

View file

@ -2,6 +2,6 @@
manager WebSocket client <nodeid> (127.0.0.1:<port>) subscribed to [/topic/ws/1, /topic/ws/all]
manager WebSocket client <nodeid> (127.0.0.1:<port>) subscribed to [/topic/ws/2, /topic/ws/all]
manager WebSocket client <nodeid> (127.0.0.1:<port>) subscribed to [/topic/ws/3, /topic/ws/all]
manager WebSocket client <nodeid> (127.0.0.1:<port>) gone
manager WebSocket client <nodeid> (127.0.0.1:<port>) gone
manager WebSocket client <nodeid> (127.0.0.1:<port>) gone
manager WebSocket client <nodeid> (127.0.0.1:<port>) gone with code 1000
manager WebSocket client <nodeid> (127.0.0.1:<port>) gone with code 1000
manager WebSocket client <nodeid> (127.0.0.1:<port>) gone with code 1000

View file

@ -0,0 +1 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.

View file

@ -0,0 +1,3 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
received 1011 (internal error) Ping timeout; then sent 1011 (internal error) Ping timeout
received 1011 (internal error) Ping timeout; then sent 1011 (internal error) Ping timeout

View file

@ -0,0 +1,2 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
received termination signal

View file

@ -0,0 +1,5 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
WebSocket client <nodeid> (127.0.0.1:<port>) subscribed to []
WebSocket client <nodeid> (127.0.0.1:<port>) subscribed to []
WebSocket client <nodeid> (127.0.0.1:<port>) gone with code 1011 and reason 'Ping timeout'
WebSocket client <nodeid> (127.0.0.1:<port>) gone with code 1011 and reason 'Ping timeout'

View file

@ -0,0 +1,5 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
Cluster::websocket_client_added, []
Cluster::websocket_client_added, []
Cluster::websocket_client_lost, 1011, Ping timeout
Cluster::websocket_client_lost, 1011, Ping timeout

View file

@ -50,7 +50,7 @@ event Cluster::websocket_client_added(info: Cluster::EndpointInfo, subscriptions
print "Cluster::websocket_client_added", subscriptions;
}
event Cluster::websocket_client_lost(info: Cluster::EndpointInfo)
event Cluster::websocket_client_lost(info: Cluster::EndpointInfo, code: count, reason: string)
{
print "Cluster::websocket_client_lost";
terminate();

View file

@ -47,7 +47,7 @@ event Cluster::websocket_client_added(info: Cluster::EndpointInfo, subscriptions
print "Cluster::websocket_client_added", added, subscriptions;
}
event Cluster::websocket_client_lost(info: Cluster::EndpointInfo)
event Cluster::websocket_client_lost(info: Cluster::EndpointInfo, code: count, reason: string)
{
++lost;
print "Cluster::websocket_client_lost", lost;

View file

@ -53,7 +53,7 @@ event Cluster::websocket_client_added(info: Cluster::EndpointInfo, subscriptions
print "Cluster::websocket_client_added", subscriptions;
}
event Cluster::websocket_client_lost(info: Cluster::EndpointInfo)
event Cluster::websocket_client_lost(info: Cluster::EndpointInfo, code: count, reason: string)
{
print "Cluster::websocket_client_lost";
terminate();

View file

@ -63,7 +63,7 @@ event Cluster::websocket_client_added(info: Cluster::EndpointInfo, subscriptions
print fmt("%s: Cluster::websocket_client_added %s %s", current_time(), added, subscriptions);
}
event Cluster::websocket_client_lost(info: Cluster::EndpointInfo)
event Cluster::websocket_client_lost(info: Cluster::EndpointInfo, code: count, reason: string)
{
++lost;
print fmt("%s: Cluster::websocket_client_lost %s", current_time(), lost);

View file

@ -66,7 +66,7 @@ event Cluster::websocket_client_added(info: Cluster::EndpointInfo, subscriptions
event send_ping();
}
event Cluster::websocket_client_lost(info: Cluster::EndpointInfo)
event Cluster::websocket_client_lost(info: Cluster::EndpointInfo, code: count, reason: string)
{
++lost;
print fmt("%s: Cluster::websocket_client_lost %s", current_time(), lost);

View file

@ -94,7 +94,7 @@ event Cluster::websocket_client_added(info: Cluster::EndpointInfo, subscriptions
print fmt("%s: Cluster::websocket_client_added %s %s", current_time(), added, subscriptions);
}
event Cluster::websocket_client_lost(info: Cluster::EndpointInfo)
event Cluster::websocket_client_lost(info: Cluster::EndpointInfo, code: count, reason: string)
{
++lost;
print fmt("%s: Cluster::websocket_client_lost %s", current_time(), lost);

View file

@ -63,7 +63,7 @@ event Cluster::websocket_client_added(info: Cluster::EndpointInfo, subscriptions
print "Cluster::websocket_client_added", added, subscriptions;
}
event Cluster::websocket_client_lost(info: Cluster::EndpointInfo)
event Cluster::websocket_client_lost(info: Cluster::EndpointInfo, code: count, reason: string)
{
++lost;
print "Cluster::websocket_client_lost", lost;

View file

@ -53,7 +53,7 @@ event Cluster::websocket_client_added(info: Cluster::EndpointInfo, subscriptions
print "Cluster::websocket_client_added", subscriptions;
}
event Cluster::websocket_client_lost(info: Cluster::EndpointInfo)
event Cluster::websocket_client_lost(info: Cluster::EndpointInfo, code: count, reason: string)
{
print "Cluster::websocket_client_lost";
terminate();

View file

@ -53,7 +53,7 @@ event Cluster::websocket_client_added(info: Cluster::EndpointInfo, subscriptions
print "Cluster::websocket_client_added", subscriptions;
}
event Cluster::websocket_client_lost(info: Cluster::EndpointInfo)
event Cluster::websocket_client_lost(info: Cluster::EndpointInfo, code: count, reason: string)
{
print "Cluster::websocket_client_lost";
terminate();

View file

@ -0,0 +1,100 @@
# @TEST-DOC: Ensure the websocket_client_lost() event contains code and reason. This starts two WebSocket client that aren't replying to PING frames.
#
# @TEST-REQUIRES: python3 -c 'import websockets.sync'
#
# @TEST-PORT: WEBSOCKET_PORT
#
# @TEST-EXEC: cp $FILES/ws/wstest.py .
# @TEST-EXEC: zeek -b --parse-only manager.zeek
# @TEST-EXEC: python3 -m py_compile client.py
#
# @TEST-EXEC: btest-bg-run manager "ZEEKPATH=$ZEEKPATH:.. && zeek -b ../manager.zeek >out"
# @TEST-EXEC: btest-bg-run client "python3 ../client.py >out"
#
# @TEST-EXEC: btest-bg-wait 30
# @TEST-EXEC: btest-diff ./manager/out
# @TEST-EXEC: btest-diff ./manager/.stderr
# @TEST-EXEC: zeek-cut message < ./manager/cluster.log | sed -r "s/client '.+' /client <nodeid> /g" | sed -r "s/:[0-9]+/:<port>/g" > ./manager/cluster.log.cannonified
# @TEST-EXEC: btest-diff ./manager/cluster.log.cannonified
# @TEST-EXEC: btest-diff ./client/out
# @TEST-EXEC: btest-diff ./client/.stderr
# @TEST-START-FILE manager.zeek
redef exit_only_after_terminate = T;
global lost = 0;
event Cluster::websocket_client_added(info: Cluster::EndpointInfo, subscriptions: string_vec)
{
print "Cluster::websocket_client_added", subscriptions;
}
event Cluster::websocket_client_lost(info: Cluster::EndpointInfo, code: count, reason: string)
{
++lost;
print "Cluster::websocket_client_lost", code, reason;
if ( lost == 2 )
terminate();
}
event zeek_init()
{
Cluster::listen_websocket([$listen_host="127.0.0.1", $listen_port=to_port(getenv("WEBSOCKET_PORT")), $ping_interval=1sec]);
Cluster::subscribe("/test/pings/");
}
# @TEST-END-FILE
# @TEST-START-FILE client.py
import json
import functools
import wstest
from websockets.sync.client import connect
from websockets.sync.client import ClientConnection
from websockets.frames import OP_PONG
class MyClientConnection(ClientConnection):
"""
Custom Client class patching the protocol.send_frame() function
to discard any PONG frames. The websocket library responds
automatically to these in a thread and can't easily turn this off,
but we want to test Zeek behavior when a client fails to respond
with PONG frames quickly enough.
"""
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.__orig_send_frame = self.protocol.send_frame
def __my_send_frame(_self, frame):
if frame.opcode != OP_PONG:
self.__orig_send_frame(frame)
self.protocol.send_frame = functools.partial(__my_send_frame, self.protocol)
def run(ws_url):
with (
connect(ws_url, create_connection=MyClientConnection) as c1,
connect(ws_url, create_connection=MyClientConnection) as c2,
):
c1.send(json.dumps([]))
ack1 = json.loads(c1.recv())
assert ack1["type"] == "ack", repr(ack1)
c2.send(json.dumps([]))
ack2 = json.loads(c2.recv())
assert ack2["type"] == "ack", repr(ack2)
try:
c1.recv()
except Exception as e:
print(e)
try:
c2.recv()
except Exception as e:
print(e)
if __name__ == "__main__":
wstest.main(run, wstest.WS4_URL_V1)
# @TEST-END-FILE

View file

@ -46,7 +46,7 @@ event Cluster::websocket_client_added(info: Cluster::EndpointInfo, subscriptions
print "B Cluster::websocket_client_added", subscriptions;
}
event Cluster::websocket_client_lost(info: Cluster::EndpointInfo)
event Cluster::websocket_client_lost(info: Cluster::EndpointInfo, code: count, reason: string)
{
print "E Cluster::websocket_client_lost";
}

View file

@ -52,7 +52,7 @@ event Cluster::websocket_client_added(info: Cluster::EndpointInfo, subscriptions
print "B Cluster::websocket_client_added", subscriptions;
}
event Cluster::websocket_client_lost(info: Cluster::EndpointInfo)
event Cluster::websocket_client_lost(info: Cluster::EndpointInfo, code: count, reason: string)
{
++lost;
print "D Cluster::websocket_client_lost", lost;

View file

@ -67,7 +67,7 @@ event Cluster::websocket_client_added(info: Cluster::EndpointInfo, subscriptions
print "Cluster::websocket_client_added", subscriptions;
}
event Cluster::websocket_client_lost(info: Cluster::EndpointInfo)
event Cluster::websocket_client_lost(info: Cluster::EndpointInfo, code: count, reason: string)
{
print "Cluster::websocket_client_lost";
terminate();

View file

@ -84,7 +84,7 @@ event Cluster::websocket_client_added(info: Cluster::EndpointInfo, subscriptions
drain_if_ready();
}
event Cluster::websocket_client_lost(info: Cluster::EndpointInfo)
event Cluster::websocket_client_lost(info: Cluster::EndpointInfo, code: count, reason: string)
{
++lost;
print "C Cluster::websocket_client_lost";

View file

@ -93,7 +93,7 @@ event Cluster::websocket_client_added(info: Cluster::EndpointInfo, subscriptions
print fmt("%s: Cluster::websocket_client_added %s %s", current_time(), added, subscriptions);
}
event Cluster::websocket_client_lost(info: Cluster::EndpointInfo)
event Cluster::websocket_client_lost(info: Cluster::EndpointInfo, code: count, reason: string)
{
++lost;
print fmt("%s: Cluster::websocket_client_lost %s", current_time(), lost);