From a61aff010f5bbe36084b4e31e60c2f7a425bbd94 Mon Sep 17 00:00:00 2001 From: Arne Welzel Date: Fri, 9 May 2025 16:23:17 +0200 Subject: [PATCH] cluster/websocket: Propagate code and reason to websocket_client_lost() This allows to get visibility into the reason why ixwebsocket or the client decided to disconnect. Closed #4440 --- scripts/base/frameworks/cluster/main.zeek | 7 +- .../websocket/WebSocket-IXWebSocket.cc | 3 +- src/cluster/websocket/WebSocket.cc | 3 +- src/cluster/websocket/WebSocket.h | 2 + src/cluster/websocket/events.bif | 6 +- .../..client..stderr | 1 + .../..client.out | 3 + .../..manager..stderr | 2 + .../..manager.cluster.log.cannonified | 5 + .../..manager.out | 5 + .../btest/cluster/websocket/ping-timeout.zeek | 100 ++++++++++++++++++ 11 files changed, 131 insertions(+), 6 deletions(-) create mode 100644 testing/btest/Baseline/cluster.websocket.ping-timeout/..client..stderr create mode 100644 testing/btest/Baseline/cluster.websocket.ping-timeout/..client.out create mode 100644 testing/btest/Baseline/cluster.websocket.ping-timeout/..manager..stderr create mode 100644 testing/btest/Baseline/cluster.websocket.ping-timeout/..manager.cluster.log.cannonified create mode 100644 testing/btest/Baseline/cluster.websocket.ping-timeout/..manager.out create mode 100644 testing/btest/cluster/websocket/ping-timeout.zeek diff --git a/scripts/base/frameworks/cluster/main.zeek b/scripts/base/frameworks/cluster/main.zeek index 199ae2e147..6620060fae 100644 --- a/scripts/base/frameworks/cluster/main.zeek +++ b/scripts/base/frameworks/cluster/main.zeek @@ -680,10 +680,11 @@ event websocket_client_added(endpoint: EndpointInfo, subscriptions: string_vec) Cluster::log(msg); } -event websocket_client_lost(endpoint: EndpointInfo) +event websocket_client_lost(endpoint: EndpointInfo, code: count, reason: string) { - local msg = fmt("WebSocket client '%s' (%s:%d) gone", - endpoint$id, endpoint$network$address, endpoint$network$bound_port); + local msg = fmt("WebSocket client '%s' (%s:%d) gone with code %d%s", + endpoint$id, endpoint$network$address, endpoint$network$bound_port, code, + |reason| > 0 ? fmt(" and reason '%s'", reason) : ""); Cluster::log(msg); } diff --git a/src/cluster/websocket/WebSocket-IXWebSocket.cc b/src/cluster/websocket/WebSocket-IXWebSocket.cc index 25f09df1a1..08528f0f51 100644 --- a/src/cluster/websocket/WebSocket-IXWebSocket.cc +++ b/src/cluster/websocket/WebSocket-IXWebSocket.cc @@ -135,7 +135,8 @@ std::unique_ptr StartServer(std::unique_ptrQueueForProcessing(WebSocketMessage{id, msg->str}); } else if ( msg->type == ix::WebSocketMessageType::Close ) { - dispatcher->QueueForProcessing(WebSocketClose{id}); + auto& ci = msg->closeInfo; + dispatcher->QueueForProcessing(WebSocketClose{id, ci.code, std::move(ci.reason)}); } else if ( msg->type == ix::WebSocketMessageType::Error ) { dispatcher->QueueForProcessing(WebSocketClose{id}); diff --git a/src/cluster/websocket/WebSocket.cc b/src/cluster/websocket/WebSocket.cc index 8b3569b6bf..a49ce30049 100644 --- a/src/cluster/websocket/WebSocket.cc +++ b/src/cluster/websocket/WebSocket.cc @@ -356,7 +356,8 @@ void WebSocketEventDispatcher::Process(const WebSocketClose& close) { // should be the last event related to this WebSocket client. auto rec = zeek::cluster::detail::bif::make_endpoint_info(backend->NodeId(), wsc->getRemoteIp(), wsc->getRemotePort(), TRANSPORT_TCP); - zeek::event_mgr.Enqueue(Cluster::websocket_client_lost, std::move(rec)); + zeek::event_mgr.Enqueue(Cluster::websocket_client_lost, std::move(rec), zeek::val_mgr->Count(close.code), + zeek::make_intrusive(close.reason)); } clients.erase(it); diff --git a/src/cluster/websocket/WebSocket.h b/src/cluster/websocket/WebSocket.h index 685c16b077..eff83c11f2 100644 --- a/src/cluster/websocket/WebSocket.h +++ b/src/cluster/websocket/WebSocket.h @@ -131,6 +131,8 @@ struct WebSocketOpen { // A WebSocket client disconnected. struct WebSocketClose { std::string id; + uint16_t code; + std::string reason; }; // A WebSocket client send a message. diff --git a/src/cluster/websocket/events.bif b/src/cluster/websocket/events.bif index d730b42419..e02798eea7 100644 --- a/src/cluster/websocket/events.bif +++ b/src/cluster/websocket/events.bif @@ -10,4 +10,8 @@ event websocket_client_added%(endpoint: EndpointInfo, subscriptions: string_vec% ## Generated when a WebSocket client was lost. ## ## endpoint: Various information about the WebSocket client. -event websocket_client_lost%(endpoint: EndpointInfo%); +## code: The code sent by the client in its CLOSE frame, or a code generated +## internally if the server disconnected the client. +## reason: The reason sent by the client in its CLOSE frame, or a reason generated +## internally if the server disconnected the client. +event websocket_client_lost%(endpoint: EndpointInfo, code: count, reason: string%); diff --git a/testing/btest/Baseline/cluster.websocket.ping-timeout/..client..stderr b/testing/btest/Baseline/cluster.websocket.ping-timeout/..client..stderr new file mode 100644 index 0000000000..49d861c74c --- /dev/null +++ b/testing/btest/Baseline/cluster.websocket.ping-timeout/..client..stderr @@ -0,0 +1 @@ +### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. diff --git a/testing/btest/Baseline/cluster.websocket.ping-timeout/..client.out b/testing/btest/Baseline/cluster.websocket.ping-timeout/..client.out new file mode 100644 index 0000000000..794caa7feb --- /dev/null +++ b/testing/btest/Baseline/cluster.websocket.ping-timeout/..client.out @@ -0,0 +1,3 @@ +### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. +received 1011 (internal error) Ping timeout; then sent 1011 (internal error) Ping timeout +received 1011 (internal error) Ping timeout; then sent 1011 (internal error) Ping timeout diff --git a/testing/btest/Baseline/cluster.websocket.ping-timeout/..manager..stderr b/testing/btest/Baseline/cluster.websocket.ping-timeout/..manager..stderr new file mode 100644 index 0000000000..e3f6131b1d --- /dev/null +++ b/testing/btest/Baseline/cluster.websocket.ping-timeout/..manager..stderr @@ -0,0 +1,2 @@ +### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. +received termination signal diff --git a/testing/btest/Baseline/cluster.websocket.ping-timeout/..manager.cluster.log.cannonified b/testing/btest/Baseline/cluster.websocket.ping-timeout/..manager.cluster.log.cannonified new file mode 100644 index 0000000000..3fb274a13f --- /dev/null +++ b/testing/btest/Baseline/cluster.websocket.ping-timeout/..manager.cluster.log.cannonified @@ -0,0 +1,5 @@ +### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. +WebSocket client (127.0.0.1:) subscribed to [] +WebSocket client (127.0.0.1:) subscribed to [] +WebSocket client (127.0.0.1:) gone with code 1011 and reason 'Ping timeout' +WebSocket client (127.0.0.1:) gone with code 1011 and reason 'Ping timeout' diff --git a/testing/btest/Baseline/cluster.websocket.ping-timeout/..manager.out b/testing/btest/Baseline/cluster.websocket.ping-timeout/..manager.out new file mode 100644 index 0000000000..5657e61a7f --- /dev/null +++ b/testing/btest/Baseline/cluster.websocket.ping-timeout/..manager.out @@ -0,0 +1,5 @@ +### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. +Cluster::websocket_client_added, [] +Cluster::websocket_client_added, [] +Cluster::websocket_client_lost, 1011, Ping timeout +Cluster::websocket_client_lost, 1011, Ping timeout diff --git a/testing/btest/cluster/websocket/ping-timeout.zeek b/testing/btest/cluster/websocket/ping-timeout.zeek new file mode 100644 index 0000000000..f0347b1a3a --- /dev/null +++ b/testing/btest/cluster/websocket/ping-timeout.zeek @@ -0,0 +1,100 @@ +# @TEST-DOC: Ensure the websocket_client_lost() event contains code and reason. This starts two WebSocket client that aren't replying to PING frames. +# +# @TEST-REQUIRES: python3 -c 'import websockets.sync' +# +# @TEST-PORT: WEBSOCKET_PORT +# +# @TEST-EXEC: cp $FILES/ws/wstest.py . +# @TEST-EXEC: zeek -b --parse-only manager.zeek +# @TEST-EXEC: python3 -m py_compile client.py +# +# @TEST-EXEC: btest-bg-run manager "ZEEKPATH=$ZEEKPATH:.. && zeek -b ../manager.zeek >out" +# @TEST-EXEC: btest-bg-run client "python3 ../client.py >out" +# +# @TEST-EXEC: btest-bg-wait 30 +# @TEST-EXEC: btest-diff ./manager/out +# @TEST-EXEC: btest-diff ./manager/.stderr +# @TEST-EXEC: zeek-cut message < ./manager/cluster.log | sed -r "s/client '.+' /client /g" | sed -r "s/:[0-9]+/:/g" > ./manager/cluster.log.cannonified +# @TEST-EXEC: btest-diff ./manager/cluster.log.cannonified +# @TEST-EXEC: btest-diff ./client/out +# @TEST-EXEC: btest-diff ./client/.stderr + +# @TEST-START-FILE manager.zeek +redef exit_only_after_terminate = T; + +global lost = 0; + +event Cluster::websocket_client_added(info: Cluster::EndpointInfo, subscriptions: string_vec) + { + print "Cluster::websocket_client_added", subscriptions; + } + +event Cluster::websocket_client_lost(info: Cluster::EndpointInfo, code: count, reason: string) + { + ++lost; + print "Cluster::websocket_client_lost", code, reason; + if ( lost == 2 ) + terminate(); + } + +event zeek_init() + { + Cluster::listen_websocket([$listen_host="127.0.0.1", $listen_port=to_port(getenv("WEBSOCKET_PORT")), $ping_interval=1sec]); + Cluster::subscribe("/test/pings/"); + } +# @TEST-END-FILE + + +# @TEST-START-FILE client.py +import json +import functools + +import wstest + +from websockets.sync.client import connect +from websockets.sync.client import ClientConnection +from websockets.frames import OP_PONG + +class MyClientConnection(ClientConnection): + """ + Custom Client class patching the protocol.send_frame() function + to discard any PONG frames. The websocket library responds + automatically to these in a thread and can't easily turn this off, + but we want to test Zeek behavior when a client fails to respond + with PONG frames quickly enough. + """ + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.__orig_send_frame = self.protocol.send_frame + def __my_send_frame(_self, frame): + if frame.opcode != OP_PONG: + self.__orig_send_frame(frame) + + self.protocol.send_frame = functools.partial(__my_send_frame, self.protocol) + +def run(ws_url): + with ( + connect(ws_url, create_connection=MyClientConnection) as c1, + connect(ws_url, create_connection=MyClientConnection) as c2, + ): + c1.send(json.dumps([])) + ack1 = json.loads(c1.recv()) + assert ack1["type"] == "ack", repr(ack1) + + c2.send(json.dumps([])) + ack2 = json.loads(c2.recv()) + assert ack2["type"] == "ack", repr(ack2) + + try: + c1.recv() + except Exception as e: + print(e) + try: + c2.recv() + except Exception as e: + print(e) + + +if __name__ == "__main__": + wstest.main(run, wstest.WS4_URL_V1) +# @TEST-END-FILE