postgresql: Initial parser implementation

This adds a protocol parser for the PostgreSQL protocol and a new postgresql.log similar to the existing mysql.log. This should be considered preliminary and hopefully during 7.1 and 7.2 with feedback from the community, we can improve on the events and logs. Even if most PostgreSQL communication is encrypted in the real-world, this will minimally allow monitoring of the SSLRequest and hand off further analysis to the SSL analyzer. This originates from github.com/awelzel/spicy-postgresql, with lots of polishing happening in the past two days.
2025-10-02 06:38:20 +00:00 · 2024-09-04 16:59:30 +02:00 · 2024-09-04 16:59:30 +02:00 · 85ca59484b
commit 85ca59484b
parent 2907d9feee
82 changed files with 1803 additions and 10 deletions
--- a/src/analyzer/protocol/postgresql/CMakeLists.txt
+++ b/src/analyzer/protocol/postgresql/CMakeLists.txt
@ -0,0 +1,5 @@
+spicy_add_analyzer(
+    NAME PostgreSQL
+    PACKAGE_NAME spicy-postgresql
+    SOURCES postgresql.spicy postgresql.evt postgresql_zeek.spicy
+    MODULES PostgreSQL PostgreSQL_Zeek)
--- a/src/analyzer/protocol/postgresql/postgresql.evt
+++ b/src/analyzer/protocol/postgresql/postgresql.evt
@ -0,0 +1,42 @@
+import PostgreSQL;
+import PostgreSQL_Zeek;
+
+protocol analyzer PostgreSQL over TCP:
+    parse originator with PostgreSQL::FrontendMessages,
+    parse responder with PostgreSQL::BackendMessages;
+
+on PostgreSQL::NotImplemented -> event PostgreSQL::not_implemented($conn, $is_orig, ("%c" % typ), self.chunk);
+
+on PostgreSQL::AuthenticationRequest if ( self.identifier != 0 )-> event PostgreSQL::authentication_request($conn, self.identifier, self.data);
+
+on PostgreSQL::AuthenticationRequest if ( self.identifier == 0 ) -> event PostgreSQL::authentication_ok($conn);
+
+on PostgreSQL::AuthenticationResponse -> event PostgreSQL::authentication_response($conn, self.data);
+
+on PostgreSQL::FrontendMessages::ssl_request -> event PostgreSQL::ssl_request($conn);
+
+on PostgreSQL::MaybeBackendSSL::ssl_byte -> event PostgreSQL::ssl_reply($conn, ("%c" % self.ssl_byte));
+
+on PostgreSQL::StartupParameter -> event PostgreSQL::startup_parameter($conn, self.name, self.value);
+
+on PostgreSQL::StartupMessage -> event PostgreSQL::startup_message($conn, self.version.major, self.version.minor);
+
+on PostgreSQL::ErrorIdentifiedField -> event PostgreSQL::error_response_identified_field($conn, ("%c" % self.code), self.value);
+
+on PostgreSQL::ErrorResponse -> event PostgreSQL::error_response($conn);
+
+on PostgreSQL::SimpleQuery -> event PostgreSQL::simple_query($conn, self.query);
+
+on PostgreSQL::ReadyForQuery -> event PostgreSQL::ready_for_query($conn, "%c" % self.transaction_status);
+
+on PostgreSQL::NoticeIdentifiedField -> event PostgreSQL::notice_response_identified_field($conn, ("%c" % self.code), self.value);
+
+on PostgreSQL::NoticeResponse -> event PostgreSQL::notice_response($conn);
+
+on PostgreSQL::Terminate -> event PostgreSQL::terminate($conn);
+
+on PostgreSQL::DataRow -> event PostgreSQL::data_row($conn, self.column_values);
+
+on PostgreSQL::ParameterStatus -> event PostgreSQL::parameter_status($conn, self.name, self.value);
+
+on PostgreSQL::BackendKeyData -> event PostgreSQL::backend_key_data($conn, self.process_id, self.secret_key);
--- a/src/analyzer/protocol/postgresql/postgresql.spicy
+++ b/src/analyzer/protocol/postgresql/postgresql.spicy
@ -0,0 +1,337 @@
+# A PostgreSQL analyzer.
+#
+# https://www.postgresql.org/docs/current/protocol.html
+#
+# Protocol version 3.0
+
+module PostgreSQL;
+
+import spicy;
+
+type SSLFrontendState = enum {
+    Requested,
+    NotRequested,
+};
+
+type SSLBackendState = enum {
+    S,
+    N,
+};
+
+# How many chunks to buffer initially when seeing a backend message
+# before a frontend or vice versa.
+const MAX_BUFFERED = 4;
+
+# When a connection switches to SSL, this consumes all the SSL chunks.
+# In zeek_postgres.spicy, SSLSink%init calls zeek::protocol_begin() and
+# then zeek::protocol_data_in()
+#
+# There's a single SSLSink shared between backend and frontend.
+type SSLSink = unit {
+    chunk: bytes &chunked &eod;
+};
+
+# Used as context for synchronization between frontend/backend.
+type Context = struct {
+    ssl_frontend_state: SSLFrontendState;
+    ssl_backend_state: SSLBackendState;
+    ssl_sink: sink&;
+    ssl_sink_connected: bool;
+};
+
+type ProtocolVersion = unit {
+    major: uint16;
+    minor: uint16;
+};
+
+type StartupParameter = unit {
+    name: /[-_\/A-Za-z0-9]+/ &requires=(|$$| > 0);
+    : uint8 &requires=($$ == 0);
+    value: /[\x20-\x7e]+/ &requires=(|$$| > 0);
+    : uint8 &requires=($$ == 0);
+};
+
+type StartupMessage = unit {
+    length: uint32 &requires=(self.length >= 9);
+    version: ProtocolVersion &requires=($$.major == 3);
+    parameters: StartupParameter[] &size=self.length - 9;
+    : skip b"\x00";
+};
+
+# Top-level entry for the client.
+public type FrontendMessages = unit {
+    %context = Context;
+    on %init {
+        # Until the first FrontendMessages are initialized, ssl_sink in the
+        # context is a Null reference. Also, we want to use a single sink
+        # for both, frontend and backend by calling beg
+        self.context().ssl_sink = self.s1;
+    }
+
+    var buffered: vector<bytes>;
+    var s1_connected: bool;
+    var ssl_requested: bool;
+    sink s1;
+
+    # Peek at the client data.
+    length: uint32 &requires=(self.length >= 8);
+    version_or_magic: uint32 {
+        self.ssl_requested = self.length == 8 && $$ == 80877103;
+
+        if (self.ssl_requested) {
+            self.context().ssl_frontend_state = SSLFrontendState::Requested;
+        } else {
+            self.context().ssl_frontend_state = SSLFrontendState::NotRequested;
+            self.context().ssl_backend_state = SSLBackendState::N;
+
+            # Pre-check the supported major version here.
+            local major = $$ >> 16;
+            if (major != 3)
+                throw "unsupported PostgreSQL major version %s" % major;
+
+            # Put length and version back into the buffer so PlainFrontendMessages
+            # can re-parse it.
+            #
+            # This explicitly avoids using random access functionality like
+            # `self.input()` and `self.set_input()` which would disable automatic
+            # trimming in this unit (which is top-level unit parsing unbounded
+            # amounts of data).
+            self.buffered.push_back(pack(self.length, spicy::ByteOrder::Network));
+            self.buffered.push_back(pack(self.version_or_magic, spicy::ByteOrder::Network));
+        }
+    }
+
+    # void field for raising an event.
+    ssl_request: void if(self.ssl_requested == True);
+
+    # print "frontend ssl", self.context();
+
+    # If the client requested SSL, we do not know how to continue parsing
+    # until the server confirmed SSL usage via 'S' or 'N' responses. As long
+    # as it hasn't responded, stall the parsing here and buffer bytes until
+    # the context() is populated.
+    #
+    # In normal operations, Zeek should see the server's response before
+    # attempting to parse more data, but Robin was concerned it that in
+    # some circumstances (out-of-order packets, reassembly artifacts) we
+    # may see the client's data before the server's.
+    #
+    # In the future, barrier: https://github.com/zeek/spicy/pull/1373
+    : bytes &chunked &eod {
+        if (!self.context().ssl_backend_state) {
+            self.buffered.push_back($$);
+
+            if (|self.buffered| > MAX_BUFFERED)
+                throw "too many frontend messages buffered";
+        } else {
+            # print "frontend ssl_state backend set!", self.context();
+            if (!self.s1_connected) {
+                if (self.context().ssl_backend_state == SSLBackendState::S) {
+                    if (!self.context().ssl_sink_connected) {
+                        self.context().ssl_sink.connect(new SSLSink());
+                        self.context().ssl_sink_connected = True;
+                    }
+                } else {
+                    # print "connecting plain frontend messages";
+                    self.s1.connect(new PlainFrontendMessages());
+                }
+
+                self.s1_connected = True;
+
+                if (|self.buffered| > 0) {
+                    for (b in self.buffered)
+                        self.s1.write(b);
+                }
+
+                self.buffered.resize(0);
+            }
+
+            self.s1.write($$);
+        }
+    }
+};
+
+type PlainFrontendMessages = unit {
+    startup_message: StartupMessage;
+    : FrontendMessage[];
+};
+
+type FrontendMessage = unit {
+    typ: uint8;
+    length: uint32 &requires=(self.length >= 4);
+
+    switch (self.typ) {
+        'p' -> : AuthenticationResponse;
+        'X' -> : Terminate;
+        'Q' -> : SimpleQuery;
+        * -> not_implemented: NotImplemented(self.typ);
+    } &size=self.length - 4;
+};
+
+type AuthenticationResponse = unit {
+    # This is PasswordMessage, SASLInitialMessage, etc. based on context.
+    # For now, just thread it through.
+    data: bytes &eod;
+};
+
+type Terminate = unit {};
+
+type SimpleQuery = unit {
+    query: bytes &until=b"\x00";
+};
+
+# The client has requested SSL, the server either confirms (S) or
+# stays in plaintext (N) mode. Depending on the result, we connect
+# our sink to the SSL sink, or to a PlainBackendMessages unit.
+#
+type MaybeBackendSSL = unit(ctx: Context&) {
+
+    # Connected SSL, or plaintext.
+    sink s1;
+
+    ssl_byte: uint8 &requires=($$ == 'S' || $$ == 'N') {
+        # print "backend ssl_byte", $$;
+        if ($$ == 'S') {
+            ctx.ssl_backend_state = SSLBackendState::S;
+            if (!ctx.ssl_sink_connected) {
+                ctx.ssl_sink.connect(new SSLSink());
+                ctx.ssl_sink_connected = True;
+            }
+
+            # Share the SSL sink with the frontend.
+            self.s1 = ctx.ssl_sink;
+        } else {
+            ctx.ssl_backend_state = SSLBackendState::N;
+            self.s1.connect(new PlainBackendMessages());
+        }
+    }
+
+    # Now that s1 is connected, forward the rest of the connection to it.
+    : bytes &chunked &eod -> self.s1;
+};
+
+# Top-level entry for the server.
+public type BackendMessages = unit {
+    %context = Context;
+
+    var buffered: vector<bytes>;
+    var s1_connected: bool;
+    sink s1;
+
+    # Buffer until the SSL frontend state was populated.
+    : bytes &chunked &eod {
+        if (!self.context().ssl_frontend_state) {
+            # print "backend buffering ", |$$|;
+            self.buffered.push_back($$);
+
+            if (|self.buffered| > MAX_BUFFERED)
+                throw "too many backend messages buffered";
+        } else {
+            # The ssl_frontend_state has been set. If The client requested SSL,
+            # connect to an SSLMaybe instance. If it did not, connect
+            # directly to PlainBackendMessages.
+            # print "backend", self.context(), |self.buffered|, self.s1, self.s1_connected;
+            if (!self.s1_connected) {
+
+                if (self.context().ssl_frontend_state == SSLFrontendState::Requested) {
+                    self.s1.connect(new MaybeBackendSSL(self.context()));
+                } else {
+                    self.s1.connect(new PlainBackendMessages());
+                }
+
+                self.s1_connected = True;
+
+                if (|self.buffered| > 0) {
+                    for (b in self.buffered)
+                        self.s1.write(b);
+                }
+                self.buffered.resize(0);
+            }
+
+            # print "backend writing to sink", $$, |self.s1|;
+            self.s1.write($$);
+        }
+    }
+};
+
+type PlainBackendMessages = unit {
+    : BackendMessage[];
+};
+
+type BackendMessage = unit {
+    typ: uint8;
+    length: uint32 &requires=(self.length >= 4);
+
+    switch (self.typ) {
+        'K' -> backend_key_data: BackendKeyData;
+        'E' -> error: ErrorResponse;
+        'R' -> auth: AuthenticationRequest(self.length - 4);
+        'S' -> parameter_status: ParameterStatus;
+        'D' -> data_row: DataRow;
+        'Z' -> ready_for_query: ReadyForQuery;
+        'N' -> notice: NoticeResponse;
+        * -> not_implemented: NotImplemented(self.typ);
+    } &size=self.length - 4;
+};
+
+type ParameterStatus = unit {
+    name: /[-_\/A-Za-z0-9]+/ &requires=(|$$| > 0);
+    : uint8 &requires=($$ == 0);
+    value: /[\x20-\x7e]+/ &requires=(|$$| > 0);
+    : uint8 &requires=($$ == 0);
+};
+
+# Possible values are 'I' if idle (not in a transaction block);
+# 'T' if in a transaction block; or 'E' if in a failed transaction block
+# (queries will be rejected until block is ended).
+type ReadyForQuery = unit {
+    transaction_status: uint8 &requires=($$ == 'I' || $$ == 'T' || $$ == 'E');
+};
+
+type NoticeIdentifiedField = unit {
+    code: uint8;
+    value: bytes &until=b"\x00";
+};
+
+type NoticeResponse = unit {
+    : NoticeIdentifiedField[];
+    : skip b"\x00";
+};
+
+# Just for counting right now.
+type DataRow = unit {
+    column_values: uint16;
+    : skip bytes &eod;
+};
+
+# Fields with a 1 byte field as documented here:
+# https://www.postgresql.org/docs/current/protocol-error-fields.html
+type ErrorIdentifiedField = unit {
+    code: uint8;
+    value: bytes &until=b"\x00";
+};
+
+type ErrorResponse = unit {
+    : ErrorIdentifiedField[];
+    : skip b"\x00";
+};
+
+type AuthenticationRequest = unit(length: uint32) {
+    identifier: uint32 &requires=($$ <= 12) {
+        if (self.identifier == 0 && length != 4)
+            throw "AuthenticationOK with wrong length: %s" % length;
+    }
+
+    # There's more structure (GSS-API, SASL, cleartext), but for now
+    # just thread through the raw data.
+    data: bytes &eod;
+};
+
+type BackendKeyData = unit {
+    process_id: uint32;
+    secret_key: uint32;
+};
+
+type NotImplemented = unit(typ: uint8) {
+    chunk: bytes &eod;
+};
--- a/src/analyzer/protocol/postgresql/postgresql_zeek.spicy
+++ b/src/analyzer/protocol/postgresql/postgresql_zeek.spicy
@ -0,0 +1,43 @@
+# Set up protocol confirmation/rejection for analyzers, as well as any further
+# Zeek-specific analysis.
+
+module PostgreSQL_Zeek;
+
+import PostgreSQL;
+import zeek;
+
+# If we see a client StartupMessage, that's pretty good.
+on PostgreSQL::StartupMessage::%done {
+    zeek::confirm_protocol();
+}
+
+# If the server replied with an ssl_byte and we let it through,
+# that's also pretty good.
+on PostgreSQL::MaybeBackendSSL::ssl_byte {
+    zeek::confirm_protocol();
+}
+
+on PostgreSQL::SSLSink::%init {
+    zeek::protocol_begin("SSL");
+}
+
+on PostgreSQL::SSLSink::%done {
+    zeek::protocol_end();
+}
+
+on PostgreSQL::SSLSink::chunk {
+    # print "ssl_chunk", zeek::is_orig(), self;
+    zeek::protocol_data_in(zeek::is_orig(), self.chunk);
+}
+
+on PostgreSQL::StartupMessage::%error(msg: string) {
+    zeek::reject_protocol("error while parsing PostgreSQL StartupMessage: %s" % msg);
+}
+
+on PostgreSQL::FrontendMessage::%error(msg: string) {
+    zeek::reject_protocol("error while parsing PostgreSQL: %s" % msg);
+}
+
+on PostgreSQL::BackendMessage::%error(msg: string) {
+    zeek::reject_protocol("error while parsing PostgreSQL: %s" % msg);
+}