Merge remote-tracking branch 'origin/topic/awelzel/spicy-format-for-8.0'

* origin/topic/awelzel/spicy-format-for-8.0:
  Update .git-blame-ignore-revs
  analyzer/protocol: Reformat with spicy-format
  pre-commit-config: Bump spicy-format to 0.26.0
This commit is contained in:
Arne Welzel 2025-07-29 10:05:20 +02:00
commit 96f9cc73c3
9 changed files with 1446 additions and 1521 deletions

View file

@ -33,3 +33,6 @@ f5a76c1aedc7f8886bc6abef0dfaa8065684b1f6
# clang-format: Format JSON with clang-format
e6256446ddef5c5d5240eefff974556f2e12ac46
# analyzer/protocol: Reformat with spicy-format
d70bcd07b9b26036b16092fe950eca40e2f5a032

View file

@ -52,8 +52,7 @@ repos:
exclude: '^(.typos.toml|src/SmithWaterman.cc|testing/.*|auxil/.*|scripts/base/frameworks/files/magic/.*|CHANGES|scripts/base/protocols/ssl/mozilla-ca-list.zeek)$'
- repo: https://github.com/bbannier/spicy-format
rev: v0.25.0
rev: v0.26.0
hooks:
- id: spicy-format
# TODO: Reformat existing large analyzers just before 8.0.
exclude: '(^testing/.*)|(protocol/ldap/.*)|(protocol/quic/.*)|(protocol/websocket/.*)'
exclude: '^testing/.*'

View file

@ -1,3 +1,11 @@
8.0.0-dev.775 | 2025-07-29 10:05:20 +0200
* Update .git-blame-ignore-revs (Arne Welzel, Corelight)
* analyzer/protocol: Reformat with spicy-format (Arne Welzel, Corelight)
* pre-commit-config: Bump spicy-format to 0.26.0 (Arne Welzel, Corelight)
8.0.0-dev.770 | 2025-07-28 14:18:15 -0700
* dce-rpc: Make named_pipe field docs extensive (Arne Welzel, Corelight)

View file

@ -1 +1 @@
8.0.0-dev.770
8.0.0-dev.775

View file

@ -23,237 +23,222 @@ import spicy;
# https://www.obj-sys.com/asn1tutorial/node10.html
public type ASN1Type = enum {
Boolean = 1,
Integer = 2,
BitString = 3,
OctetString = 4,
NullVal = 5,
ObjectIdentifier = 6,
ObjectDescriptor = 7,
InstanceOf = 8,
Real = 9,
Enumerated = 10,
EmbeddedPDV = 11,
UTF8String = 12,
RelativeOID = 13,
Sequence = 16,
Set = 17,
NumericString = 18,
PrintableString = 19,
TeletextString = 20,
VideotextString = 21,
IA5String = 22,
UTCTime = 23,
GeneralizedTime = 24,
GraphicString = 25,
VisibleString = 26,
GeneralString = 27,
UniversalString = 28,
CharacterString = 29,
BMPString = 30
Boolean = 1,
Integer = 2,
BitString = 3,
OctetString = 4,
NullVal = 5,
ObjectIdentifier = 6,
ObjectDescriptor = 7,
InstanceOf = 8,
Real = 9,
Enumerated = 10,
EmbeddedPDV = 11,
UTF8String = 12,
RelativeOID = 13,
Sequence = 16,
Set = 17,
NumericString = 18,
PrintableString = 19,
TeletextString = 20,
VideotextString = 21,
IA5String = 22,
UTCTime = 23,
GeneralizedTime = 24,
GraphicString = 25,
VisibleString = 26,
GeneralString = 27,
UniversalString = 28,
CharacterString = 29,
BMPString = 30,
};
#- ASN.1 data classes --------------------------------------------------------
public type ASN1Class = enum {
Universal = 0,
Application = 1,
ContextSpecific = 2,
Private = 3
Universal = 0,
Application = 1,
ContextSpecific = 2,
Private = 3,
};
#- ASN.1 tag definition (including length) ------------------------------------
type LengthType = unit {
var len: uint64;
var tag_len: uint8;
var len: uint64;
var tag_len: uint8;
data : bitfield(8) {
num: 0..6;
islong: 7;
};
data: bitfield(8) {
num: 0..6;
islong: 7;
};
switch ( self.data.islong ) {
0 -> : void {
self.len = self.data.num;
self.tag_len = 1;
}
1 -> : bytes &size=self.data.num
&convert=$$.to_uint(spicy::ByteOrder::Network) {
self.len = $$;
self.tag_len = self.data.num + 1;
}
};
switch (self.data.islong) {
0 -> : void {
self.len = self.data.num;
self.tag_len = 1;
}
1 -> : bytes &size=self.data.num &convert=$$.to_uint(spicy::ByteOrder::Network) {
self.len = $$;
self.tag_len = self.data.num + 1;
}
};
};
type ASN1Tag = unit {
: bitfield(8) {
type_: 0..4 &convert=ASN1Type($$);
constructed: 5 &convert=cast<bool>($$);
class: 6..7 &convert=ASN1Class($$);
};
: bitfield(8) {
type_: 0..4 &convert=ASN1Type($$);
constructed: 5 &convert=cast<bool>($$);
class: 6..7 &convert=ASN1Class($$);
};
};
#- ASN.1 bit string -----------------------------------------------------------
# https://www.obj-sys.com/asn1tutorial/node10.html
type ASN1BitString = unit(len: uint64, constructed: bool) {
: uint8; # unused bits
value_bits: bytes &size=(len - 1);
: uint8; # unused bits
value_bits: bytes &size=(len - 1);
# TODO - constructed form
# https://github.com/zeek/spicy/issues/921
# `bytes` needs << and >> support before we can implement complex bitstrings
#
# TODO - constructed form
# https://github.com/zeek/spicy/issues/921
# `bytes` needs << and >> support before we can implement complex bitstrings
#
};
#- ASN.1 octet string ---------------------------------------------------------
# https://www.obj-sys.com/asn1tutorial/node10.html
type ASN1OctetString = unit(len: uint64, constructed: bool) {
value: bytes &size = len;
value: bytes &size=len;
# TODO - constructed form
# TODO - constructed form
};
#- ASN.1 various string types -------------------------------------------------
# https://www.obj-sys.com/asn1tutorial/node124.html
type ASN1String = unit(tag: ASN1Tag, len: uint64) {
var encoding: spicy::Charset;
var encoding: spicy::Charset;
on %init {
switch ( tag.type_ ) {
# see "Restricted Character String Types" in
# "Generic String Encoding Rules (GSER) for ASN.1 Types"
# (https://datatracker.ietf.org/doc/html/rfc3641#section-3.2)
case ASN1Type::PrintableString,
ASN1Type::GeneralizedTime,
ASN1Type::UTCTime: {
self.encoding = spicy::Charset::ASCII;
}
case ASN1Type::UTF8String,
ASN1Type::GeneralString,
ASN1Type::CharacterString,
ASN1Type::GraphicString,
ASN1Type::IA5String,
ASN1Type::NumericString,
ASN1Type::TeletextString,
ASN1Type::VideotextString,
ASN1Type::VisibleString,
# TODO: RFC3641 mentions special UTF-8 mapping rules for
# BMPString and UniversalString. This *may* not be correct.
ASN1Type::BMPString,
ASN1Type::UniversalString: {
self.encoding = spicy::Charset::UTF8;
}
on %init {
switch (tag.type_) {
# see "Restricted Character String Types" in
# "Generic String Encoding Rules (GSER) for ASN.1 Types"
# (https://datatracker.ietf.org/doc/html/rfc3641#section-3.2)
case ASN1Type::PrintableString,
ASN1Type::GeneralizedTime,
ASN1Type::UTCTime: {
self.encoding = spicy::Charset::ASCII;
}
case ASN1Type::UTF8String,
ASN1Type::GeneralString,
ASN1Type::CharacterString,
ASN1Type::GraphicString,
ASN1Type::IA5String,
ASN1Type::NumericString,
ASN1Type::TeletextString,
ASN1Type::VideotextString,
ASN1Type::VisibleString,
# TODO: RFC3641 mentions special UTF-8 mapping rules for
# BMPString and UniversalString. This *may* not be correct.
ASN1Type::BMPString,
ASN1Type::UniversalString: {
self.encoding = spicy::Charset::UTF8;
}
}
}
}
value: ASN1OctetString(len, tag.constructed) &convert=$$.value.decode(self.encoding);
value: ASN1OctetString(len, tag.constructed) &convert=$$.value.decode(self.encoding);
} &convert=self.value;
#- ASN.1 OID ------------------------------------------------------------------
# https://www.obj-sys.com/asn1tutorial/node124.html
type ASN1ObjectIdentifierNibble = unit {
data : bitfield(8) {
num: 0..6;
more: 7;
};
data: bitfield(8) {
num: 0..6;
more: 7;
};
} &convert=self.data;
type ASN1ObjectIdentifier = unit(len: uint64) {
var oidbytes: bytes;
var temp: uint64;
var oidstring: string;
var oidbytes: bytes;
var temp: uint64;
var oidstring: string;
: uint8 if ( len >= 1 ) {
self.temp = $$ / 40;
self.oidbytes += ("%d" % (self.temp)).encode();
self.temp = $$ % 40;
self.oidbytes += (".%d" % (self.temp)).encode();
self.temp = 0;
}
sublist: ASN1ObjectIdentifierNibble[len - 1] foreach {
self.temp = ( self.temp<<7 ) | $$.num;
if ( $$.more != 1 ) {
self.oidbytes += (".%d" % (self.temp)).encode();
self.temp = 0;
: uint8 if(len >= 1) {
self.temp = $$ / 40;
self.oidbytes += ("%d" % (self.temp)).encode();
self.temp = $$ % 40;
self.oidbytes += (".%d" % (self.temp)).encode();
self.temp = 0;
}
}
on %done {
self.oidstring = self.oidbytes.decode();
}
sublist: ASN1ObjectIdentifierNibble[len - 1] foreach {
self.temp = (self.temp << 7) | $$.num;
if ($$.more != 1) {
self.oidbytes += (".%d" % (self.temp)).encode();
self.temp = 0;
}
}
on %done {
self.oidstring = self.oidbytes.decode();
}
};
#- ASN.1 message header (tag + length information) ----------------------------
public type ASN1Header = unit {
tag: ASN1Tag;
len: LengthType;
tag: ASN1Tag;
len: LengthType;
};
#- ASN.1 message body ---------------------------------------------------------
public type ASN1Body = unit(head: ASN1Header, recursive: bool) {
switch ( head.tag.type_ ) {
switch (head.tag.type_) {
ASN1Type::Boolean -> bool_value: uint8 &convert=cast<bool>($$) &requires=head.len.len == 1;
ASN1Type::Integer,
ASN1Type::Enumerated -> num_value: bytes &size=head.len.len &convert=$$.to_int(spicy::ByteOrder::Big);
ASN1Type::NullVal -> null_value: bytes &size=0 &requires=head.len.len == 0;
ASN1Type::BitString -> bitstr_value: ASN1BitString(head.len.len, head.tag.constructed);
ASN1Type::OctetString -> str_value: ASN1OctetString(head.len.len, head.tag.constructed) &convert=$$.value.decode(spicy::Charset::ASCII);
ASN1Type::ObjectIdentifier -> str_value: ASN1ObjectIdentifier(head.len.len) &convert=$$.oidstring;
ASN1Type::BMPString,
ASN1Type::CharacterString,
ASN1Type::GeneralizedTime,
ASN1Type::GeneralString,
ASN1Type::GraphicString,
ASN1Type::IA5String,
ASN1Type::NumericString,
ASN1Type::PrintableString,
ASN1Type::TeletextString,
ASN1Type::UTCTime,
ASN1Type::UTF8String,
ASN1Type::VideotextString,
ASN1Type::VisibleString,
ASN1Type::UniversalString -> str_value: ASN1String(head.tag, head.len.len);
ASN1Type::Sequence,
ASN1Type::Set -> seq: ASN1SubMessages(head.len.len) if(recursive);
ASN1Type::Boolean -> bool_value: uint8 &convert=cast<bool>($$) &requires=head.len.len==1;
# TODO: ASN1Type values not handled yet
ASN1Type::ObjectDescriptor,
ASN1Type::InstanceOf,
ASN1Type::Real,
ASN1Type::EmbeddedPDV,
ASN1Type::RelativeOID -> unimplemented_value: bytes &size=head.len.len;
ASN1Type::Integer,
ASN1Type::Enumerated -> num_value: bytes &size=head.len.len
&convert=$$.to_int(spicy::ByteOrder::Big);
ASN1Type::NullVal -> null_value: bytes &size=0 &requires=head.len.len==0;
ASN1Type::BitString -> bitstr_value: ASN1BitString(head.len.len, head.tag.constructed);
ASN1Type::OctetString -> str_value: ASN1OctetString(head.len.len, head.tag.constructed)
&convert=$$.value.decode(spicy::Charset::ASCII);
ASN1Type::ObjectIdentifier -> str_value: ASN1ObjectIdentifier(head.len.len)
&convert=$$.oidstring;
ASN1Type::BMPString,
ASN1Type::CharacterString,
ASN1Type::GeneralizedTime,
ASN1Type::GeneralString,
ASN1Type::GraphicString,
ASN1Type::IA5String,
ASN1Type::NumericString,
ASN1Type::PrintableString,
ASN1Type::TeletextString,
ASN1Type::UTCTime,
ASN1Type::UTF8String,
ASN1Type::VideotextString,
ASN1Type::VisibleString,
ASN1Type::UniversalString -> str_value: ASN1String(head.tag, head.len.len);
ASN1Type::Sequence, ASN1Type::Set -> seq: ASN1SubMessages(head.len.len) if (recursive);
# TODO: ASN1Type values not handled yet
ASN1Type::ObjectDescriptor,
ASN1Type::InstanceOf,
ASN1Type::Real,
ASN1Type::EmbeddedPDV,
ASN1Type::RelativeOID -> unimplemented_value: bytes &size=head.len.len;
# unknown (to me) ASN.1 enumeration, skip over silently
* -> unimplemented_value: bytes &size=head.len.len;
};
# unknown (to me) ASN.1 enumeration, skip over silently
* -> unimplemented_value: bytes &size=head.len.len;
};
};
#- ASN.1 array of ASN.1 sequence/set sub-messages (up to msgLen bytes) --------
public type ASN1SubMessages = unit(msgLen: uint64) {
submessages: ASN1Message(True)[] &eod;
submessages: ASN1Message(True)[] &eod;
} &size=msgLen;
#- ASN.1 message with header and body -----------------------------------------
@ -262,18 +247,15 @@ public type ASN1SubMessages = unit(msgLen: uint64) {
# - else, application_data:bytes stores data array
public type ASN1Message = unit(recursive: bool) {
var application_id: int32;
var application_id: int32;
head: ASN1Header;
switch ( self.head.tag.class ) {
ASN1Class::Universal -> body: ASN1Body(self.head, recursive);
ASN1Class::Application,
ASN1Class::ContextSpecific,
ASN1Class::Private -> application_data: bytes &size=self.head.len.len {
self.application_id = cast<int32>(self.head.tag.type_);
}
};
head: ASN1Header;
switch (self.head.tag.class) {
ASN1Class::Universal -> body: ASN1Body(self.head, recursive);
ASN1Class::Application,
ASN1Class::ContextSpecific,
ASN1Class::Private -> application_data: bytes &size=self.head.len.len {
self.application_id = cast<int32>(self.head.tag.type_);
}
};
};

File diff suppressed because it is too large Load diff

View file

@ -6,9 +6,9 @@ import LDAP;
import zeek;
on LDAP::TlsForward::%init {
zeek::protocol_begin("SSL");
zeek::protocol_begin("SSL");
}
on LDAP::TlsForward::chunk {
zeek::protocol_data_in(zeek::is_orig(), self.chunk);
zeek::protocol_data_in(zeek::is_orig(), self.chunk);
}

File diff suppressed because it is too large Load diff

View file

@ -6,114 +6,109 @@ import spicy;
import zeek;
const OPCODE_CONTINUATION = 0x00;
const OPCODE_TEXT = 0x01;
const OPCODE_BINARY = 0x02;
const OPCODE_CLOSE = 0x08;
const OPCODE_PING = 0x09;
const OPCODE_PONG = 0x0a;
const OPCODE_TEXT = 0x01;
const OPCODE_BINARY = 0x02;
const OPCODE_CLOSE = 0x08;
const OPCODE_PING = 0x09;
const OPCODE_PONG = 0x0a;
public function fast_unmask(
masking_key_idx: uint64,
masking_key: vector<uint8>,
chunk: bytes
): bytes &cxxname="hlt_websocket::WebSocket::fast_unmask";
public function fast_unmask(masking_key_idx: uint64, masking_key: vector<uint8>, chunk: bytes): bytes &cxxname="hlt_websocket::WebSocket::fast_unmask";
type Frame = unit(m: Message) {
var payload_len: uint64;
var masking_key_idx: uint64;
var close_data: bytes;
var effective_opcode: uint8;
var payload_len: uint64;
var masking_key_idx: uint64;
var close_data: bytes;
var effective_opcode: uint8;
: bitfield(16) {
fin: 0 &convert=cast<bool>($$);
rsv: 1..3;
opcode: 4..7 &convert=cast<uint8>($$);
mask: 8 &convert=cast<bool>($$);
payload_len1: 9..15;
} &bit-order=spicy::BitOrder::MSB0;
: bitfield(16) {
fin: 0 &convert=cast<bool>($$);
rsv: 1..3;
opcode: 4..7 &convert=cast<uint8>($$);
mask: 8 &convert=cast<bool>($$);
payload_len1: 9..15;
} &bit-order=spicy::BitOrder::MSB0;
# Verify that this is either a continuation frame, or the Message does not
# yet have a non-continuation opcode, but this frame does.
: void &requires=(m.opcode != OPCODE_CONTINUATION && self.opcode == OPCODE_CONTINUATION || m.opcode == OPCODE_CONTINUATION && self.opcode != OPCODE_CONTINUATION);
# Verify that this is either a continuation frame, or the Message does not
# yet have a non-continuation opcode, but this frame does.
: void &requires=(m.opcode != OPCODE_CONTINUATION && self.opcode == OPCODE_CONTINUATION || m.opcode == OPCODE_CONTINUATION && self.opcode != OPCODE_CONTINUATION);
# Type/opcode to decide what to do with individual chunks.
: void {
self.effective_opcode = m.opcode != OPCODE_CONTINUATION ? m.opcode : self.opcode;
}
payload_len2: uint16 if (self.payload_len1 == 126);
payload_len8: uint64 if (self.payload_len1 == 127);
: void {
self.payload_len = self.payload_len1;
if ( self?.payload_len2 )
self.payload_len = self.payload_len2;
else if ( self?.payload_len8 )
self.payload_len = self.payload_len8;
}
# This being a uint8[] allows masking_key[x] indexing, while a bytes
# object would require *masking_key.at(i) which took roughly 20% more
# runtime when I tested it.
masking_key: uint8[] &size=4 if (self.mask);
chunk: bytes &size=self.payload_len &chunked {
# Don't use &convert with &chunked: https://github.com/zeek/spicy/issues/1661
if ( self.mask ) {
self.chunk = fast_unmask(self.masking_key_idx, self.masking_key, $$);
self.masking_key_idx += |$$|;
} else {
self.chunk = $$;
# Type/opcode to decide what to do with individual chunks.
: void {
self.effective_opcode = m.opcode != OPCODE_CONTINUATION ? m.opcode : self.opcode;
}
# Forward TEXT and BINARY data to downstream analyzers.
if ( self.effective_opcode == OPCODE_TEXT || self.effective_opcode == OPCODE_BINARY )
zeek::protocol_data_in(zeek::is_orig(), $$);
payload_len2: uint16 if(self.payload_len1 == 126);
payload_len8: uint64 if(self.payload_len1 == 127);
# Accumulate the unmasked data in close_data if this is a close frame
# so it can be parsed by the outer Message. It's a bit of a hack.
if ( self.effective_opcode == OPCODE_CLOSE )
self.close_data += $$;
}
: void {
self.payload_len = self.payload_len1;
if (self?.payload_len2)
self.payload_len = self.payload_len2;
else if (self?.payload_len8)
self.payload_len = self.payload_len8;
}
# This being a uint8[] allows masking_key[x] indexing, while a bytes
# object would require *masking_key.at(i) which took roughly 20% more
# runtime when I tested it.
masking_key: uint8[] &size=4 if(self.mask);
chunk: bytes &size=self.payload_len &chunked {
# Don't use &convert with &chunked: https://github.com/zeek/spicy/issues/1661
if (self.mask) {
self.chunk = fast_unmask(self.masking_key_idx, self.masking_key, $$);
self.masking_key_idx += |$$|;
} else {
self.chunk = $$;
}
# Forward TEXT and BINARY data to downstream analyzers.
if (self.effective_opcode == OPCODE_TEXT || self.effective_opcode == OPCODE_BINARY)
zeek::protocol_data_in(zeek::is_orig(), $$);
# Accumulate the unmasked data in close_data if this is a close frame
# so it can be parsed by the outer Message. It's a bit of a hack.
if (self.effective_opcode == OPCODE_CLOSE)
self.close_data += $$;
}
};
type CloseFrame = unit {
var status: uint16;
var reason: bytes;
var status: uint16;
var reason: bytes;
: bytes &eod {
if ( |$$| > 0 ) {
self.status = cast<uint16>($$.sub(0, 2).to_uint(spicy::ByteOrder::Network));
self.reason = $$.sub(2, 0);
: bytes &eod {
if (|$$| > 0) {
self.status = cast<uint16>($$.sub(0, 2).to_uint(spicy::ByteOrder::Network));
self.reason = $$.sub(2, 0);
}
}
}
};
public type Message = unit {
# transient trickery
var done: bool = False;
# transient trickery
var done: bool = False;
var opcode: uint8;
var opcode: uint8;
# Keep the first one persistent to have access
# to the payload if it's a close frame.
first_frame: Frame(self) {
self.opcode = $$.opcode;
self.done = $$.fin;
}
# Keep the first one persistent to have access
# to the payload if it's a close frame.
first_frame: Frame(self) {
self.opcode = $$.opcode;
self.done = $$.fin;
}
: Frame(self)[] &until=(self.done) if (!self.done) foreach {
self.done = $$.fin;
}
: Frame(self)[] &until=(self.done) if(!self.done) foreach {
self.done = $$.fin;
}
: CloseFrame &parse-from=self.first_frame.close_data if (self.opcode == OPCODE_CLOSE);
: CloseFrame &parse-from=self.first_frame.close_data if(self.opcode == OPCODE_CLOSE);
on %done {
spicy::accept_input();
}
on %done {
spicy::accept_input();
}
};
public type Messages = unit {
: Message[];
: Message[];
};