zeek/doc/ext/zeek_pygments.py
Tim Wojtulewicz ded98cd373 Copy docs into Zeek repo directly
This is based on commit 2731def9159247e6da8a3191783c89683363689c from the
zeek-docs repo.
2025-09-26 02:58:29 +00:00

247 lines
8.1 KiB
Python

from pygments.lexer import RegexLexer, bygroups, include, words
from pygments.token import (
Comment,
Keyword,
Literal,
Name,
Number,
Operator,
Punctuation,
String,
Text,
)
def setup(Sphinx):
return {
"parallel_read_safe": True,
}
class ZeekLexer(RegexLexer):
    """
    For `Zeek <https://www.zeek.org/>`_ scripts.

    Within each state the rules are tried in the order listed and the first
    one that matches wins, so specific patterns are placed ahead of the more
    general ones that would otherwise shadow them.

    .. versionadded:: 2.5
    """

    name = "Zeek"
    aliases = ["zeek"]
    filenames = ["*.zeek"]

    # Regex fragments reused while building the "literals" state.
    _hex = r"[0-9a-fA-F]"
    _float = r"((\d*\.?\d+)|(\d+\.?\d*))([eE][-+]?\d+)?"
    _h = r"[A-Za-z0-9][-A-Za-z0-9]*"

    tokens = {
        "root": [
            include("whitespace"),
            include("comments"),
            include("directives"),
            include("attributes"),
            include("types"),
            include("keywords"),
            include("literals"),
            include("operators"),
            include("punctuation"),
            # A (possibly namespace-qualified) name directly followed by "("
            # is treated as a function name.
            (
                r"\b((?:[A-Za-z_][A-Za-z_0-9]*)(?:::(?:[A-Za-z_][A-Za-z_0-9]*))*)(?=\s*\()",
                Name.Function,
            ),
            include("identifiers"),
        ],
        "whitespace": [
            (r"\n", Text),
            (r"\s+", Text),
            # Backslash immediately followed by a newline.
            (r"\\\n", Text),
        ],
        "comments": [
            (r"#.*$", Comment),
        ],
        "directives": [
            # These directives consume the rest of the line.
            (r"(@(load-plugin|load-sigs|load|unload))\b.*$", Comment.Preproc),
            (
                r"(@(DEBUG|DIR|FILENAME|deprecated|if|ifdef|ifndef|else|endif))\b",
                Comment.Preproc,
            ),
            (r"(@prefixes)\s*(\+?=).*$", Comment.Preproc),
        ],
        "attributes": [
            # "&name" attributes.
            (
                words(
                    (
                        "redef",
                        "priority",
                        "log",
                        "optional",
                        "default",
                        "add_func",
                        "delete_func",
                        "expire_func",
                        "read_expire",
                        "write_expire",
                        "create_expire",
                        "synchronized",
                        "persistent",
                        "rotate_interval",
                        "rotate_size",
                        "encrypt",
                        "raw_output",
                        "mergeable",
                        "error_handler",
                        "broker_allow_complex_type",
                        "is_assigned",
                        "is_used",
                        "type_column",
                        "deprecated",
                        "on_change",
                        "backend",
                        "broker_store",
                    ),
                    prefix=r"&",
                    suffix=r"\b",
                ),
                Keyword.Pseudo,
            ),
        ],
        "types": [
            (
                words(
                    (
                        "any",
                        "enum",
                        "record",
                        "set",
                        "table",
                        "vector",
                        "function",
                        "hook",
                        "event",
                        "addr",
                        "bool",
                        "count",
                        "double",
                        "file",
                        "int",
                        "interval",
                        "pattern",
                        "port",
                        "string",
                        "subnet",
                        "time",
                    ),
                    prefix=r"\b",
                    suffix=r"\b",
                ),
                Keyword.Type,
            ),
            # "opaque of <name>"
            (
                r"\b(opaque)(\s+)(of)(\s+)((?:[A-Za-z_][A-Za-z_0-9]*)(?:::(?:[A-Za-z_][A-Za-z_0-9]*))*)\b",
                bygroups(Keyword.Type, Text, Operator.Word, Text, Keyword.Type),
            ),
            # "type <name> : record|enum" -- must precede the generic
            # "type <name> :" rule, which matches a prefix of the same text.
            (
                r"\b(type)(\s+)((?:[A-Za-z_][A-Za-z_0-9]*)(?:::(?:[A-Za-z_][A-Za-z_0-9]*))*)(\s*)(:)(\s*)\b(record|enum)\b",
                bygroups(Keyword, Text, Name.Class, Text, Operator, Text, Keyword.Type),
            ),
            (
                r"\b(type)(\s+)((?:[A-Za-z_][A-Za-z_0-9]*)(?:::(?:[A-Za-z_][A-Za-z_0-9]*))*)(\s*)(:)",
                bygroups(Keyword, Text, Name, Text, Operator),
            ),
            # "redef record|enum <name>"
            (
                r"\b(redef)(\s+)(record|enum)(\s+)((?:[A-Za-z_][A-Za-z_0-9]*)(?:::(?:[A-Za-z_][A-Za-z_0-9]*))*)\b",
                bygroups(Keyword, Text, Keyword.Type, Text, Name.Class),
            ),
        ],
        "keywords": [
            (
                words(
                    (
                        "redef",
                        "export",
                        "if",
                        "else",
                        "for",
                        "while",
                        "return",
                        "break",
                        "next",
                        "continue",
                        "fallthrough",
                        "switch",
                        "default",
                        "case",
                        "add",
                        "delete",
                        "copy",
                        "when",
                        "timeout",
                        "schedule",
                    ),
                    prefix=r"\b",
                    suffix=r"\b",
                ),
                Keyword,
            ),
            (r"\b(print)\b", Keyword),
            (r"\b(global|local|const|option)\b", Keyword.Declaration),
            # "module <name>"; only the first three groups are given token
            # types, the nested groups are covered by group 3.
            (
                r"\b(module)(\s+)(([A-Za-z_][A-Za-z_0-9]*)(?:::([A-Za-z_][A-Za-z_0-9]*))*)\b",
                bygroups(Keyword.Namespace, Text, Name.Namespace),
            ),
        ],
        "literals": [
            (r'"', String, "string"),
            # Not the greatest match for patterns, but generally helps
            # disambiguate between start of a pattern and just a division
            # operator.
            (r"/(?=.*/)", String.Regex, "regex"),
            (r"\b(T|F)\b", Keyword.Constant),
            # Port
            (r"\b\d{1,5}/(udp|tcp|icmp|unknown)\b", Number),
            # IPv4 Address
            (
                r"\b(25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[0-9]{1,2})\.(25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[0-9]{1,2})\.(25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[0-9]{1,2})\.(25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[0-9]{1,2})\b",
                Number,
            ),
            # IPv6 Address (not 100% correct: that takes more effort)
            (
                r"\[([0-9a-fA-F]{0,4}:){2,7}([0-9a-fA-F]{0,4})?((25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[0-9]{1,2})\.(25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[0-9]{1,2})\.(25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[0-9]{1,2})\.(25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[0-9]{1,2}))?\]",
                Number,
            ),
            # Numeric
            (r"\b0[xX]" + _hex + r"+\b", Number.Hex),
            # Number followed by a time unit (interval literal).
            (r"\b" + _float + r"\s*(day|hr|min|sec|msec|usec)s?\b", Literal.Date),
            # Plain integers are tried before the float rule: _float also
            # matches a bare run of digits, so with the float rule first the
            # integer rule could never match. The lookahead leaves "1.5" to
            # the float rule; "1e5" is left to it as well because there is no
            # word boundary between the digits and the exponent.
            (r"\b(\d+)\b(?!\.\d)", Number.Integer),
            (r"\b" + _float + r"\b", Number.Float),
            # Hostnames
            (_h + r"(\." + _h + r")+", String),
        ],
        "operators": [
            # Two-character operators are tried before the single-character
            # class below; otherwise e.g. "==" is emitted as two "=" tokens.
            # Only genuine pairs are listed so that a sequence such as "=&"
            # is not merged, which would steal the "&" from an attribute.
            (r"(&&|\|\||\+\+|--|[+=!><-]=)", Operator),
            (r"[!%*/+<=>~|&^-]", Operator),
            (r"\b(in|as|is|of)\b", Operator.Word),
            (r"\??\$", Operator),
            # Technically, colons are often used for punctuation/separation.
            # E.g. field name/type separation.
            (r"[?:]", Operator),
        ],
        "punctuation": [
            # NOTE: "root" includes "operators" before this state, so "?$"
            # and ":" are already claimed there; these alternatives only
            # apply if this state is included without "operators" first.
            (r"\?\$", Punctuation),
            (r"[{}()\[\],;:.]", Punctuation),
        ],
        "identifiers": [
            # Namespace prefix: the name and the "::" separator are emitted
            # as separate tokens.
            (r"([a-zA-Z_]\w*)(::)", bygroups(Name, Punctuation)),
            (r"[a-zA-Z_]\w*", Name),
        ],
        "string": [
            # Backslash escapes.
            (r"\\.", String.Escape),
            # Format specifiers such as "%s" or "%-5.2f".
            (r"%-?[0-9]*(\.[0-9]+)?[DTdxsefg]", String.Escape),
            (r'"', String, "#pop"),
            (r".", String),
        ],
        "regex": [
            # An escaped character (including an escaped "/") does not end
            # the pattern.
            (r"\\.", String.Escape),
            (r"/", String.Regex, "#pop"),
            (r".", String.Regex),
        ],
    }