This commit is contained in:
Tim Wojtulewicz 2025-09-28 23:43:12 +08:00 committed by GitHub
commit 372b70a8ca
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
1077 changed files with 169219 additions and 29 deletions

View file

@ -34,15 +34,6 @@ jobs:
with: with:
submodules: "recursive" submodules: "recursive"
# Only reset the submodule pointer for scheduled builds. The reason to do
# this is to pick up any merge commits or anything that may have been
# missed in a merge, but not have any actual content. We don't want to do
# it otherwise because PRs should just use the submodule they're pointing
# at.
- name: Switch doc submodule to master
if: github.event_name == 'schedule'
run: cd doc && git checkout master
- name: Fetch Dependencies - name: Fetch Dependencies
run: | run: |
sudo apt-get update sudo apt-get update
@ -119,9 +110,6 @@ jobs:
cd doc cd doc
echo "*** Running pre-commit ***"
pre-commit run -a --show-diff-on-failure --color=always
echo "*** Generating Sphinx Docs ***" echo "*** Generating Sphinx Docs ***"
make > make.out 2>&1 make > make.out 2>&1
make_status=$? make_status=$?
@ -132,7 +120,7 @@ jobs:
grep -q WARNING make.out && exit 1 grep -q WARNING make.out && exit 1
rm make.out rm make.out
- name: Push zeek-docs Changes - name: Push docs Changes
if: github.event_name == 'schedule' if: github.event_name == 'schedule'
run: | run: |
cd doc cd doc
@ -142,16 +130,6 @@ jobs:
# with a check that detects whether there's anything staged. # with a check that detects whether there's anything staged.
git diff-index --cached --quiet HEAD || { git commit -m "Generate docs" && git push; } git diff-index --cached --quiet HEAD || { git commit -m "Generate docs" && git push; }
- name: Update zeek-docs Submodule
if: github.event_name == 'schedule'
run: |
git config --global user.name zeek-bot
git config --global user.email info@zeek.org
git add doc
git status
# Similar logic here: proceed only if there's a change in the submodule.
git diff-index --cached --quiet HEAD || { git commit -m 'Update doc submodule [nomail] [skip ci]' && git push; }
- name: Send email - name: Send email
# Only send notifications for scheduled runs. Runs from pull requests # Only send notifications for scheduled runs. Runs from pull requests
# show failures in the GitHub UI. # show failures in the GitHub UI.

3
.gitignore vendored
View file

@ -3,6 +3,9 @@
build* build*
!ci/windows/build.cmd !ci/windows/build.cmd
# Don't ignore things in the docs directory
!doc/**
tmp tmp
*.gcov *.gcov

3
.gitmodules vendored
View file

@ -16,9 +16,6 @@
[submodule "auxil/netcontrol-connectors"] [submodule "auxil/netcontrol-connectors"]
path = auxil/netcontrol-connectors path = auxil/netcontrol-connectors
url = https://github.com/zeek/zeek-netcontrol url = https://github.com/zeek/zeek-netcontrol
[submodule "doc"]
path = doc
url = https://github.com/zeek/zeek-docs
[submodule "auxil/paraglob"] [submodule "auxil/paraglob"]
path = auxil/paraglob path = auxil/paraglob
url = https://github.com/zeek/paraglob url = https://github.com/zeek/paraglob

View file

@ -10,7 +10,7 @@ repos:
language: python language: python
files: '\.(h|c|cpp|cc|spicy|evt)$' files: '\.(h|c|cpp|cc|spicy|evt)$'
types: [file] types: [file]
exclude: '^(testing/btest/(Baseline|plugins|spicy|scripts)/.*|testing/builtin-plugins/.*|src/3rdparty/.*)$' exclude: '^(testing/btest/(Baseline|plugins|spicy|scripts)/.*|testing/builtin-plugins/.*|src/3rdparty/.*|doc/.*)$'
- id: btest-command-commented - id: btest-command-commented
name: Check that all BTest command lines are commented out name: Check that all BTest command lines are commented out
@ -56,4 +56,4 @@ repos:
rev: v0.26.0 rev: v0.26.0
hooks: hooks:
- id: spicy-format - id: spicy-format
exclude: '^testing/.*' exclude: '^(testing/.*|doc/devel/spicy/autogen/.*)'

16
.readthedocs.yml Normal file
View file

@ -0,0 +1,16 @@
version: 2
formats:
- htmlzip
build:
os: ubuntu-24.04
tools:
python: "3.13"
python:
install:
- requirements: doc/requirements.txt
sphinx:
configuration: doc/conf.py

1
doc

@ -1 +0,0 @@
Subproject commit 2731def9159247e6da8a3191783c89683363689c

2
doc/.gitignore vendored Normal file
View file

@ -0,0 +1,2 @@
build
*.pyc

66
doc/.typos.toml Normal file
View file

@ -0,0 +1,66 @@
[default]
extend-ignore-re = [
# seh too close to she
"registered SEH to support IDL",
# ALLO is a valid FTP command
"\"ALLO\".*[0-9]{3}",
"des-ede3-cbc-Env-OID",
# On purpose
"\"THE NETBIOS NAM\"",
# NFS stuff.
"commited :zeek:type:`NFS3::stable_how_t`",
"\\/fo\\(o",
" nd\\.<br",
"\"BaR\"",
"Not-ECT",
"Ninteenth: Ninteenth",
# Connecton and file UIDs
"[CF][a-zA-Z0-9]{17}",
# Smoot
"Smoot",
"SIEM",
]
extend-ignore-identifiers-re = [
"TLS_.*_EDE.*_.*",
"SSL.*_EDE.*_.*",
"_3DES_EDE_CBC_SHA",
"GOST_R_.*",
"icmp6_nd_.*",
"pn", # Use for `PoolNode` variables
"complte_flag", # Existing use in exported record in base.
"VidP(n|N)", # In SMB.
"iin", # In DNP3.
"(ScValidatePnPService|ScSendPnPMessage)", # In DCE-RPC.
"snet", # Used as shorthand for subnet in base scripts.
"typ",
"tpe",
]
[default.extend-identifiers]
MCA_OCCURED = "MCA_OCCURED"
MNT3ERR_ACCES = "MNT3ERR_ACCES"
ND_QUEUE_OVERFLOW = "ND_QUEUE_OVERFLOW"
ND_REDIRECT = "ND_REDIRECT"
NFS3ERR_ACCES = "NFS3ERR_ACCES"
NO_SEH = "NO_SEH"
RPC_NT_CALL_FAILED_DNE = "RPC_NT_CALL_FAILED_DNE"
RpcAddPrintProvidor = "RpcAddPrintProvidor"
RpcDeletePrintProvidor = "RpcDeletePrintProvidor"
THA = "THA"
tha = "tha"
uses_seh = "uses_seh"
exat = "exat"
EXAT = "EXAT"
tpe = "tpe"
[default.extend-words]
caf = "caf"
helo = "helo"
# Seems we use this in the management framework
requestor = "requestor"
# `inout` is used as a keyword in Spicy, but looks like a typo of `input`.
inout = "inout"

5
doc/LICENSE Normal file
View file

@ -0,0 +1,5 @@
This work is licensed under the Creative Commons
Attribution 4.0 International License. To view a copy of this
license, visit https://creativecommons.org/licenses/by/4.0/ or send
a letter to Creative Commons, 444 Castro Street, Suite 900, Mountain
View, California, 94041, USA.

37
doc/Makefile Normal file
View file

@ -0,0 +1,37 @@
SPHINXOPTS =
NUMJOBS ?= auto
all: html
doc: html
builddir:
mkdir -p build/html
clean:
rm -rf build/html
html: builddir
sphinx-build -j $(NUMJOBS) -b html $(SPHINXOPTS) . ./build/html
livehtml: builddir
sphinx-autobuild --ignore "*.git/*" --ignore "*.lock" --ignore "*.pyc" --ignore "*.swp" --ignore "*.swpx" --ignore "*.swx" -b html $(SPHINXOPTS) . ./build/html
commit:
git add * && git commit -m 'Update generated docs'
spicy-%:
git clone https://github.com/zeek/$@
check-spicy-docs: spicy-tftp
@echo Refreshing checkouts
@for REPO in $^; do (cd $$REPO && git pull && git reset HEAD --hard)>/dev/null; done
@
@echo Checking whether docs for Spicy integration are up-to-date
@./devel/spicy/autogen-spicy-docs spicy-tftp
@
@git diff --quiet devel/spicy/autogen/ \
|| (echo "Spicy docs are not up-to-date, rerun './devel/spicy/autogen-spicy-docs'." && exit 1)
.PHONY : all doc builddir clean html livehtml

132
doc/README Normal file
View file

@ -0,0 +1,132 @@
.. _zeek-docs: https://github.com/zeek/zeek-docs
.. _Read the Docs: https://docs.readthedocs.io/en/stable/index.html
.. _Zeek repo: https://github.com/zeek/zeek
.. _Sphinx: https://www.sphinx-doc.org/en/master
.. _pip: https://pypi.org/project/pip
Zeek Documentation
==================
The documentation repo at zeek-docs_
contains version-specific Zeek documentation source files that are ultimately
used as the basis for content hosted at https://docs.zeek.org.
Markup Format, Style, and Conventions
-------------------------------------
For general guidance on the basics of how the documentation is written,
consult this Zeek wiki:
https://github.com/zeek/zeek/wiki/Documentation-Style-and-Conventions
Source-Tree Organization
------------------------
The zeek-docs_ repo containing this README file is the root of a Sphinx_ source
tree and can be modified to add more documentation, style sheets, JavaScript,
etc. The Sphinx config file is ``conf.py``. The typical way new documents get
integrated is from them being referenced directly in ``index.rst`` or
indirectly from something in the ``toctree`` (Table of Contents Tree) specified
in that main index.
There is also a custom Sphinx domain implemented in ``ext/zeek.py`` which adds
some reStructuredText (reST) directives and roles that aid in generating useful
index entries and cross-references. This primarily supports integration with
the script-reference sections, some of which are auto-generated by Zeek's
Doxygen-like feature, named "Zeekygen". The bulk of auto-generated content
lives under the ``scripts/`` directory or has a file name starting with
"autogenerated", so if you find yourself wanting to change those, you should
actually look at doing those changes within the `Zeek repo`_ itself rather
than here, so see the next section for how Zeekygen docs can be (re)generated.
Generating Zeekygen Reference Docs
----------------------------------
All Zeekygen-generated docs get committed into Git, so if you don't have to
perform any changes on it and just want to preview what's already existing,
you can skip down to the next :ref:`Local Previewing <local-doc-preview>` section.
The Zeekygen documentation-generation feature is a part of Zeek itself, so
you'll want to obtain the `Zeek repo`_ from Git, read the :doc:`INSTALL
</install>` file directions to install required dependencies, and build Zeek::
git clone --recursive https://github.com/zeek/zeek
cd zeek
# Read INSTALL file and get dependencies here
./configure && make -j $(nproc)
# Make desired edits to scripts/, src/, etc.
./ci/update-zeekygen-docs.sh
The last command runs a script to generate documentation, which will end up in
the ``doc/`` subdirectory. Note that ``doc/`` is just a Git submodule of this
zeek-docs_ repository, so you can run ``git status`` there to find exactly
what changed.
Also note that the documentation-generation script is run automatically
on a daily basis to incorporate any documentation changes that people make
in Zeek itself without them having to necessarily be aware of the full
documentation process. The GitHub Action that does that daily task is
located in the Zeek repo's ``.github/workflows/generate-docs.yml`` file.
.. _local-doc-preview:
Local Previewing (How To Build)
-------------------------------
First make sure you have the required dependencies used for building docs:
* Python interpreter >= 3.9
* Sphinx: https://www.sphinx-doc.org/en/master/
* Read the Docs Sphinx Theme: https://github.com/rtfd/sphinx_rtd_theme
* GitPython: https://github.com/gitpython-developers/GitPython
If you have pip_, you may just use the command ``pip3 install -r
requirements.txt`` to install all the dependencies using the
``requirements.txt`` from zeek-docs_.
Now run ``make`` within the zeek-docs_ repository's top-level to locally render
its reST files into HTML. After the build completes, HTML documentation is
symlinked in ``build/html`` and you can open the ``index.html`` found there in
your web browser.
There's also a ``make livehtml`` (requires ``pip3 install sphinx-autobuild``)
target in the top-level Makefile that is useful for editing the reST files and
seeing changes rendered out live to a separate browser.
Hosting
-------
Documentation is hosted by `Read the Docs`_ (RTD), so you can generally read
about how it works there. The web-interface is accessible via
https://readthedocs.org/projects/zeek-docs.
How zeek-docs_ is configured to use RTD is a combination of some custom
settings in its ``.readthedocs.yml`` file and others only accessible through
RTD's web-interface (e.g. domain and subproject settings). Most config
settings are likely understandable just by browsing the web-interface and
RTD's guides, but a few particular points to mention:
* There is an associated, always-failing project at
https://readthedocs.org/projects/zeek. It's always-failing because
RTD redirects only activate when pages 404 and this project exists so that
all attempts to use https://zeek.rtfd.io or https://zeek.readthedocs.io
get redirected to https://docs.zeek.org. Those would have been the project
URLs if ownership of the RTD 'zeek' project was had from the start, but
it was only obtained later, after documentation already started development
in the 'zeek-docs' RTD project slug.
* Over time, page redirects have accrued into ``redirects.yml`` as a way to
help document what they are and why they happened and also as a potential
way to automate addition/reinstantiation of a large number of redirects,
but typically redirects can be manually added via the RTD web interface
first and then noted in ``redirects.yml``
* There are RTD subprojects for things like Broker, Package Manager,
and Spicy. The use of subprojects simply allows access to their RTD
docs via the custom domain of https://docs.zeek.org
* RTD will auto-build any newly-pushed commits to zeek-docs_ (i.e. a webhook is
configured), but if a tag is changed to point somewhere different, you'll
typically have to go into the RTD web interface, "Edit" the associated
version under "Versions", "wipe" the existing docs, and then manually trigger
a rebuild of that version tag under "Builds".

132
doc/README.rst Normal file
View file

@ -0,0 +1,132 @@
.. _zeek-docs: https://github.com/zeek/zeek-docs
.. _Read the Docs: https://docs.readthedocs.io/en/stable/index.html
.. _Zeek repo: https://github.com/zeek/zeek
.. _Sphinx: https://www.sphinx-doc.org/en/master
.. _pip: https://pypi.org/project/pip
Zeek Documentation
==================
The documentation repo at zeek-docs_
contains version-specific Zeek documentation source files that are ultimately
used as the basis for content hosted at https://docs.zeek.org.
Markup Format, Style, and Conventions
-------------------------------------
For general guidance on the basics of how the documentation is written,
consult this Zeek wiki:
https://github.com/zeek/zeek/wiki/Documentation-Style-and-Conventions
Source-Tree Organization
------------------------
The zeek-docs_ repo containing this README file is the root of a Sphinx_ source
tree and can be modified to add more documentation, style sheets, JavaScript,
etc. The Sphinx config file is ``conf.py``. The typical way new documents get
integrated is from them being referenced directly in ``index.rst`` or
indirectly from something in the ``toctree`` (Table of Contents Tree) specified
in that main index.
There is also a custom Sphinx domain implemented in ``ext/zeek.py`` which adds
some reStructuredText (reST) directives and roles that aid in generating useful
index entries and cross-references. This primarily supports integration with
the script-reference sections, some of which are auto-generated by Zeek's
Doxygen-like feature, named "Zeekygen". The bulk of auto-generated content
lives under the ``scripts/`` directory or has a file name starting with
"autogenerated", so if you find yourself wanting to change those, you should
actually look at doing those changes within the `Zeek repo`_ itself rather
than here, so see the next section for how Zeekygen docs can be (re)generated.
Generating Zeekygen Reference Docs
----------------------------------
All Zeekygen-generated docs get committed into Git, so if you don't have to
perform any changes on it and just want to preview what's already existing,
you can skip down to the next :ref:`Local Previewing <local-doc-preview>` section.
The Zeekygen documentation-generation feature is a part of Zeek itself, so
you'll want to obtain the `Zeek repo`_ from Git, read the :doc:`INSTALL
</install>` file directions to install required dependencies, and build Zeek::
git clone --recursive https://github.com/zeek/zeek
cd zeek
# Read INSTALL file and get dependencies here
./configure && make -j $(nproc)
# Make desired edits to scripts/, src/, etc.
./ci/update-zeekygen-docs.sh
The last command runs a script to generate documentation, which will end up in
the ``doc/`` subdirectory. Note that ``doc/`` is just a Git submodule of this
zeek-docs_ repository, so you can run ``git status`` there to find exactly
what changed.
Also note that the documentation-generation script is run automatically
on a daily basis to incorporate any documentation changes that people make
in Zeek itself without them having to necessarily be aware of the full
documentation process. The GitHub Action that does that daily task is
located in the Zeek repo's ``.github/workflows/generate-docs.yml`` file.
.. _local-doc-preview:
Local Previewing (How To Build)
-------------------------------
First make sure you have the required dependencies used for building docs:
* Python interpreter >= 3.9
* Sphinx: https://www.sphinx-doc.org/en/master/
* Read the Docs Sphinx Theme: https://github.com/rtfd/sphinx_rtd_theme
* GitPython: https://github.com/gitpython-developers/GitPython
If you have pip_, you may just use the command ``pip3 install -r
requirements.txt`` to install all the dependencies using the
``requirements.txt`` from zeek-docs_.
Now run ``make`` within the zeek-docs_ repository's top-level to locally render
its reST files into HTML. After the build completes, HTML documentation is
symlinked in ``build/html`` and you can open the ``index.html`` found there in
your web browser.
There's also a ``make livehtml`` (requires ``pip3 install sphinx-autobuild``)
target in the top-level Makefile that is useful for editing the reST files and
seeing changes rendered out live to a separate browser.
Hosting
-------
Documentation is hosted by `Read the Docs`_ (RTD), so you can generally read
about how it works there. The web-interface is accessible via
https://readthedocs.org/projects/zeek-docs.
How zeek-docs_ is configured to use RTD is a combination of some custom
settings in its ``.readthedocs.yml`` file and others only accessible through
RTD's web-interface (e.g. domain and subproject settings). Most config
settings are likely understandable just by browsing the web-interface and
RTD's guides, but a few particular points to mention:
* There is an associated, always-failing project at
https://readthedocs.org/projects/zeek. It's always-failing because
RTD redirects only activate when pages 404 and this project exists so that
all attempts to use https://zeek.rtfd.io or https://zeek.readthedocs.io
get redirected to https://docs.zeek.org. Those would have been the project
URLs if ownership of the RTD 'zeek' project was had from the start, but
it was only obtained later, after documentation already started development
in the 'zeek-docs' RTD project slug.
* Over time, page redirects have accrued into ``redirects.yml`` as a way to
help document what they are and why they happened and also as a potential
way to automate addition/reinstantiation of a large number of redirects,
but typically redirects can be manually added via the RTD web interface
first and then noted in ``redirects.yml``
* There are RTD subprojects for things like Broker, Package Manager,
and Spicy. The use of subprojects simply allows access to their RTD
docs via the custom domain of https://docs.zeek.org
* RTD will auto-build any newly-pushed commits to zeek-docs_ (i.e. a webhook is
configured), but if a tag is changed to point somewhere different, you'll
typically have to go into the RTD web interface, "Edit" the associated
version under "Versions", "wipe" the existing docs, and then manually trigger
a rebuild of that version tag under "Builds".

32
doc/_static/theme_overrides.css vendored Normal file
View file

@ -0,0 +1,32 @@
/* override table width restrictions */
@media screen and (min-width: 767px) {
.wy-table-responsive table td {
/* !important prevents the common CSS stylesheets from overriding
this as on RTD they are loaded after this stylesheet */
white-space: normal !important;
}
.wy-table-responsive {
overflow: visible !important;
}
}
h1, h2, h3, h4, h5, h6 {
color: #294488;
font-family: 'Open Sans',Helvetica,Arial,Lucida,sans-serif!important;
}
a {
color: #2ea3f2;
}
body {
font-family: "Open Sans",Arial,sans-serif;
color: #666;
}
div.highlight pre strong {
font-weight: 800;
background-color: #ffffcc;
}

15
doc/_templates/breadcrumbs.html vendored Normal file
View file

@ -0,0 +1,15 @@
{% extends "!breadcrumbs.html" %}
{% block breadcrumbs_aside %}
<li class="wy-breadcrumbs-aside">
{% if pagename != "search" %}
{% if display_github %}
{% if github_version == "master" %}
<a href="https://{{ github_host|default("github.com") }}/{{ github_user }}/{{ github_repo }}/edit/{{ github_version }}{{ conf_py_path }}{{ pagename }}{{ page_source_suffix }}" class="fa fa-github"> {{ _('Edit on GitHub') }}</a>
{% endif %}
{% elif show_source and has_source and sourcename %}
<a href="{{ pathto('_sources/' + sourcename, true)|e }}" rel="nofollow"> {{ _('View page source') }}</a>
{% endif %}
{% endif %}
</li>
{% endblock %}

14
doc/_templates/layout.html vendored Normal file
View file

@ -0,0 +1,14 @@
{% extends "!layout.html" %}
{% if READTHEDOCS and current_version %}
{% if current_version == "latest" or current_version == "stable"
or current_version == "master" or current_version == "current"
or current_version == "lts" or current_version == "LTS" %}
{% set current_version = current_version ~ " (" ~ version ~ ")" %}
{% endif %}
{% endif %}
{% block menu %}
{{ super() }}
<a href="{{pathto('genindex.html', 1)}}">Index</a>
{% endblock %}

256
doc/about.rst Normal file
View file

@ -0,0 +1,256 @@
==========
About Zeek
==========
What Is Zeek?
=============
Zeek is a passive, open-source network traffic analyzer. Many operators use
Zeek as a network security monitor (NSM) to support investigations of
suspicious or malicious activity. Zeek also supports a wide range of traffic
analysis tasks beyond the security domain, including performance measurement
and troubleshooting.
The first benefit a new user derives from Zeek is the extensive set of logs
describing network activity. These logs include not only a comprehensive record
of every connection seen on the wire, but also application-layer transcripts.
These include all HTTP sessions with their requested URIs, key headers, MIME
types, and server responses; DNS requests with replies; SSL certificates; key
content of SMTP sessions; and much more. By default, Zeek writes all this
information into well-structured tab-separated or JSON log files suitable for
post-processing with external software. Users can also choose to have external
databases or SIEM products consume, store, process, and present the data for
querying.
In addition to the logs, Zeek comes with built-in functionality for a range of
analysis and detection tasks, including extracting files from HTTP sessions,
detecting malware by interfacing to external registries, reporting vulnerable
versions of software seen on the network, identifying popular web applications,
detecting SSH brute-forcing, validating SSL certificate chains, and much more.
In addition to shipping such powerful functionality “out of the box,” Zeek is a
fully customizable and extensible platform for traffic analysis. Zeek provides
users a domain-specific, Turing-complete scripting language for expressing
arbitrary analysis tasks. Think of the Zeek language as a “domain-specific
Python” (or Perl): just like Python, the system comes with a large set of
pre-built functionality (the “standard library”), yet users can also put Zeek
to use in novel ways by writing custom code. Indeed, all of Zeek's default
analyses, including logging, are done via scripts; no specific analysis is
hard-coded into the core of the system.
Zeek runs on commodity hardware and hence provides a low-cost alternative to
expensive proprietary solutions. In many ways Zeek exceeds the capabilities of
other network monitoring tools, which typically remain limited to a small set
of hard-coded analysis tasks. Zeek is not a classic signature-based intrusion
detection system (IDS); while it supports such standard functionality as well,
Zeek's scripting language facilitates a much broader spectrum of very different
approaches to finding malicious activity. These include semantic misuse
detection, anomaly detection, and behavioral analysis.
A large variety of sites deploy Zeek to protect their infrastructure, including
many universities, research labs, supercomputing centers, open-science
communities, major corporations, and government agencies. Zeek specifically
targets high-speed, high-volume network monitoring, and an increasing number of
sites are now using the system to monitor their 10GE networks, with some
already moving on to 100GE links.
Zeek accommodates high-performance settings by supporting scalable
load-balancing. Large sites typically run “Zeek Clusters” in which a high-speed
front end load balancer distributes the traffic across an appropriate number of
back end PCs, all running dedicated Zeek instances on their individual traffic
slices. A central manager system coordinates the process, synchronizing state
across the back ends and providing the operators with a central management
interface for configuration and access to aggregated logs. Zeek's integrated
management framework, ZeekControl, supports such cluster setups out-of-the-box.
Zeek's cluster features support single-system and multi-system setups. That's
part of Zeek's scalability advantages. For example, administrators can scale
Zeek within one system for as long as possible, and then transparently add more
systems when necessary.
In brief, Zeek is optimized for interpreting network traffic and generating
logs based on that traffic. It is not optimized for byte matching, and users
seeking signature detection approaches would be better served by trying
intrusion detection systems such as Suricata. Zeek is also not a protocol
analyzer in the sense of Wireshark, seeking to depict every element of network
traffic at the frame level, or a system for storing traffic in packet capture
(PCAP) form. Rather, Zeek sits at the “happy medium” representing compact yet
high fidelity network logs, generating better understanding of network traffic
and usage.
Why Zeek?
=========
Zeek offers many advantages for security and network teams who want to better
understand how their infrastructure is being used.
Security teams generally depend upon four sorts of data sources when trying to
detect and respond to suspicious and malicious activity. These include *third
party* sources such as law enforcement, peers, and commercial or nonprofit
threat intelligence organizations; *network data*; *infrastructure and
application data*, including logs from cloud environments; and *endpoint data*.
Zeek is primarily a platform for collecting and analyzing the second form of
data -- network data. All four are important elements of any security team's
program, however.
When looking at data derived from the network, there are four types of data
available to analysts. As defined by the `network security monitoring paradigm
<https://corelight.blog/2019/04/30/do-you-know-your-nsm-data-types/>`_, these
four data types are *full content*, *transaction data*, *extracted content*,
and *alert data*. Using these data types, one can record traffic, summarize
traffic, extract traffic (or perhaps more accurately, extract content
in the form of files), and judge traffic, respectively.
It's critical to collect and analyze the four types of network security
monitoring data. The question becomes one of determining the best way to
accomplish this goal. Thankfully, Zeek as a NSM platform enables collection of
at least two, and in some ways three, of these data forms, namely transaction
data, extracted content, and alert data.
Zeek is best known for its transaction data. By default, when run and told to
watch a network interface, Zeek will generate a collection of compact,
high-fidelity, richly-annotated set of transaction logs. These logs describe
the protocols and activity seen on the wire, in a judgement-free,
policy-neutral manner. This documentation will spend a considerable amount of
time describing the most common Zeek log files such that readers will become
comfortable with the format and learn to apply them to their environment.
Zeek can also easily carve files from network traffic, thanks to its file
extraction capabilities. Analysts can then send those files to execution
sandboxes or other file examination tools for additional investigation. Zeek
has some capability to perform classical byte-centric intrusion detection, but
that job is best suited for packages like the open source Snort or Suricata
engines. Zeek has other capabilities however that are capable of providing
judgements in the form of alerts, through its notice mechanism.
Zeek is not optimized for writing traffic to disk in the spirit of a full
content data collection, and that task is best handled by software written to
fulfill that requirement.
Beyond the forms of network data that Zeek can natively collect and generate,
Zeek has advantages that appeared in the `What Is Zeek?`_ section. These
include its built-in functionality for a range of analysis and detection
tasks, and its status as a fully customizable and extensible platform for
traffic analysis. Zeek is also attractive because of its ability to run on
commodity hardware, giving users of all types the ability to at least try Zeek
in a low-cost manner.
History
=======
Zeek has a rich history stretching back to the 1990s. `Vern Paxson
<http://www.icir.org/vern/>`_ designed and implemented the initial version in
1995 as a researcher at the `Lawrence Berkeley National Laboratory (LBNL)
<http://www.lbl.gov/>`_. The original software was called “Bro,” as an
“Orwellian reminder that monitoring comes hand in hand with the potential
for privacy violations”.
LBNL first deployed Zeek in 1996, and the USENIX Security Symposium published
Vern's original paper on Zeek in 1998, and awarded it the Best Paper Award that
year. He published a refined version of the paper in 1999 as `Bro: A System for
Detecting Network Intruders in Real-Time
<http://www.icir.org/vern/papers/bro-CN99.pdf>`_.
In 2003, the `National Science Foundation (NSF) <http://www.nsf.gov/>`_ began
supporting research and advanced development on Bro at the `International
Computer Science Institute (ICSI) <http://www.icsi.berkeley.edu/>`_. (Vern
still leads the ICSI `Networking and Security group <http://www.icir.org/>`_.)
Over the years, a growing team of ICSI researchers and students kept adding
novel functions to Zeek, while LBNL continued its support with funding from the
`Department of Energy (DOE) <http://www.doe.gov/>`_. Much of Zeek's
capabilities originate in academic research projects, with results often
published at top-tier conferences. A key to Zeek's success was the project's
ability to bridge the gap between academia and operations. This relationship
helped ground research on Zeek in real-world challenges.
With a growing operational user community, the research-centric development
model eventually became a bottleneck to the system's evolution. Research
grants did not support the more mundane parts of software development and
maintenance. However, those elements were crucial for the end-user experience.
As a result, deploying Zeek required overcoming a steep learning curve.
In 2010, NSF sought to address this challenge by awarding ICSI a grant from its
Software Development for Cyberinfrastructure fund. The `National Center for
Supercomputing Applications (NCSA) <http://www.ncsa.illinois.edu/>`_ joined the
team as a core partner, and the Zeek project began to overhaul many of the
user-visible parts of the system for the 2.0 release in 2012.
After Zeek 2.0, the project enjoyed tremendous growth in new deployments across
a diverse range of settings, and the ongoing collaboration between ICSI (co-PI
Robin Sommer) and NCSA (co-PI Adam Slagell) brought a number of important
features. In 2012, Zeek added native IPv6 support, long before many enterprise
networking monitoring tools. In 2013, NSF renewed its support with a second
grant that established the Bro Center of Expertise at ICSI and NCSA, promoting
Zeek as a comprehensive, low-cost security capability for research and
education communities. To facilitate both debugging and education,
`try.zeek.org <https://try.zeek.org>`_ (formerly try.bro.org) was launched in
2014. This provided an interactive way for users to test a script with their
own packet captures against a variety of Zeek versions and easily share
sample code with others. For Zeek clusters and external communication,
the Broker communication framework was added. Last, but not least, the
Zeek package manager was created in 2016, funded by an additional grant
from the Mozilla Foundation.
In the fall of 2018, the project leadership team decided to change the name of
the software from Bro to Zeek. The leadership team desired a name that better
reflected the values of the community while avoiding the negative connotations
of so-called “bro culture” outside the computing world. The project released
version 3.0 in the fall of 2019, the first release bearing the name Zeek. The
year 2020 saw a renewed focus on community and growing the Zeek community, with
increased interaction via social media, webinars, Slack channels, and related
outreach efforts.
For a history of the project from 1995 to 2015, see Vern Paxson's talk from
BroCon 2015, `Reflecting on Twenty Years of Bro
<https://www.youtube.com/watch?v=pb9HlmV0s2A>`_.
For background on the decision to rename Bro to Zeek, see Vern Paxson's talk
from BroCon 2018, `Renaming Bro
<https://www.youtube.com/watch?v=L88ZYfjPzyk>`_.
Architecture
============
.. image:: /images/architecture.png
:align: center
:scale: 75%
At a very high level, Zeek is architecturally layered into two major
components. Its *event engine* (or *core*) reduces the incoming packet stream
into a series of higher-level *events*. These events reflect network activity
in policy-neutral terms, i.e., they describe *what* has been seen, but not
*why*, or whether it is significant.
For example, every HTTP request on the wire turns into a corresponding
:zeek:see:`http_request` event that carries with it the involved IP addresses
and ports, the URI being requested, and the HTTP version in use. The event
however does not convey any further *interpretation*, such as whether that URI
corresponds to a known malware site.
The event engine component comprises a number of subcomponents, including in
particular the packet processing pipeline consisting of: input sources,
packet analysis, session analysis, and file analysis. Input sources ingest
incoming network traffic from network interfaces. Packet analysis processes
lower-level protocols, starting all the way down at the link layer. Session
analysis handles application-layer protocols, such as HTTP, FTP, etc. File
analysis dissects the content of files transferred over sessions. The event
engine provides a plugin architecture for adding any of these from outside
of the core Zeek code base, allowing Zeek's capabilities to be expanded as
needed.
Semantics related to the events are derived by Zeek's second main component,
the *script interpreter*, which executes a set of *event handlers* written in
Zeek's custom scripting language. These scripts can express a site's
security policy, such as what actions to take when the monitor detects
different types of activity.
More generally scripts can derive any desired properties and statistics from
the input traffic. In fact, all of Zeek's default output comes from scripts
included in the distribution. Zeek's language comes with extensive
domain-specific types and support functionality. Crucially, Zeek's language
allows scripts to maintain state over time, enabling them to track and
correlate the evolution of what they observe across connection and host
boundaries. Zeek scripts can generate real-time alerts and also execute
arbitrary external programs on demand. One might use this functionality to
trigger an active response to an attack.

22
doc/acknowledgements.rst Normal file
View file

@ -0,0 +1,22 @@
================
Acknowledgements
================
Thanks to everyone who contributed to Zeek's documentation
(listed alphabetically):
* Johanna Amann
* Richard Bejtlich
* Michael Dopheide
* Amber Graner
* Jan Grashöfer
* Christian Kreibich
* Terry Leach
* Aashish Sharma
* Jon Siwek
* Stephen Smoot
* Robin Sommer
* Aaron Soto
* Nick Turley
* Fatema Bannat Wala
* Tim Wojtulewicz

View file

@ -0,0 +1,392 @@
.. _CMake: https://www.cmake.org
.. _SWIG: https://www.swig.org
.. _Xcode: https://developer.apple.com/xcode/
.. _MacPorts: https://www.macports.org
.. _Fink: https://www.finkproject.org
.. _Homebrew: https://brew.sh
.. _downloads page: https://zeek.org/get-zeek
.. _devtoolset: https://developers.redhat.com/products/developertoolset/hello-world
.. _zkg package manager: https://docs.zeek.org/projects/package-manager/en/stable/
.. _crosstool-NG: https://crosstool-ng.github.io/
.. _CMake toolchain: https://cmake.org/cmake/help/latest/manual/cmake-toolchains.7.html
.. _contribute: https://github.com/zeek/zeek/wiki/Contribution-Guide
.. _Chocolatey: https://chocolatey.org
.. _Npcap: https://npcap.com/
.. _building-from-source:
====================
Building from Source
====================
Building Zeek from source provides the most control over your build and is the
preferred approach for advanced users. We support a wide range of operating
systems and distributions. Our `support policy
<https://github.com/zeek/zeek/wiki/Platform-Support-Policy>`_ is informed by
what we can run in our CI pipelines with reasonable effort, with the current
status captured in our `support matrix
<https://github.com/zeek/zeek/wiki/Zeek-Operating-System-Support-Matrix>`_.
Required Dependencies
---------------------
Building Zeek from source requires the following dependencies, including
development headers for libraries:
* Bash (for ZeekControl and BTest)
* BIND8 library or greater (if not covered by system's libresolv)
* Bison 3.3 or greater (https://www.gnu.org/software/bison/)
* C/C++ compiler with C++17 support (GCC 8+ or Clang 9+)
* CMake 3.15 or greater (https://www.cmake.org)
* Flex (lexical analyzer generator) 2.6 or greater (https://github.com/westes/flex)
* Libpcap (https://www.tcpdump.org)
* Make
* OpenSSL (https://www.openssl.org)
* Python 3.9 or greater (https://www.python.org/)
* SWIG (https://www.swig.org)
* ZeroMQ (https://zeromq.org)
* Zlib (https://zlib.net/)
To install these, you can use:
* RPM/RedHat-based Linux:
.. code-block:: console
sudo dnf install bison cmake cppzmq-devel gcc gcc-c++ flex libpcap-devel make openssl-devel python3 python3-devel swig zlib-devel
On pre-``dnf`` systems, use ``yum`` instead. Additionally, on RHEL/CentOS 7,
you can install and activate a devtoolset_ to get access to recent GCC
versions. You will also have to install and activate CMake 3. For example:
.. code-block:: console
sudo yum install cmake3 devtoolset-7
scl enable devtoolset-7 bash
* DEB/Debian-based Linux:
.. code-block:: console
sudo apt-get install bison cmake cppzmq-dev gcc g++ flex libfl-dev libpcap-dev libssl-dev make python3 python3-dev swig zlib1g-dev
If your platform doesn't offer ``cppzmq-dev``, try ``libzmq3-dev``
instead. Zeek's build will fall back to an in-tree version of C++
bindings to ZeroMQ in that case.
* FreeBSD:
Most required dependencies should come with a minimal FreeBSD install
except for the following.
.. code-block:: console
sudo pkg install -y base64 bash bison cmake cppzmq git python3 swig
pyver=`python3 -c 'import sys; print(f"py{sys.version_info[0]}{sys.version_info[1]}")'`
sudo pkg install -y $pyver-sqlite3
* macOS:
Compiling source code on Macs requires first installing either Xcode_
or the "Command Line Tools" (which is a much smaller download). To check
if either is installed, run the ``xcode-select -p`` command. If you see
an error message, then neither is installed and you can then run
``xcode-select --install`` which will prompt you to either get Xcode (by
clicking "Get Xcode") or to install the command line tools (by
clicking "Install").
macOS comes with all required dependencies except for CMake_, SWIG_,
Bison, Flex, and OpenSSL (OpenSSL headers were removed in macOS 10.11,
therefore OpenSSL must be installed manually for macOS versions 10.11
or newer).
Distributions of these dependencies can likely be obtained from your
preferred macOS package management system (e.g. Homebrew_,
MacPorts_, or Fink_). Specifically for Homebrew, the ``bison``, ``cmake``,
``cppzmq``, ``flex``, ``swig``, and ``openssl`` packages
provide the required dependencies. For MacPorts, use the ``bison``, ``cmake``,
``cppzmq``, ``flex``, ``swig``, ``swig-python``, and ``openssl`` packages.
* Windows
Windows support is experimental. These instructions are meant as a starting
point for development on that platform, and might have issues or be missing
steps. Notify the Zeek team if any such problems arise.
Compiling on Windows requires the installation of a development environment.
Zeek currently builds on Visual Studio 2019, and you can either install the
full version including the UI tools or you can install the command-line tools
and build from a shell. The instructions below describe how to install the
command-line tools, but are not necessary if you install the full VS2019
package. You will need to install Chocolatey_ in order to install the
dependencies as instructed below. It's possible to install them from other
sources (msys2, cygwin, etc), which we leave to the reader.
Cloning the repository will also require Developer Mode to be enabled in
Windows. This is due to the existence of a number of symbolic links in the
repository. Without Developer Mode, ``git`` on Windows will ignore these
links and builds will fail. There are a couple of different ways to enable
it, and the settings may differ depending on the version of Windows.
.. code-block:: console
choco install -y --no-progress visualstudio2019buildtools --version=16.11.11.0
choco install -y --no-progress visualstudio2019-workload-vctools --version=1.0.0 --package-parameters '--add Microsoft.VisualStudio.Component.VC.ATLMFC'
choco install -y --no-progress sed
choco install -y --no-progress winflexbison3
choco install -y --no-progress msysgit
choco install -y --no-progress python
choco install -y --no-progress openssl --version=3.1.1
Once the dependencies are installed, you will need to add the Git installation
to your PATH (``C:\Program Files\Git\bin`` by default). This is needed for the
``sh`` command to be available during the build. Once all of the dependencies
are in place, you will need to open a shell (PowerShell or cmd) and add the
development environment to it. The following command is for running on an
x86_64 host.
.. code-block:: console
C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\VC\Auxiliary\Build\vcvarsall.bat x86_amd64
Now you can build via cmake:
.. code-block:: console
mkdir build
cd build
cmake.exe .. -DCMAKE_BUILD_TYPE=release -DENABLE_ZEEK_UNIT_TESTS=yes -DENABLE_CLUSTER_BACKEND_ZEROMQ=no -DVCPKG_TARGET_TRIPLET="x64-windows-static" -G Ninja
cmake.exe --build .
All of this is duplicated in the CI configuration for Windows which lives in
the ``ci/windows`` directory, and can be used as a reference for running the
commands by hand.
Note: By default, Windows links against the standard libpcap library from
vcpkg. This version of libpcap does not support packet capture on Windows,
unlike other platforms. In order to capture packets from live interfaces on
Windows, you will need to link against the Npcap_ library. This library is free
for personal use, but requires a paid license for commercial use or
redistribution. To link against Npcap, download the SDK from their website,
unzip it, and then pass ``-DPCAP_ROOT_DIR="<path to npcap sdk>"`` to the
initial CMake invocation for Zeek.
Note also that the ZeroMQ cluster backend is not yet supported on Windows.
Optional Dependencies
---------------------
Zeek can make use of some optional libraries and tools if they are found at
build time:
* libmaxminddb (for geolocating IP addresses)
* sendmail (enables Zeek and ZeekControl to send mail)
* curl (used by a Zeek script that implements active HTTP)
* gperftools (tcmalloc is used to improve memory and CPU usage)
* jemalloc (https://github.com/jemalloc/jemalloc)
* PF_RING (Linux only, see :ref:`pf-ring-config`)
* krb5 libraries and headers
* ipsumdump (for trace-summary; https://github.com/kohler/ipsumdump)
* hiredis (for the Redis storage backend)
Geolocation is probably the most interesting and can be installed on most
platforms by following the instructions for :ref:`address geolocation and AS
lookups <geolocation>`.
The `zkg package manager`_, included in the Zeek installation, requires
two external Python modules:
* GitPython: https://pypi.org/project/GitPython/
* semantic-version: https://pypi.org/project/semantic-version/
These install easily via pip (``pip3 install GitPython
semantic-version``) and also ship with some distributions:
* RPM/RedHat-based Linux:
.. code-block:: console
sudo yum install python3-GitPython python3-semantic_version
* DEB/Debian-based Linux:
.. code-block:: console
sudo apt-get install python3-git python3-semantic-version
``zkg`` also requires a ``git`` installation, which the above system packages
pull in as a dependency. If you install via pip, remember that you also need
``git`` itself.
Retrieving the Sources
----------------------
Zeek releases are bundled into source packages for convenience and are
available on the `downloads page`_. The source code can be manually downloaded
from the link in the ``.tar.gz`` format to the target system for installation.
If you plan to `contribute`_ to Zeek or just want to try out the latest
features under development, you should obtain Zeek's source code through its
Git repositories hosted at https://github.com/zeek:
.. code-block:: console
git clone --recurse-submodules https://github.com/zeek/zeek
.. note:: If you choose to clone the ``zeek`` repository
non-recursively for a "minimal Zeek experience", be aware that
compiling it depends on several of the other submodules as well, so
you'll likely have to build/install those independently first.
Configuring and Building
------------------------
The typical way to build and install from source is as follows:
.. code-block:: console
./configure
make
make install
If the ``configure`` script fails, then it is most likely because it either
couldn't find a required dependency or it couldn't find a sufficiently new
version of a dependency. Assuming that you already installed all required
dependencies, then you may need to use one of the ``--with-*`` options
that can be given to the ``configure`` script to help it locate a dependency.
To find out what all different options ``./configure`` supports, run
``./configure --help``.
The default installation path is ``/usr/local/zeek``, which would typically
require root privileges when doing the ``make install``. A different
installation path can be chosen by specifying the ``configure`` script
``--prefix`` option. Note that ``/usr``, ``/opt/bro/``, and ``/opt/zeek`` are
the standard prefixes for binary Zeek packages to be installed, so those are
typically not good choices unless you are creating such a package.
OpenBSD users, please see our `FAQ <https://zeek.org/faq/>`_ if you are having
problems installing Zeek.
Depending on the Zeek package you downloaded, there may be auxiliary
tools and libraries available in the ``auxil/`` directory. Some of them
will be automatically built and installed along with Zeek. There are
``--disable-*`` options that can be given to the configure script to
turn off unwanted auxiliary projects that would otherwise be installed
automatically. Finally, use ``make install-aux`` to install some of
the other programs that are in the ``auxil/zeek-aux`` directory.
Finally, if you want to build the Zeek documentation (not required, because
all of the documentation for the latest Zeek release is available at
https://docs.zeek.org), there are instructions in ``doc/README`` in the source
distribution.
Cross Compiling
---------------
Prerequisites
~~~~~~~~~~~~~
You need three things on the host system:
1. The Zeek source tree.
2. A cross-compilation toolchain, such as one built via crosstool-NG_.
3. Pre-built Zeek dependencies from the target system. This usually
includes libpcap, zlib, OpenSSL, and Python development headers
and libraries.
Configuration and Compiling
~~~~~~~~~~~~~~~~~~~~~~~~~~~
You first need to compile a few build tools native to the host system
for use during the later cross-compile build. In the root of your
Zeek source tree:
.. code-block:: console
./configure --builddir=../zeek-buildtools
( cd ../zeek-buildtools && make binpac bifcl )
Next configure Zeek to use your cross-compilation toolchain (this example
uses a Raspberry Pi as the target system):
.. code-block:: console
./configure --toolchain=/home/jon/x-tools/RaspberryPi-toolchain.cmake --with-binpac=$(pwd)/../zeek-buildtools/auxil/binpac/src/binpac --with-bifcl=$(pwd)/../zeek-buildtools/src/bifcl
Here, the :file:`RaspberryPi-toolchain.cmake` file specifies a `CMake
toolchain`_. In the toolchain file, you need to point the toolchain and
compiler at the cross-compilation toolchain. It might look something the
following:
.. code-block:: cmake
# Operating System on which CMake is targeting.
set(CMAKE_SYSTEM_NAME Linux)
# The CMAKE_STAGING_PREFIX option may not work.
# Given that Zeek is configured:
#
# ``./configure --prefix=<dir>``
#
# The options are:
#
# (1) ``make install`` and then copy over the --prefix dir from host to
# target system.
#
# (2) ``DESTDIR=<staging_dir> make install`` and then copy over the
# contents of that staging directory.
set(toolchain /home/jon/x-tools/arm-rpi-linux-gnueabihf)
set(CMAKE_C_COMPILER ${toolchain}/bin/arm-rpi-linux-gnueabihf-gcc)
set(CMAKE_CXX_COMPILER ${toolchain}/bin/arm-rpi-linux-gnueabihf-g++)
# The cross-compiler/linker will use these paths to locate dependencies.
set(CMAKE_FIND_ROOT_PATH
/home/jon/x-tools/zeek-rpi-deps
${toolchain}/arm-rpi-linux-gnueabihf/sysroot
)
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
If that configuration succeeds you are ready to build:
.. code-block:: console
make
And if that works, install on your host system:
.. code-block:: console
make install
Once installed, you can copy/move the files from the installation prefix on the
host system to the target system and start running Zeek as usual.
Configuring the Run-Time Environment
====================================
You may want to adjust your :envvar:`PATH` environment variable
according to the platform/shell/package you're using since
neither :file:`/usr/local/zeek/bin/` nor :file:`/opt/zeek/bin/`
will reside in the default :envvar:`PATH`. For example:
Bourne-Shell Syntax:
.. code-block:: console
export PATH=/usr/local/zeek/bin:$PATH
C-Shell Syntax:
.. code-block:: console
setenv PATH /usr/local/zeek/bin:$PATH
Or substitute ``/opt/zeek/bin`` instead if you installed from a binary package.
Zeek supports several environment variables to adjust its behavior. Take a look
at the ``zeek --help`` output for details.

507
doc/cluster-setup.rst Normal file
View file

@ -0,0 +1,507 @@
.. _ZeekControl documentation: https://github.com/zeek/zeekctl
==================
Zeek Cluster Setup
==================
.. TODO: integrate BoZ revisions
A *Zeek Cluster* is a set of systems jointly analyzing the traffic of
a network link in a coordinated fashion. You can operate such a setup from
a central manager system easily using ZeekControl because it
hides much of the complexity of the multi-machine installation.
Cluster Architecture
====================
Zeek is not multithreaded, so once the limitations of a single processor core
are reached the only option currently is to spread the workload across many
cores, or even many physical computers. The cluster deployment scenario for
Zeek is the current solution to build these larger systems. The tools and
scripts that accompany Zeek provide the structure to easily manage many Zeek
processes examining packets and doing correlation activities but acting as
a singular, cohesive entity. This section describes the Zeek cluster
architecture. For information on how to configure a Zeek cluster,
see the documentation for `ZeekControl <https://github.com/zeek/zeekctl>`_.
Architecture
------------
The figure below illustrates the main components of a Zeek cluster.
.. image:: /images/deployment.png
For more specific information on the way Zeek processes are connected,
how they function, and how they communicate with each other, see the
:ref:`Broker Framework Documentation <broker-framework>`.
Tap
***
The tap is a mechanism that splits the packet stream in order to make a copy
available for inspection. Examples include the monitoring port on a switch
and an optical splitter on fiber networks.
Frontend
********
The frontend is a discrete hardware device or on-host technique that splits
traffic into many streams or flows. The Zeek binary does not do this job.
There are numerous ways to accomplish this task, some of which are described
below in `Frontend Options`_.
Manager
*******
The manager is a Zeek process that has two primary jobs. It receives log
messages and notices from the rest of the nodes in the cluster using the Zeek
communications protocol (note that if you use a separate logger node, then the
logger receives all logs instead of the manager). The result
is a single log instead of many discrete logs that you have to
combine in some manner with post-processing.
The manager also supports other functionality and analysis which
requires a centralized, global view of events or data.
Logger
******
A logger is an optional Zeek process that receives log messages from the
rest of the nodes in the cluster using the Zeek communications protocol.
The purpose of having a logger receive logs instead of the manager is
to reduce the load on the manager. If no logger is needed, then the
manager will receive logs instead.
Proxy
*****
A proxy is a Zeek process that may be used to offload data storage or
any arbitrary workload. A cluster may contain multiple proxy nodes.
The default scripts that come with Zeek make minimal use of proxies, so
a single one may be sufficient, but customized use of them to partition
data or workloads provides greater cluster scalability potential than
just doing similar tasks on a single, centralized Manager node.
Zeek processes acting as proxies don't tend to be extremely hard on CPU
or memory and users frequently run proxy processes on the same physical
host as the manager.
Worker
******
The worker is the Zeek process that sniffs network traffic and does protocol
analysis on the reassembled traffic streams. Most of the work of an active
cluster takes place on the workers and as such, the workers typically
represent the bulk of the Zeek processes that are running in a cluster.
The fastest memory and CPU core speed you can afford is recommended
since all of the protocol parsing and most analysis will take place here.
There are no particular requirements for the disks in workers since almost all
logging is done remotely to the manager, and normally very little is written
to disk.
Frontend Options
----------------
There are many options for setting up a frontend flow distributor. In many
cases it is beneficial to do multiple stages of flow distribution
on the network and on the host.
Discrete hardware flow balancers
********************************
cPacket
^^^^^^^
If you are monitoring one or more 10G physical interfaces, the recommended
solution is to use either a cFlow or cVu device from cPacket because they
are used successfully at a number of sites. These devices will perform
layer-2 load balancing by rewriting the destination Ethernet MAC address
to cause each packet associated with a particular flow to have the same
destination MAC. The packets can then be passed directly to a monitoring
host where each worker has a BPF filter to limit its visibility to only that
stream of flows, or onward to a commodity switch to split the traffic out to
multiple 1G interfaces for the workers. This greatly reduces
costs since workers can use relatively inexpensive 1G interfaces.
On host flow balancing
**********************
PF_RING
^^^^^^^
The PF_RING software for Linux has a "clustering" feature which will do
flow-based load balancing across a number of processes that are sniffing the
same interface. This allows you to easily take advantage of multiple
cores in a single physical host because Zeek's main event loop is single
threaded and can't natively utilize all of the cores. If you want to use
PF_RING, see the documentation on :ref:`how to configure Zeek with PF_RING
<pf-ring-config>`.
AF_PACKET
^^^^^^^^^
On Linux, Zeek supports `AF_PACKET sockets <https://docs.kernel.org/networking/packet_mmap.html>`_ natively.
Currently, this is provided by including the `external Zeek::AF_Packet plugin <https://github.com/zeek/zeek-af_packet-plugin>`_
in default builds of Zeek for Linux. Additional information can be found in
the project's README file.
To check the availability of the ``af_packet`` packet source, print its information using ``zeek -N``::
zeek -N Zeek::AF_Packet
Zeek::AF_Packet - Packet acquisition via AF_Packet (dynamic, version 3.2.0)
On FreeBSD, macOS, or if Zeek was built with ``--disable-af-packet``, the
plugin won't be available.
Single worker mode
""""""""""""""""""
For the most basic usage, prefix the interface with ``af_packet::`` when invoking Zeek::
zeek -i af_packet::eth0
Generally, running Zeek this way requires a privileged user with CAP_NET_RAW
and CAP_NET_ADMIN capabilities. Linux supports file-based capabilities: A
process executing an executable with capabilities will receive these.
Using this mechanism allows running Zeek as an unprivileged user once the file
capabilities have been added::
sudo setcap cap_net_raw,cap_net_admin=+eip /path/to/zeek
Offloading and ethtool tuning
"""""""""""""""""""""""""""""
While not specific to AF_PACKET, it is recommended to disable any offloading
features provided by the network card or Linux networking stack when running
Zeek. This allows Zeek to see network packets as they arrive on the wire.
See this `blog post <https://blog.securityonion.net/2011/10/when-is-full-packet-capture-not-full.html>`_
for more background.
Toggling these features can be done with the ``ethtool -K`` command, for example::
IFACE=eth0
for offload in rx tx sg tso ufo gso gro lro; do
ethtool -K $IFACE $offload off
done
Detailed statistics about the interface can be gathered via ``ethtool -S``.
For more details around the involved offloads consult the
`ethtool manpage <https://man7.org/linux/man-pages/man8/ethtool.8.html>`_.
Load balancing
""""""""""""""
The more interesting use-case is to use AF_PACKET to run multiple Zeek workers
and have their packet sockets join what is called a fanout group.
In such a setup, the network traffic is load-balanced across Zeek workers.
By default load balancing is based on symmetric flow hashes [#]_.
For example, running two Zeek workers listening on the same network interface,
each worker analyzing approximately half of the network traffic, can be done
as follows::
zeek -i af_packet::eth0 &
zeek -i af_packet::eth0 &
The fanout group is identified by an id and configurable using the
``AF_Packet::fanout_id`` constant which defaults to 23. In the example
above, both Zeek workers join the same fanout group.
.. note::
As a caveat, within the same Linux network namespace, two Zeek processes can
not use the same fanout group id for listening on different network interfaces.
If this is a setup you're planning on running, configure the fanout group
ids explicitly.
For illustration purposes, the following starts two Zeek workers each using
a different network interface and fanout group id::
zeek -i af_packet::eth0 AF_Packet::fanout_id=23 &
zeek -i af_packet::eth1 AF_Packet::fanout_id=24 &
.. warning::
Zeek workers crashing or restarting due to running out of memory can,
for a short period of time, disturb load balancing due to their packet
sockets being removed and later rejoining the fanout group.
This may be visible in Zeek logs as gaps and/or duplicated connection
entries produced by different Zeek workers.
See :ref:`cluster-configuration` for instructions on how to configure AF_PACKET
with ZeekControl.
Netmap
^^^^^^
`Netmap <https://github.com/luigirizzo/netmap>`_ is a framework for fast
packet I/O that is natively supported on FreeBSD since version 10.
On Linux it can be installed as an out-of-tree kernel module.
FreeBSD
"""""""
FreeBSD's libpcap library supports netmap natively. This allows prefixing
interface names with ``netmap:`` to instruct libpcap to open the interface
in netmap mode. For example, a single Zeek worker can leverage netmap
transparently using Zeek's default packet source as follows::
zeek -i netmap:em0
.. warning::
The above command will put the em0 interface into kernel-bypass mode. Network
packets will pass directly to Zeek without being interpreted by the kernel.
If em0 is your primary network interface, this effectively disables
networking, including SSH connectivity.
If your network card supports multiple rings, individual Zeek workers can be
attached to these as well (this assumes the NIC does proper flow hashing in hardware)::
zeek -i netmap:em0-0
zeek -i netmap:em0-1
For software load balancing support, the FreeBSD source tree includes the
``lb`` tool to distribute packets into netmap pipes doing flow hashing
in user-space.
To compile and install ``lb``, ensure ``/usr/src`` is available on your
FreeBSD system, then run the following commands::
cd /usr/src/tools/tools/netmap/
make
# Installs lb into /usr/local/bin
cp /usr/obj/usr/src/`uname -m`.`uname -m`/tools/tools/netmap/lb /usr/local/bin/
To load-balance packets arriving on em0 into 4 different netmap pipes named
``zeek}0`` through ``zeek}3``, run ``lb`` as follows::
lb -i em0 -p zeek:4
410.154166 main [634] interface is em0
411.377220 main [741] successfully opened netmap:em0
411.377243 main [812] opening pipe named netmap:zeek{0/xT@1
411.379200 main [829] successfully opened pipe #1 netmap:zeek{0/xT@1 (tx slots: 1024)
411.379242 main [838] zerocopy enabled
...
Now, Zeek workers can attach to these four netmap pipes. When starting Zeek
workers manually, the respective invocations would be as follows. The ``/x``
suffix specifies exclusive mode to prevent two Zeek processes consuming packets
from the same netmap pipe::
zeek -i netmap:zeek}0/x
zeek -i netmap:zeek}1/x
zeek -i netmap:zeek}2/x
zeek -i netmap:zeek}3/x
For packet-level debugging, you can attach ``tcpdump`` to any of the netmap
pipes in read monitor mode even while Zeek workers are consuming from them::
tcpdump -i netmap:zeek}1/r
In case libpcap's netmap support is insufficient, the external
`Zeek netmap plugin <https://github.com/zeek/zeek-netmap>`_ can be installed.
.. warning::
When using the zeek-netmap plugin on FreeBSD, the interface specification given to Zeek
needs to change from ``netmap:zeek}0/x`` to ``netmap::zeek}0/x`` - a single colon more.
In the first case, Zeek uses the default libpcap packet source and passes ``netmap:zeek}0``
as interface name. In the second case, ``netmap::`` is interpreted by Zeek and
the netmap packet source is instantiated. The ``zeek}0/x`` part is used as
interface name.
Linux
"""""
While netmap isn't included in the Linux kernel, it can be installed as
an out-of-tree kernel module.
See the project's `GitHub repository <https://github.com/luigirizzo/netmap>`_
for detailed instructions. This includes the ``lb`` tool for load balancing.
On Linux, the external `zeek-netmap <https://github.com/zeek/zeek-netmap>`_
packet source plugin is required, or the system's libpcap library as used by
Zeek needs to be recompiled with native netmap support. With the netmap kernel
module loaded and the Zeek plugin installed, running a Zeek worker as follows
will leverage netmap on Linux::
zeek -i netmap::eth1
For using ``lb`` or libpcap with netmap support, refer to the commands shown
in the FreeBSD section - these are essentially the same.
.. _cluster-configuration:
Cluster Configuration
=====================
A *Zeek Cluster* is a set of systems jointly analyzing the traffic of
a network link in a coordinated fashion. You can operate such a setup from
a central manager system easily using ZeekControl because it
hides much of the complexity of the multi-machine installation.
This section gives examples of how to setup common cluster configurations
using ZeekControl. For a full reference on ZeekControl, see the
`ZeekControl documentation`_.
Preparing to Setup a Cluster
----------------------------
We refer to the user account used to set up the cluster
as the "Zeek user". When setting up a cluster the Zeek user must be set up
on all hosts, and this user must have ssh access from the manager to all
machines in the cluster, and it must work without being prompted for a
password/passphrase (for example, using ssh public key authentication).
Also, on the worker nodes this user must have access to the target
network interface in promiscuous mode.
Additional storage must be available on all hosts under the same path,
which we will call the cluster's prefix path. We refer to this directory
as ``<prefix>``. If you build Zeek from source, then ``<prefix>`` is
the directory specified with the ``--prefix`` configure option,
or ``/usr/local/zeek`` by default. The Zeek user must be able to either
create this directory or, where it already exists, must have write
permission inside this directory on all hosts.
When trying to decide how to configure the Zeek nodes, keep in mind that
there can be multiple Zeek instances running on the same host. For example,
it's possible to run a proxy and the manager on the same host. However, it is
recommended to run workers on a different machine than the manager because
workers can consume a lot of CPU resources. The maximum recommended
number of workers to run on a machine should be one or two less than
the number of CPU cores available on that machine. Using a load-balancing
method (such as PF_RING) along with CPU pinning can decrease the load on
the worker machines. Also, in order to reduce the load on the manager
process, it is recommended to have a logger in your configuration. If a
logger is defined in your cluster configuration, then it will receive logs
instead of the manager process.
Basic Cluster Configuration
---------------------------
With all prerequisites in place, perform the following steps to set up
a Zeek cluster (do this as the Zeek user on the manager host only):
- Edit the ZeekControl configuration file, ``<prefix>/etc/zeekctl.cfg``,
and change the value of any options to be more suitable for
your environment. You will most likely want to change the value of
the ``MailTo`` and ``LogRotationInterval`` options. A complete
reference of all ZeekControl options can be found in the
`ZeekControl documentation`_.
- Edit the ZeekControl node configuration file, ``<prefix>/etc/node.cfg``
to define where logger, manager, proxies, and workers are to run. For a
cluster configuration, you must comment-out (or remove) the standalone node
in that file, and either uncomment or add node entries for each node
in your cluster (logger, manager, proxy, and workers). For example, if you
wanted to run five Zeek nodes (two workers, one proxy, a logger, and a
manager) on a cluster consisting of three machines, your cluster
configuration would look like this::
[logger]
type=logger
host=10.0.0.10
[manager]
type=manager
host=10.0.0.10
[proxy-1]
type=proxy
host=10.0.0.10
[worker-1]
type=worker
host=10.0.0.11
interface=eth0
[worker-2]
type=worker
host=10.0.0.12
interface=eth0
For a complete reference of all options that are allowed in the ``node.cfg``
file, see the `ZeekControl documentation`_.
- Edit the network configuration file ``<prefix>/etc/networks.cfg``. This
file lists all of the networks which the cluster should consider as local
to the monitored environment.
- Install Zeek on all machines in the cluster using ZeekControl::
> zeekctl install
- See the `ZeekControl documentation`_
for information on setting up a cron job on the manager host that can
monitor the cluster.
AF_PACKET Cluster Configuration
-------------------------------
Since version 5.2, Zeek includes AF_PACKET as a native packet source. This
provides an easy and efficient capture mechanism for Linux users.
Adapt the worker section in ZeekControl's ``node.cfg`` file with the
following entries, assuming four worker processes listening on ``eth0``::
[worker-1]
type=worker
host=10.0.0.11
interface=eth0
lb_method=af_packet
lb_procs=4
The specific options are ``lb_method=af_packet`` and ``lb_procs=4``.
If listening on two or more interfaces on the same host is a requirement,
remember to set a unique ``fanout_id`` using the node option ``af_packet_fanout_id``::
[worker-1-eth0]
type=worker
host=10.0.0.11
interface=eth0
lb_method=af_packet
lb_procs=4
af_packet_fanout_id=20
[worker-1-eth1]
type=worker
host=10.0.0.11
interface=eth1
lb_method=af_packet
lb_procs=4
af_packet_fanout_id=21
Pinning the worker processes to individual CPU cores can improve performance.
Use the node's option ``pin_cpus=4,5,6,7``, listing as many CPU numbers as
processes at appropriate offsets.
.. _pf-ring-config:
PF_RING Cluster Configuration
-----------------------------
`PF_RING <http://www.ntop.org/products/pf_ring/>`_ allows speeding up the
packet capture process by installing a new type of socket in Linux systems.
It supports 10Gbit hardware packet filtering using standard network adapters,
and user-space DNA (Direct NIC Access) for fast packet capture/transmission.
.. note::
Unless you have determined that you specifically require PF_RING, consider using
AF_PACKET first and test if it fulfills your requirements. AF_PACKET has
been integrated into Zeek since version 5.2. It's a bit easier to get
started with as it does not require an out of tree Linux kernel module.
Head over to :ref:`cluster-pf-ring` for more details.
.. toctree::
:hidden:
cluster/pf_ring
.. [#] Some Linux kernel versions between 3.10 and 4.7 might exhibit
a bug that prevents the required symmetric hashing. The script available
in the GitHub project `can-i-use-afpacket-fanout <https://github.com/JustinAzoff/can-i-use-afpacket-fanout>`_
can be used to verify whether ``PACKET_FANOUT`` works as expected.
This issue has been fixed in all stable kernels for at least 5 years.
You're unlikely to be affected.

141
doc/cluster/pf_ring.rst Normal file
View file

@ -0,0 +1,141 @@
.. _cluster-pf-ring:
===================
PF_RING Setup Guide
===================
Installing PF_RING
******************
1. Download and install PF_RING for your system following the instructions
`here <http://www.ntop.org/get-started/download/#PF_RING>`_. The following
commands will install the PF_RING libraries and kernel module (replace
the version number 5.6.2 in this example with the version that you
downloaded)::
cd /usr/src
tar xvzf PF_RING-5.6.2.tar.gz
cd PF_RING-5.6.2/userland/lib
./configure --prefix=/opt/pfring
make install
cd ../libpcap
./configure --prefix=/opt/pfring
make install
cd ../tcpdump-4.1.1
./configure --prefix=/opt/pfring
make install
cd ../../kernel
make
make install
modprobe pf_ring enable_tx_capture=0 min_num_slots=32768
Refer to the documentation for your Linux distribution on how to load the
pf_ring module at boot time. You will need to install the PF_RING
library files and kernel module on all of the workers in your cluster.
2. Download the Zeek source code.
3. Configure and install Zeek using the following commands::
./configure --with-pcap=/opt/pfring
make
make install
4. Make sure Zeek is correctly linked to the PF_RING libpcap libraries::
ldd /usr/local/zeek/bin/zeek | grep pcap
libpcap.so.1 => /opt/pfring/lib/libpcap.so.1 (0x00007fa6d7d24000)
5. Configure ZeekControl to use PF_RING (explained below).
6. Run "zeekctl install" on the manager. This command will install Zeek and
required scripts to all machines in your cluster.
Using PF_RING
*************
In order to use PF_RING, you need to specify the correct configuration
options for your worker nodes in ZeekControl's node configuration file.
Edit the ``node.cfg`` file and specify ``lb_method=pf_ring`` for each of
your worker nodes. Next, use the ``lb_procs`` node option to specify how
many Zeek processes you'd like that worker node to run, and optionally pin
those processes to certain CPU cores with the ``pin_cpus`` option (CPU
numbering starts at zero). The correct ``pin_cpus`` setting to use is
dependent on your CPU architecture (Intel and AMD systems enumerate
processors in different ways). Using the wrong ``pin_cpus`` setting
can cause poor performance. Here is what a worker node entry should
look like when using PF_RING and CPU pinning::
[worker-1]
type=worker
host=10.0.0.50
interface=eth0
lb_method=pf_ring
lb_procs=10
pin_cpus=2,3,4,5,6,7,8,9,10,11
Using PF_RING+DNA with symmetric RSS
************************************
You must have a PF_RING+DNA license in order to do this. You can sniff
each packet only once.
1. Load the DNA NIC driver (i.e. ixgbe) on each worker host.
2. Run "ethtool -L dna0 combined 10" (this will establish 10 RSS queues
on your NIC) on each worker host. You must make sure that you set the
number of RSS queues to the same as the number you specify for the
lb_procs option in the node.cfg file.
3. On the manager, configure your worker(s) in node.cfg::
[worker-1]
type=worker
host=10.0.0.50
interface=dna0
lb_method=pf_ring
lb_procs=10
Using PF_RING+DNA with pfdnacluster_master
******************************************
You must have a PF_RING+DNA license and a libzero license in order to do
this. You can load balance between multiple applications and sniff the
same packets multiple times with different tools.
1. Load the DNA NIC driver (i.e. ixgbe) on each worker host.
2. Run "ethtool -L dna0 1" (this will establish 1 RSS queue on your NIC)
on each worker host.
3. Run the pfdnacluster_master command on each worker host. For example::
pfdnacluster_master -c 21 -i dna0 -n 10
Make sure that your cluster ID (21 in this example) matches the interface
name you specify in the node.cfg file. Also make sure that the number
of processes you're balancing across (10 in this example) matches
the lb_procs option in the node.cfg file.
4. If you are load balancing to other processes, you can use the
pfringfirstappinstance variable in zeekctl.cfg to set the first
application instance that Zeek should use. For example, if you are running
pfdnacluster_master with "-n 10,4" you would set
pfringfirstappinstance=4. Unfortunately that's still a global setting
in zeekctl.cfg at the moment but we may change that to something you can
set in node.cfg eventually.
5. On the manager, configure your worker(s) in node.cfg::
[worker-1]
type=worker
host=10.0.0.50
interface=dnacluster:21
lb_method=pf_ring
lb_procs=10

33
doc/components/index.rst Normal file
View file

@ -0,0 +1,33 @@
=============
Subcomponents
=============
To find documentation for the various subcomponents of Zeek, see their
respective GitHub repositories or documentation:
* `Spicy <https://docs.zeek.org/projects/spicy>`__
- C++ parser generator for dissecting protocols & files.
* `BinPAC <https://github.com/zeek/binpac>`__
- A protocol parser generator
* `ZeekControl <https://github.com/zeek/zeekctl>`__
- Interactive Zeek management shell
* `Zeek-Aux <https://github.com/zeek/zeek-aux>`__
- Small auxiliary tools for Zeek
* `BTest <https://github.com/zeek/btest>`__
- A system testing framework
* `Capstats <https://github.com/zeek/capstats>`__
- Command-line packet statistic tool
* `PySubnetTree <https://github.com/zeek/pysubnettree>`__
- Python module for CIDR lookups
* `trace-summary <https://github.com/zeek/trace-summary>`__
- Script for generating break-downs of network traffic
* `Broker <https://github.com/zeek/broker>`__
- Zeek's Messaging Library
- `(Docs) <https://docs.zeek.org/projects/broker>`__
* `Package Manager <https://github.com/zeek/package-manager>`__
- A package manager for Zeek
- `(Docs) <https://docs.zeek.org/projects/package-manager>`__
* `Paraglob <https://github.com/zeek/paraglob>`__
- A pattern matching data structure for Zeek.
- `(Docs) <https://github.com/zeek/paraglob/blob/master/README.md>`__

305
doc/conf.py Normal file
View file

@ -0,0 +1,305 @@
#
# Zeek documentation build configuration file, created by sphinx-quickstart
#
# This file is execfile()d with the current directory set to its containing dir.
#
# Note that not all possible configuration values are present in this
# autogenerated file.
#
# All configuration values have a default; values that are commented out
# serve to show the default.
import os
import sys

# Make the local ./ext directory importable so the custom Sphinx extension
# modules shipped with the docs (zeek, zeek_pygments, ...) can be loaded
# by name below.
sys.path.insert(0, os.path.abspath("ext"))

# -- General configuration -----------------------------------------------------

# If your documentation needs a minimal Sphinx version, state it here.
# needs_sphinx = '1.0'

# Sphinx extension modules to activate: stock Sphinx extensions
# ('sphinx.ext.*') alongside the Zeek-specific ones from ./ext.
extensions = [
    "zeek",
    "sphinx.ext.todo",
    "zeek_pygments",
    "spicy-pygments",
    "literal-emph",
    "sphinx.ext.extlinks",
]

# Directories, relative to this file, that are searched for page templates.
templates_path = ["_templates"]

# Source files are reStructuredText.
source_suffix = ".rst"

# The encoding of source files.
# source_encoding = 'utf-8-sig'

# The document that serves as the root of the toctree.
master_doc = "index"

# General information about the project.
project = "Zeek"
copyright = "by the Zeek Project"
# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# The short X.Y version.
#
# Resolution order: a ../VERSION file (release tarballs / source checkouts),
# then a git tag on HEAD, then "source" as the final fallback.
version = "source"
try:
    # Use the actual Zeek version if available
    with open("../VERSION") as f:
        version = f.readline().strip()
except Exception:
    # No readable VERSION file; try deriving a version from git tags instead.
    # Use `except Exception` (not a bare `except:`) so that SystemExit and
    # KeyboardInterrupt still propagate.
    try:
        import re

        import git

        repo = git.Repo(os.path.abspath("."))
        version = "git/master"
        version_tag_re = r"v\d+\.\d+(\.\d+)?"
        version_tags = [
            t
            for t in repo.tags
            if t.commit == repo.head.commit and re.match(version_tag_re, str(t))
        ]
        # Note: sorting by tag date doesn't necessarily give correct
        # order in terms of version numbers, but doubtful that will ever be
        # a problem (if we ever do re-tag an old version number on a given
        # commit such that it is incorrectly found as the most recent version,
        # we can just re-tag all the other version numbers on that same commit)
        version_tags = sorted(version_tags, key=lambda t: t.tag.tagged_date)
        if version_tags:
            version = str(version_tags[-1])
    except Exception:
        # Not a git checkout, gitpython unavailable, or tags without tag
        # objects -- keep whichever fallback value `version` already holds.
        pass
# The full version, including alpha/beta/rc tags.
release = version
# In terms of the actual hyperlink URL, a more ideal/stable way to reference
# source code on GitHub would be by commit hash, but that can be tricky to
# update in a way that produces stable Sphinx/reST configuration: don't want
# to update the commit-hash for every Zeek commit unless it actually produces
# new content, and also don't want to accidentally make it easy for people to
# insert unreachable commits when manually running
# `zeek/ci/update-zeekygen-docs.sh`.
#
# We only have a few versions of docs that actually matter: `master` and
# `release/.*`, and the tip of those branches will always be in sync with
# auto-generated content by simply having `zeek/ci/update-zeekygen-docs.sh`
# change this to `release/.*` when needed.
zeek_code_version = "master"
# Base URL used by source-code cross-references in the generated docs.
zeek_code_url = f"https://github.com/zeek/zeek/blob/{zeek_code_version}"
# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
# language = None
# There are two options for replacing |today|: either, you set today to some
# non-false value, then it is used:
# today = ''
# Else, today_fmt is used as the format for a strftime call.
today_fmt = "%B %d, %Y"
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# (editor lock/backup files, plus Zeekygen output that is built separately)
exclude_patterns = [".#*", "script-reference/autogenerated-*"]
# The reST default role (used for this markup: `text`) to use for all documents.
# default_role = None
# If true, '()' will be appended to :func: etc. cross-reference text.
# add_function_parentheses = True
# If true, the current module name will be prepended to all description
# unit titles (such as .. function::).
# add_module_names = True
# If true, sectionauthor and moduleauthor directives will be shown in the
# output. They are ignored by default.
show_authors = True
# The name of the Pygments (syntax highlighting) style to use.
pygments_style = "sphinx"
# Default language for literal blocks that don't specify one: no highlighting.
highlight_language = "none"
# A list of ignored prefixes for module index sorting.
# modindex_common_prefix = []
# -- Options for HTML output ---------------------------------------------------
html_theme = "sphinx_rtd_theme"
# Set canonical URL from the Read the Docs Domain
html_baseurl = os.environ.get("READTHEDOCS_CANONICAL_URL", "")
# Tell Jinja2 templates the build is running on Read the Docs
if os.environ.get("READTHEDOCS", "") == "True":
    if "html_context" not in globals():
        html_context = {}
    html_context["READTHEDOCS"] = True
    html_last_updated_fmt = "%B %d, %Y"
# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
# documentation.
html_theme_options = {
    "analytics_id": "UA-144186885-1",
    "collapse_navigation": False,
    "style_external_links": True,
}
# Add any paths that contain custom themes here, relative to this directory.
# html_theme_path = []
# The name for this set of Sphinx documents. If None, it defaults to
# "<project> v<release> Documentation".
html_title = f"Book of Zeek ({release})"
# A shorter title for the navigation bar. Default is the same as html_title.
# html_short_title = None
# The name of an image file (relative to this directory) to place at the top
# of the sidebar.
html_logo = "images/zeek-logo-sidebar.png"
# The name of an image file (within the static path) to use as favicon of the
# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
# pixels large.
html_favicon = "images/zeek-favicon.ico"
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ["_static"]
def setup(app):
    # Sphinx application hook: register our stylesheet override, the Zeek
    # syntax highlighter, and the source-code base URL config value.
    from sphinx.highlighting import lexers
    from zeek_pygments import ZeekLexer

    app.add_css_file("theme_overrides.css")
    lexers["zeek"] = ZeekLexer()
    app.add_config_value("zeek-code-url", zeek_code_url, "env")
# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
# using the given strftime format.
# html_last_updated_fmt = '%b %d, %Y'
# If true, SmartyPants will be used to convert quotes and dashes to
# typographically correct entities.
# html_use_smartypants = True
# Custom sidebar templates, maps document names to template names.
# html_sidebars = {
#'**': ['localtoc.html', 'sourcelink.html', 'searchbox.html'],
# }
# Additional templates that should be rendered to pages, maps page names to
# template names.
# html_additional_pages = {}
# If false, no module index is generated.
# html_domain_indices = True
# If false, no index is generated.
# html_use_index = True
# If true, the index is split into individual pages for each letter.
# html_split_index = False
# If true, links to the reST sources are added to the pages.
# html_show_sourcelink = True
# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
# html_show_sphinx = True
# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
# html_show_copyright = True
# If true, an OpenSearch description file will be output, and all pages will
# contain a <link> tag referring to it. The value of this option must be the
# base URL from which the finished HTML is served.
# html_use_opensearch = ''
# This is the file name suffix for HTML files (e.g. ".xhtml").
# html_file_suffix = None
# Output file base name for HTML help builder.
htmlhelp_basename = "zeek-docs"
# -- Options for LaTeX output --------------------------------------------------
# The paper size ('letter' or 'a4').
# latex_paper_size = 'letter'
# The font size ('10pt', '11pt' or '12pt').
# latex_font_size = '10pt'
# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title, author, documentclass [howto/manual]).
latex_documents = [
    ("index", "Zeek.tex", "Zeek Documentation", "The Zeek Project", "manual"),
]
# The name of an image file (relative to this directory) to place at the top of
# the title page.
# latex_logo = None
# For "manual" documents, if this is true, then toplevel headings are parts,
# not chapters.
# latex_use_parts = False
# If true, show page references after internal links.
# latex_show_pagerefs = False
# If true, show URL addresses after external links.
# latex_show_urls = False
# Additional stuff for the LaTeX preamble.
# latex_preamble = ''
# Documents to append as an appendix to all manuals.
# latex_appendices = []
# If false, no module index is generated.
# latex_domain_indices = True
# -- Options for manual page output --------------------------------------------
# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [("index", "zeek", "Zeek Documentation", ["The Zeek Project"], 1)]
# -- Options for todo plugin --------------------------------------------
todo_include_todos = True
# Shorthand roles for external links; the '%s' is replaced by the role's
# argument (e.g. :discourselink:`forum <some/path>`).
extlinks = {
    "slacklink": ("https://zeek.org/slack%s", None),
    "discourselink": ("https://community.zeek.org/%s", None),
    "spicylink": ("https://docs.zeek.org/projects/spicy/en/latest/%s", None),
}
# Warn when a URL in the docs could have used one of the extlinks roles.
extlinks_detect_hardcoded_links = True

318
doc/customizations.rst Normal file
View file

@ -0,0 +1,318 @@
.. _popular-customizations:
======================
Popular Customizations
======================
This page outlines customizations and additions that are popular
among Zeek users.
.. note::
This page lists externally-maintained Zeek packages. The Zeek team does not
provide support or maintenance for these packages. If you find bugs or have
feature requests, please reach out to the respective package maintainers directly.
You may also post in the :slacklink:`Zeek Slack <>` #packages
channel or :discourselink:`forum <>` to get help from the broader
Zeek community.
Log Enrichment
==============
Community ID
------------
.. versionadded:: 6.0
Zeek includes native `Community ID Flow Hashing`_ support. This functionality
has previously been provided through the `zeek-community-id`_ package.
.. note::
At this point, the external `zeek-community-id`_ package is still
available to support Zeek deployments running older versions. However,
the scripts provided by the package cause conflicts with those provided in
Zeek 6.0 - do not load both.
Loading the
:doc:`/scripts/policy/protocols/conn/community-id-logging.zeek`
and
:doc:`/scripts/policy/frameworks/notice/community-id.zeek`
scripts adds an additional ``community_id`` field to the
:zeek:see:`Conn::Info` and :zeek:see:`Notice::Info` record.
.. code-block:: console
$ zeek -r ./traces/get.trace protocols/conn/community-id-logging LogAscii::use_json=T
$ jq < conn.log
{
"ts": 1362692526.869344,
"uid": "CoqLmg1Ds5TE61szq1",
"id.orig_h": "141.142.228.5",
"id.orig_p": 59856,
"id.resp_h": "192.150.187.43",
"id.resp_p": 80,
"proto": "tcp",
...
"community_id": "1:yvyB8h+3dnggTZW0UEITWCst97w="
}
The Community ID Flow Hash of a :zeek:see:`conn_id` instance can be computed
with the :zeek:see:`community_id_v1` builtin function directly on the command-line
or used in custom scripts.
.. code-block:: console
$ zeek -e 'print community_id_v1([$orig_h=141.142.228.5, $orig_p=59856/tcp, $resp_h=192.150.187.43, $resp_p=80/tcp])'
1:yvyB8h+3dnggTZW0UEITWCst97w=
.. _Community ID Flow Hashing: https://github.com/corelight/community-id-spec
.. _zeek-community-id: https://github.com/corelight/zeek-community-id/
.. _geolocation:
Address geolocation and AS lookups
----------------------------------
.. _libmaxminddb: https://github.com/maxmind/libmaxminddb
Zeek supports IP address geolocation as well as AS (autonomous system)
lookups. This requires two things:
* Compilation of Zeek with the `libmaxminddb`_ library and development
headers. If you're using our :ref:`Docker images <docker-images>` or
:ref:`binary packages <binary-packages>`, there's nothing to do: they ship
with GeoIP support.
* Installation of corresponding MaxMind database files on your
system.
To check whether your Zeek supports geolocation, run ``zeek-config --have-geoip``
(available since Zeek 6.2) or simply try an address lookup. The following
indicates that your Zeek lacks support:
.. code-block:: console
$ zeek -e 'lookup_location(1.2.3.4)'
error in <command line>, line 1: Zeek was not configured for GeoIP support (lookup_location(1.2.3.4))
Read on for more details about building Zeek with GeoIP support, and how to
configure access to the database files.
Building Zeek with libmaxminddb
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
If you build Zeek yourself, you need to install libmaxminddb prior to
configuring your build.
* RPM/RedHat-based Linux:
.. code-block:: console
sudo yum install libmaxminddb-devel
* DEB/Debian-based Linux:
.. code-block:: console
sudo apt-get install libmaxminddb-dev
* FreeBSD:
.. code-block:: console
sudo pkg install libmaxminddb
* Mac OS X:
You need to install from your preferred package management system
(e.g. Homebrew, MacPorts, or Fink). For Homebrew, the name of the package
that you need is libmaxminddb.
The ``configure`` script's output indicates whether it successfully located
libmaxminddb. If your system's MaxMind library resides in a non-standard path,
you may need to specify it via ``./configure --with-geoip=<path>``.
Installing and configuring GeoIP databases
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
MaxMind's databases ship as individual files that you can `download
<https://www.maxmind.com/en/accounts/current/geoip/downloads>`_ from their
website after `signing up <https://www.maxmind.com/en/geolite2/signup>`_ for an
account. Some Linux distributions also offer free databases in their package
managers.
There are three types of databases: city-level geolocation, country-level
geolocation, and mapping of IP addresses to autonomous systems (AS number and
organization). Download these and decide on a place to put them on your
file system. If you use automated tooling or system packages for the
installation, that path may be chosen for you, such as ``/usr/share/GeoIP``.
Zeek provides three ways to configure access to the databases:
* Specifying the path and filenames via script variables. Use the
:zeek:see:`mmdb_dir` variable, unset by default, to point to the directory
containing the database(s). By default Zeek looks for databases called
``GeoLite2-City.mmdb``, ``GeoLite2-Country.mmdb``, and
``GeoLite2-ASN.mmdb``. Starting with Zeek 6.2 you can adjust these names by
redefining the :zeek:see:`mmdb_city_db`, :zeek:see:`mmdb_country_db`, and
:zeek:see:`mmdb_asn_db` variables.
* Relying on Zeek's pre-configured search paths and filenames. The
:zeek:see:`mmdb_dir_fallbacks` variable contains default
search paths that Zeek will try in turn when :zeek:see:`mmdb_dir` is not
set. Prior to Zeek 6.2 these paths were hardcoded; they're now redefinable.
For geolocation, Zeek first attempts the city-level databases due to their
greater precision, and falls back to the country-level one. You can adjust the
database filenames via :zeek:see:`mmdb_city_db` and related variables, as
covered above.
* Opening databases explicitly via scripting. The
:zeek:see:`mmdb_open_location_db` and :zeek:see:`mmdb_open_asn_db`
functions take full paths to database files. Zeek only ever uses one
geolocation and one ASN database, and these loads override any databases
previously loaded. These loads can occur at any point.
Querying the databases
^^^^^^^^^^^^^^^^^^^^^^
Two built-in functions provide GeoIP functionality:
.. code-block:: zeek
function lookup_location(a:addr): geo_location
function lookup_autonomous_system(a:addr): geo_autonomous_system
:zeek:see:`lookup_location` returns a :zeek:see:`geo_location` record with
country/region/etc fields, while :zeek:see:`lookup_autonomous_system` returns a
:zeek:see:`geo_autonomous_system` record indicating the AS number and
organization. Depending on the queried IP address some fields may be
uninitialized, so you should guard access with an ``a?$b`` :ref:`existence test
<record-field-operators>`.
Zeek tests the database files for staleness. If it detects that a database has
been updated, it will automatically reload it. Zeek does not automatically add
GeoIP intelligence to its logs, but several add-on scripts and packages provide
such functionality. These include:
* The :ref:`notice framework <notice-framework>` lets you configure notice types
that you'd like to augment with location information. See
:zeek:see:`Notice::lookup_location_types` and
:zeek:see:`Notice::ACTION_ADD_GEODATA` for details.
* The :doc:`/scripts/policy/protocols/smtp/detect-suspicious-orig.zeek` and
:doc:`/scripts/policy/protocols/ssh/geo-data.zeek` policy scripts.
* Several `Zeek packages <https://packages.zeek.org>`_.
Testing
^^^^^^^
Before using the GeoIP functionality it is a good idea to verify that
everything is setup correctly. You can quickly check if the GeoIP
functionality works by running commands like these:
.. code-block:: console
zeek -e "print lookup_location(8.8.8.8);"
If you see an error message similar to "Failed to open GeoIP location database",
then your database configuration is broken. You may need to rename or move your
GeoIP database files.
Example
^^^^^^^
The following shows every FTP connection from hosts in Ohio, US:
.. code-block:: zeek
event ftp_reply(c: connection, code: count, msg: string, cont_resp: bool)
{
local client = c$id$orig_h;
local loc = lookup_location(client);
if (loc?$region && loc$region == "OH" && loc?$country_code && loc$country_code == "US")
{
local city = loc?$city ? loc$city : "<unknown>";
print fmt("FTP Connection from:%s (%s,%s,%s)", client, city,
loc$region, loc$country_code);
}
}
Log Writers
===========
Kafka
-----
For exporting logs to `Apache Kafka`_ in a streaming fashion, the externally-maintained
`zeek-kafka`_ package is a popular choice and easy to configure. It relies on `librdkafka`_.
.. code-block:: zeek
redef Log::default_writer = Log::WRITER_KAFKAWRITER;
redef Kafka::kafka_conf += {
["metadata.broker.list"] = "192.168.0.1:9092"
};
.. _Apache Kafka: https://kafka.apache.org/
.. _zeek-kafka: https://github.com/SeisoLLC/zeek-kafka/
.. _librdkafka: https://github.com/confluentinc/librdkafka
Logging
=======
JSON Streaming Logs
-------------------
The externally-maintained `json-streaming-logs`_ package tailors Zeek
for use with log shippers like `Filebeat`_ or `fluentd`_. It configures
additional log files prefixed with ``json_streaming_``, adds ``_path``
and ``_write_ts`` fields to log records and configures log rotation
appropriately.
If you do not use a logging archive and want to stream all logs away
from the system where Zeek is running without leveraging Kafka, this
package helps you with that.
.. _json-streaming-logs: https://github.com/corelight/json-streaming-logs
.. _Filebeat: https://www.elastic.co/beats/filebeat
.. _fluentd: https://www.fluentd.org/
Long Connections
----------------
Zeek logs connection entries into the :file:`conn.log` only upon termination
or due to expiration of inactivity timeouts. Depending on the protocol and
chosen timeout values this can significantly delay the appearance of a log
entry for a given connection. The delay may be up to an hour for lingering
SSH connections or connections where the final FIN or RST packets were missed.
The `zeek-long-connections`_ package alleviates this by creating a :file:`conn_long.log`
log with the same format as :file:`conn.log`, but containing entries for connections
that have existed for configurable intervals.
By default, the first entry for a connection is logged after 10 minutes. Depending on
the environment, this can be lowered, as even a 10 minute delay may be significant
for detection purposes in streaming setups.
.. _zeek-long-connections: https://github.com/corelight/zeek-long-connections
Profiling and Debugging
=======================
jemalloc profiling
------------------
For investigation of memory leaks or state-growth issues within Zeek,
jemalloc's profiling is invaluable. A package providing a bit of support
for configuring jemalloc's profiling facilities is `zeek-jemalloc-profiling`_.
Some general information about memory profiling exists in the :ref:`Troubleshooting <troubleshooting>`
section.
.. _zeek-jemalloc-profiling: https://github.com/JustinAzoff/zeek-jemalloc-profiling

View file

@ -0,0 +1,120 @@
.. _cluster_backend_zeromq:
======================
ZeroMQ Cluster Backend
======================
.. versionadded:: 7.1
*Experimental*
Quickstart
==========
To switch a Zeek cluster with a static cluster layout over to use ZeroMQ
as cluster backend, add the following snippet to ``local.zeek``:
.. code-block:: zeek
@load frameworks/cluster/backend/zeromq/connect
Note that the function :zeek:see:`Broker::publish` will be non-functional
and a warning emitted when used - use :zeek:see:`Cluster::publish` instead.
By default, a configuration based on hard-coded endpoints and cluster layout
information is created. For more customization, refer to the module documentation
at :doc:`cluster/backend/zeromq/main.zeek </scripts/policy/frameworks/cluster/backend/zeromq/main.zeek>`.
Architecture
============
Publish-Subscribe of Zeek Events
--------------------------------
The `ZeroMQ <https://zeromq.org/>`_ based cluster backend uses a central
XPUB/XSUB broker for publish-subscribe functionality. Zeek events published
via :zeek:see:`Cluster::publish` are distributed by this central broker to
interested nodes.
.. figure:: /images/cluster/zeromq-pubsub.png
As depicted in the figure above, each cluster node connects to the central
broker twice, once via its XPUB socket and once via its XSUB socket. This
results in two TCP connections from every cluster node to the central broker.
This setup allows every node in the cluster to see messages from all other
nodes, avoiding the need for cluster topology awareness.
.. note::

   Scalability of the central broker may be a concern in production setups,
   but for small clusters on a single node it should be fast enough.
On a cluster node, the XPUB socket provides notifications about subscriptions
created by other nodes: For every subscription created by any node in
the cluster, the :zeek:see:`Cluster::Backend::ZeroMQ::subscription` event is
raised locally on every other node (unless another node had created the same
subscription previously).
This mechanism is used to discover the existence of other cluster nodes by
matching the topics with the prefix for node specific subscriptions as produced
by :zeek:see:`Cluster::nodeid_topic`.
As of now, the implementation of the central broker calls ZeroMQ's
``zmq::proxy()`` function to forward messages between the XPUB and
XSUB socket.
While the diagram above indicates the central broker being deployed separately
from Zeek cluster nodes, by default the manager node will start and run this
broker using a separate thread. There's nothing preventing you from running a
long-running central broker independently of the Zeek cluster nodes, however.
The serialization of Zeek events is done by the selected
:zeek:see:`Cluster::event_serializer` and is independent of ZeroMQ.
The central broker needs no knowledge about the chosen format, it is
only shuffling messages between nodes.
Logging
-------
While remote events always pass through the central broker, nodes connect and
send log writes directly to logger nodes in a cluster. The ZeroMQ cluster backend
leverages ZeroMQ's pipeline pattern for this functionality. That is, logger nodes
(including the manager if configured using :zeek:see:`Cluster::manager_is_logger`)
open a ZeroMQ PULL socket to receive log writes. All other nodes connect their
PUSH socket to all available PULL sockets. These connections are separate from
the publish-subscribe setup outlined above.
When sending log-writes over a PUSH socket, load balancing is done by ZeroMQ.
Individual cluster nodes do not have control over the decision which logger
node receives log writes at any given time.
.. figure:: /images/cluster/zeromq-logging.png
While the previous paragraph used "log writes", a single message to a logger
node actually contains a batch of log writes. The options :zeek:see:`Log::flush_interval`
and :zeek:see:`Log::write_buffer_size` control the frequency and maximum size
of these batches.
The serialization format used to encode such batches is controlled by the
selected :zeek:see:`Cluster::log_serializer` and is independent of ZeroMQ.
With the default serializer (:zeek:see:`Cluster::LOG_SERIALIZER_ZEEK_BIN_V1`),
every log batch on the wire has a header prepended that describes it. This allows
interpretation of log writes even by non-Zeek processes. This opens the possibility
to implement non-Zeek logger processes as long as the chosen serializer format
is understood by the receiving process. In the future, a JSON lines serialization
may be provided, allowing easier interpretation than a proprietary binary format.
Summary
-------
Combining the diagrams above, the connections between the different socket
types in a Zeek cluster look something like the following.
.. figure:: /images/cluster/zeromq-cluster.png

111
doc/devel/contributors.rst Normal file
View file

@ -0,0 +1,111 @@
===================
Contributor's Guide
===================
See below for a selection of some of the more common contribution guidelines
maintained directly in the `Zeek wiki
<https://github.com/zeek/zeek/wiki#contributors>`_.
General Contribution Process
============================
See https://github.com/zeek/zeek/wiki/Contribution-Guide
Coding Style and Conventions
============================
See https://github.com/zeek/zeek/wiki/Coding-Style-and-Conventions
General Documentation Structure/Process
=======================================
See the :doc:`README </README>` file of https://github.com/zeek/zeek-docs
Documentation Style and Conventions
===================================
See https://github.com/zeek/zeek/wiki/Documentation-Style-and-Conventions
Checking for Memory Errors and Leaks
====================================
See https://github.com/zeek/zeek/wiki/Checking-for-Memory-Errors-and-Leaks
Maintaining long-lived forks of Zeek
====================================
Consistent formatting of the Zeek codebase is enforced automatically by
configurations tracked in the repository. Upstream updates to these
configurations can lead to formatting changes which could cause merge conflicts
for long-lived forks.
Currently the following configuration files in the root directory are used:
- ``.pre-commit-config.yaml``: Configuration for `pre-commit <https://pre-commit.com/>`_.
We use pre-commit to manage and orchestrate formatters and linters.
- ``.clang-format``: Configuration for `clang-format
<https://clang.llvm.org/docs/ClangFormat.html>`_ for formatting C++ files.
- ``.style.yapf``: Configuration for `YAPF <https://github.com/google/yapf>`_
for formatting Python files.
- ``.cmake-format.json``: Configuration for `cmake-format
<https://github.com/cheshirekow/cmake_format>`_ for formatting CMake files.
With these configuration files present ``pre-commit run --all-files`` will
install all needed formatters and reformat all files in the repository
according to the current configuration.
.. rubric:: Workflow: Zeek ``master`` branch regularly merged into fork
If Zeek's master branch is regularly merged into the fork, merge conflicts can
be resolved once and their resolution is tracked in the repository. Similarly,
we can explicitly reformat the fork once and then merge the upstream branch.
.. code-block:: sh
## Get and stage latest versions of configuration files from master.
git checkout master -- .pre-commit-config.yaml .clang-format .style.yapf .cmake-format.json
## Reformat fork according to new configuration.
pre-commit run -a
## Record reformatted state of fork.
git add -u && git commit -m 'Reformat'
# Merge in master, resolve merge conflicts as usual.
git merge master
.. rubric:: Workflow: Fork regularly rebased onto Zeek ``master`` branch
If the target for a rebase has been reformatted individual diff hunks might not
apply cleanly anymore. There are different approaches to work around that. The
approach with the least conflicts is likely to first reformat the fork
according to upstream style without pulling in changes, and only after that
rebase on upstream and resolve potential semantic conflicts.
.. code-block:: sh
# Create a commit updating the configuration files.
git checkout master -- .pre-commit-config.yaml .clang-format .style.yapf .cmake-format.json
git commit -m 'Bump formatter configurations'
# With a fork branched from upstream at commit FORK_COMMIT, rebase the
# config update commit 'Bump formatter configurations' to the start of the
# fork, but do not yet rebase on master (interactively move the last patch
# to the start of the list of patches).
git rebase -i FORK_COMMIT
# Reformat all commits according to configs at the base. We use the '--exec'
# flag of 'git rebase' to execute pre-commit after applying each patch. If
# 'git rebase' detects uncommitted changes it stops automatic progress so
# one can inspect and apply the changes.
git rebase -i FORK_COMMIT --exec 'pre-commit run --all-files'
# When this stops, inspect changes and stage them.
git add -u
# Continue rebasing. This prompts for a commit message and amends the last
# patch.
git rebase --continue
# The fork is now formatted according to upstream style. Rebase on master,
# and drop the 'Bump formatter configurations' patch from the list of patches.
git rebase -i master

21
doc/devel/index.rst Normal file
View file

@ -0,0 +1,21 @@
================
Developer Guides
================
In addition to documentation found or mentioned below, some developer-oriented
content is maintained directly in the `Zeek wiki
<https://github.com/zeek/zeek/wiki#development-guides>`_ due to the nature of
the content (e.g. the author finds it to be more dynamic, informal, meta,
transient, etc. compared to other documentation).
.. toctree::
:maxdepth: 2
plugins
spicy/index
websocket-api
Documentation Guide </README.rst>
contributors
maintainers
cluster-backend-zeromq

13
doc/devel/maintainers.rst Normal file
View file

@ -0,0 +1,13 @@
==================
Maintainer's Guide
==================
Some notable guidelines for maintainers are linked below for convenience, but
they are generally maintained directly in the `Zeek wiki
<https://github.com/zeek/zeek/wiki#maintainers>`_.
Release Process
===============
See https://github.com/zeek/zeek/wiki/Release-Process

505
doc/devel/plugins.rst Normal file
View file

@ -0,0 +1,505 @@
.. _zkg package manager: https://docs.zeek.org/projects/package-manager/en/stable/
.. _writing-plugins:
===============
Writing Plugins
===============
Zeek provides a plugin API that enables extending
the system dynamically, without modifying the core code base. That way,
custom code remains self-contained and can be maintained, compiled,
and installed independently. Currently, plugins can add the following
functionality to Zeek:
- Zeek scripts.
- Builtin functions/events/types for the scripting language.
- Protocol analyzers.
- File analyzers.
- Packet sources and packet dumpers.
- Logging framework backends.
- Input framework readers.
A plugin's functionality is available to the user just as if Zeek had
the corresponding code built-in. Indeed, internally many of Zeek's
pieces are structured as plugins as well, they are just statically
compiled into the binary rather than loaded dynamically at runtime.
.. note::
Plugins and Zeek packages are related but separate concepts. Both extend
Zeek's functionality without modifying Zeek's source code. A plugin achieves
this via compiled, native code that Zeek links into its core at runtime. A Zeek
package, on the other hand, is a modular addition to Zeek, managed via the
`zkg package manager`_, that may or may not include a plugin. More commonly,
packages consist of script-layer additions to Zeek's functionality. Packages
also feature more elaborate metadata, enabling dependencies on other packages,
Zeek versions, etc.
Quick Start
===========
Writing a basic plugin is quite straight-forward as long as one
follows a few conventions. In the following, we create a simple example
plugin that adds a new Built-In Function (BIF) to Zeek: we'll add
``rot13(s: string) : string``, a function that rotates every letter
in a string by 13 places.
Generally, a plugin comes in the form of a directory following a
certain structure. To get started, Zeek's distribution provides a
helper script ``auxil/zeek-aux/plugin-support/init-plugin`` that creates
a skeleton plugin that can then be customized. Let's use that::
# init-plugin ./rot13-plugin Demo Rot13
As you can see, the script takes three arguments. The first is a
directory inside which the plugin skeleton will be created. The second
is the namespace the plugin will live in, and the third is a descriptive
name for the plugin itself relative to the namespace. Zeek uses the
combination of namespace and name to identify a plugin. The namespace
serves to avoid naming conflicts between plugins written by independent
developers; pick, e.g., the name of your organisation. The namespaces
``Bro`` (legacy) and ``Zeek`` are reserved for functionality distributed
by the Zeek Project. In
our example, the plugin will be called ``Demo::Rot13``.
The ``init-plugin`` script puts a number of files in place. The full
layout is described later. For now, all we need is
``src/rot13.bif``. It's initially empty, but we'll add our new BIF
there as follows::
# cat src/rot13.bif
%%{
#include <cstring>
#include <cctype>
#include "zeek/util.h"
#include "zeek/ZeekString.h"
#include "zeek/Val.h"
%%}
module Demo;
function rot13%(s: string%) : string
%{
char* rot13 = util::copy_string(s->CheckString());
for ( char* p = rot13; *p; p++ )
{
char b = islower(*p) ? 'a' : 'A';
char d = *p - b + 13;
if ( d >= 13 && d <= 38 )
*p = d % 26 + b;
}
zeek::String* zs = new zeek::String(1, reinterpret_cast<byte_vec>(rot13),
strlen(rot13));
return make_intrusive<StringVal>(zs);
%}
The syntax of this file is just like any other ``*.bif`` file; we
won't go into it here.
Now we are ready to compile our plugin. The configure script will just
need to be able to find the location of either a Zeek installation-tree or
a Zeek source-tree.
When building a plugin against a Zeek installation-tree, simply have the
installation's associated ``zeek-config`` in your :envvar:`PATH` and the
configure script will detect it and use it to obtain all the information
it needs::
# which zeek-config
/usr/local/zeek/bin/zeek-config
# cd rot13-plugin
# ./configure && make
[... cmake output ...]
When building a plugin against a Zeek source-tree (which itself needs
to have first been built), the configure script has to explicitly be
told its location::
# cd rot13-plugin
# ./configure --zeek-dist=/path/to/zeek/dist && make
[... cmake output ...]
This builds the plugin in a subdirectory ``build/``. In fact, that
subdirectory *becomes* the plugin: when ``make`` finishes, ``build/``
has everything it needs for Zeek to recognize it as a dynamic plugin.
Let's try that. Once we point Zeek to the ``build/`` directory, it will
pull in our new plugin automatically, as we can check with the ``-N``
option::
# export ZEEK_PLUGIN_PATH=/path/to/rot13-plugin/build
# zeek -N
[...]
Demo::Rot13 - <Insert description> (dynamic, version 0.1.0)
[...]
That looks quite good, except for the dummy description that we should
replace with something nicer so that users will know what our plugin
is about. We do this by editing the ``config.description`` line in
``src/Plugin.cc``, like this::
[...]
plugin::Configuration Plugin::Configure()
{
plugin::Configuration config;
config.name = "Demo::Rot13";
config.description = "Caesar cipher rotating a string's letters by 13 places.";
config.version.major = 0;
config.version.minor = 1;
config.version.patch = 0;
return config;
}
[...]
Now rebuild and verify that the description is visible::
# make
[...]
# zeek -N | grep Rot13
Demo::Rot13 - Caesar cipher rotating a string's letters by 13 places. (dynamic, version 0.1.0)
Zeek can also show us what exactly the plugin provides with the
more verbose option ``-NN``::
# zeek -NN
[...]
Demo::Rot13 - Caesar cipher rotating a string's letters by 13 places. (dynamic, version 0.1.0)
[Function] Demo::rot13
[...]
There's our function. Now let's use it::
# zeek -e 'print Demo::rot13("Hello")'
Uryyb
It works. We next install the plugin along with Zeek itself, so that it
will find it directly without needing the ``ZEEK_PLUGIN_PATH``
environment variable. If we first unset the variable, the function
will no longer be available::
# unset ZEEK_PLUGIN_PATH
# zeek -e 'print Demo::rot13("Hello")'
error in <command line>, line 1: unknown identifier Demo::rot13, at or near "Demo::rot13"
Once we install it, it works again::
# make install
# zeek -e 'print Demo::rot13("Hello")'
Uryyb
The installed version went into
``<zeek-install-prefix>/lib/zeek/plugins/Demo_Rot13``.
One can distribute the plugin independently of Zeek for others to use.
To distribute in source form, just remove the ``build/`` directory
(``make distclean`` does that) and then tar up the whole ``rot13-plugin/``
directory. Others then follow the same process as above after
unpacking.
To distribute the plugin in binary form, the build process
conveniently creates a corresponding tarball in ``build/dist/``. In
this case, it's called ``Demo_Rot13-0.1.0.tar.gz``, with the version
number coming out of the ``VERSION`` file that ``init-plugin`` put
into place. The binary tarball has everything needed to run the
plugin, but no further source files. Optionally, one can include
further files by specifying them in the plugin's ``CMakeLists.txt``
through the ``zeek_plugin_dist_files`` macro; the skeleton does that
for ``README``, ``VERSION``, ``CHANGES``, and ``COPYING``. To use the
plugin through the binary tarball, just unpack it into
``<zeek-install-prefix>/lib/zeek/plugins/``. Alternatively, if you unpack
it in another location, then you need to point ``ZEEK_PLUGIN_PATH`` there.
Before distributing your plugin, you should edit some of the meta
files that ``init-plugin`` puts in place. Edit ``README`` and
``VERSION``, and update ``CHANGES`` when you make changes. Also put a
license file in place as ``COPYING``; if BSD is fine, you will find a
template in ``COPYING.edit-me``.
Plugin Directory Layout
=======================
A plugin's directory needs to follow a set of conventions so that Zeek
(1) recognizes it as a plugin, and (2) knows what to load. While
``init-plugin`` takes care of most of this, the following is the full
story. We'll use ``<base>`` to represent a plugin's top-level
directory. With the skeleton, ``<base>`` corresponds to ``build/``.
``<base>/__zeek_plugin__``
A file that marks a directory as containing a Zeek plugin. The file
must exist, and its content must consist of a single line with the
qualified name of the plugin (e.g., "Demo::Rot13").
``<base>/lib/<plugin-name>.<os>-<arch>.so``
The shared library containing the plugin's compiled code. Zeek will
load this in dynamically at run-time if OS and architecture match
the current platform.
``scripts/``
A directory with the plugin's custom Zeek scripts. When the plugin
gets activated, this directory will be automatically added to
``ZEEKPATH``, so that any scripts/modules inside can be
"@load"ed.
``scripts/__load__.zeek``
    A Zeek script that will be loaded when the plugin gets activated.
    When this script executes, any BIF elements that the plugin
    defines will already be available. See below for more information
    on activating plugins.
``scripts/__preload__.zeek``
    A Zeek script that will be loaded when the plugin gets activated,
    but before any BIF elements become available. See below for more
    information on activating plugins.
``lib/bif/``
Directory with auto-generated Zeek scripts that declare the plugin's
BIF elements. The files here are produced by ``bifcl``.
Any other files in ``<base>`` are ignored by Zeek.
By convention, a plugin should put its custom scripts into sub folders
of ``scripts/``, i.e., ``scripts/<plugin-namespace>/<plugin-name>/<script>.zeek``
to avoid conflicts. As usual, you can then put a ``__load__.zeek`` in
there as well so that, e.g., ``@load Demo/Rot13`` could load a whole
module in the form of multiple individual scripts.
Note that in addition to the paths above, the ``init-plugin`` helper
puts some more files and directories in place that help with
development and installation (e.g., ``CMakeLists.txt``, ``Makefile``,
and source code in ``src/``). However, all these do not have a special
meaning for Zeek at runtime and aren't necessary for a plugin to
function.
``init-plugin``
===============
``init-plugin`` puts a basic plugin structure in place that follows
the above layout and augments it with a CMake build and installation
system. Plugins with this structure can be used both directly out of
their source directory (after ``make`` and setting Zeek's
``ZEEK_PLUGIN_PATH``), and when installed alongside Zeek (after ``make
install``).
Upon completion, ``init-plugin`` initializes a git repository and stages its
produced files for committing, but does not yet commit the files. This allows
you to tweak the new plugin as needed prior to the initial commit.
``make install`` copies over the ``lib`` and ``scripts`` directories,
as well as the ``__zeek_plugin__`` magic file and any further
distribution files specified in ``CMakeLists.txt`` (e.g., README,
VERSION). You can find a full list of files installed in
``build/MANIFEST``. Behind the scenes, ``make install`` really just
unpacks the binary tarball from ``build/dist`` into the destination
directory.
``init-plugin`` will never overwrite existing files. If its target
directory already exists, it will by default decline to do anything.
You can run it with ``-u`` instead to update an existing plugin,
however it will never overwrite any existing files; it will only put
in place files it doesn't find yet. To revert a file back to what
``init-plugin`` created originally, delete it first and then rerun
with ``-u``.
``init-plugin`` puts a ``configure`` script in place that wraps
``cmake`` with a more familiar configure-style configuration. By
default, the script provides two options for specifying paths to the
Zeek source (``--zeek-dist``) and to the plugin's installation directory
(``--install-root``). To extend ``configure`` with plugin-specific
options (such as search paths for its dependencies) don't edit the
script directly but instead extend ``configure.plugin``, which
``configure`` includes. That way you will be able to more easily
update ``configure`` in the future when the distribution version
changes. In ``configure.plugin`` you can use the predefined shell
function ``append_cache_entry`` to seed values into the CMake cache;
see the installed skeleton version and existing plugins for examples.
.. note::
In the past ``init-plugin`` also generated a ``zkg.meta`` file, automatically
creating a Zeek package containing a plugin. ``init-plugin`` now focuses
purely on plugins, as its name suggests. To bootstrap new Zeek packages
(possibly containing plugins), use the more featureful templating
functionality provided by the ``zkg create`` command, explained `here
<https://docs.zeek.org/projects/package-manager/en/stable/package.html>`_.
Activating a Plugin
===================
A plugin needs to be *activated* to make it available to the user.
Activating a plugin will:
1. Load the dynamic module
2. Make any BIF items available
3. Add the ``scripts/`` directory to ``ZEEKPATH``
4. Load ``scripts/__preload__.zeek``
5. Make BIF elements available to scripts.
6. Load ``scripts/__load__.zeek``
By default, Zeek will automatically activate all dynamic plugins found
in its search path ``ZEEK_PLUGIN_PATH``. However, in bare mode (``zeek
-b``), no dynamic plugins will be activated by default; instead the
user can selectively enable individual plugins in scriptland using the
``@load-plugin <qualified-plugin-name>`` directive (e.g.,
``@load-plugin Demo::Rot13``). Alternatively, one can activate a
plugin from the command-line by specifying its full name
(``Demo::Rot13``), or set the environment variable
``ZEEK_PLUGIN_ACTIVATE`` to a list of comma-separated names of
plugins to unconditionally activate, even in bare mode.
``zeek -N`` shows activated plugins separately from found but not yet
activated plugins. Note that plugins compiled statically into Zeek are
always activated, and hence show up as such even in bare mode.
Plugin Components
=================
It's easy for a plugin to provide custom scripts: just put them into
``scripts/``, as described above. The CMake infrastructure will automatically
install them, as well as include them in the source and binary plugin
distributions.
Any number or combination of other components can be provided by a single
plugin. For example a plugin can provide multiple different protocol
analyzers, or both a log writer and input reader.
The best place to look for examples or templates for a specific type of plugin
component are the source code of Zeek itself since every one of its components
uses the same API as any external plugin.
Each component type also has a simple integration test, found
in the Zeek source-tree's ``testing/btest/plugins/`` directory,
that can serve as a useful starting point for creating basic plugin skeletons.
Testing Plugins
===============
A plugin should come with a test suite to exercise its functionality.
The ``init-plugin`` script puts in place a basic
`BTest <https://github.com/zeek/btest>`_ setup
to start with. Initially, it comes with a single test that just checks
that Zeek loads the plugin correctly::
# cd tests
# btest -A
[ 0%] rot13.show-plugin ... ok
all 1 tests successful
You can also run this via the Makefile::
# cd ..
# make test
make -C tests
make[1]: Entering directory `tests'
all 1 tests successful
make[1]: Leaving directory `tests'
Now let's add a custom test that ensures that our BIF works correctly::
# cd tests
# cat >rot13/bif-rot13.zeek
# @TEST-EXEC: zeek %INPUT >output
# @TEST-EXEC: btest-diff output
event zeek_init()
{
print Demo::rot13("Hello");
}
Check the output::
# btest -d rot13/bif-rot13.zeek
[ 0%] rot13.bif-rot13 ... failed
% 'btest-diff output' failed unexpectedly (exit code 100)
% cat .diag
== File ===============================
Uryyb
== Error ===============================
test-diff: no baseline found.
=======================================
% cat .stderr
1 of 1 test failed
Install the baseline::
# btest -U rot13/bif-rot13.zeek
all 1 tests successful
Run the test-suite::
# btest
all 2 tests successful
Debugging Plugins
=================
If your plugin isn't loading as expected, Zeek's debugging facilities
can help illuminate what's going on. To enable, recompile Zeek
with debugging support (``./configure --enable-debug``), and
afterwards rebuild your plugin as well. If you then run Zeek with ``-B
plugins``, it will produce a file :file:`debug.log` that records details
about the process for searching, loading, and activating plugins.
To generate your own debugging output from inside your plugin, you can
add a custom debug stream by using the ``PLUGIN_DBG_LOG(<plugin>,
<args>)`` macro (defined in ``DebugLogger.h``), where ``<plugin>`` is
the ``Plugin`` instance and ``<args>`` are printf-style arguments,
just as with Zeek's standard debugging macros (grep for ``DBG_LOG`` in
Zeek's ``src/`` to see examples). At runtime, you can then activate
your plugin's debugging output with ``-B plugin-<name>``, where
``<name>`` is the name of the plugin as returned by its
``Configure()`` method, yet with the namespace-separator ``::``
replaced with a simple dash. Example: If the plugin is called
``Demo::Rot13``, use ``-B plugin-Demo-Rot13``. As usual, the debugging
output will be recorded to :file:`debug.log` if Zeek's compiled in debug
mode.
.. _building-plugins-statically:
Building Plugins Statically into Zeek
=====================================
Plugins can be built statically into a Zeek binary using the
``--include-plugins`` option passed to Zeek's ``configure``. This argument
takes a semicolon-separated list of absolute paths to plugin sources. Each
path needs to contain a ``CMakeLists.txt`` file, as is commonly the case at the
toplevel of plugin source trees, and usually also in Zeek packages. Building
plugins in this manner includes them directly into the Zeek binary
and installation. They are loaded automatically by Zeek at startup
without needing to install them separately.
Building plugins into Zeek is a handy way to build them consistently with
sanitizers, as you can use Zeek's existing ``./configure --sanitizers=...``
infrastructure to apply transparently to built-in plugins.
The configure run lists built-in plugins at the end, so you can verify
successful inclusion of your plugin there. Your plugin should also
show up in the resulting build's ``zeek -NN`` output.
Headers for built-in plugins are installed into a subdirectory of
``<zeek-install-prefix>/include/zeek/builtin-plugins`` specific to
each plugin. Scripts are installed into a subdirectory of
``<zeek-install-prefix>/share/zeek/builtin-plugins`` specific to
each plugin. The scripts directory is also automatically added to
the default ``ZEEKPATH``.
Plugin Tutorials
================
.. toctree::
:maxdepth: 1
plugins/connkey-plugin
plugins/event-metadata-plugin

View file

@ -0,0 +1,205 @@
.. _connkey-plugin:
===============================
Writing a Connection Key Plugin
===============================
.. versionadded:: 8.0
By default, Zeek looks up internal connection state using the classic five-tuple
of originator and responder IP addresses, ports, and the numeric protocol
identifier (for TCP, UDP, etc). Zeek's data structure driving this is called a
connection key, or ``ConnKey``.
In certain environments the classic five-tuple does not sufficiently distinguish
connections. Consider traffic mirrored from multiple VLANs with overlapping IP
address ranges. Concretely, a connection between 10.0.0.1 and 10.0.0.2 in one
VLAN is distinct from a connection between the same IPs in another VLAN. Here,
Zeek should include the VLAN identifier into the connection key, and you can
instruct Zeek to do so by loading the
:doc:`/scripts/policy/frameworks/conn_key/vlan_fivetuple.zeek` policy script.
Zeek's plugin API allows adding support for additional custom connection keys.
This section provides a tutorial on how to do so, using the example of VXLAN-enabled
flow tuples. If you're not familiar with plugin development, head over to the
:ref:`Writing Plugins <writing-plugins>` section.
Our goal is to implement a custom connection key to scope connections
transported within a `VXLAN <https://datatracker.ietf.org/doc/html/rfc7348/index.html>`_
tunnel by the VXLAN Network Identifier (VNI).
As a test case, we have encapsulated the `HTTP GET trace <https://github.com/zeek/zeek/raw/refs/heads/master/testing/btest/Traces/http/get.trace>`_
from the Zeek repository twice with VXLAN using VNIs 4711 and 4242, respectively,
and merged the resulting two PCAP files with the original PCAP.
The :download:`resulting PCAP <connkey-vxlan-fivetuple-plugin-src/Traces/vxlan-overlapping-http-get.pcap>`
contains three HTTP connections, two of which are VXLAN-encapsulated.
By default, Zeek will create the same connection key for the original and
encapsulated HTTP connections, since they have identical inner five-tuples.
Therefore, Zeek creates only a single ``http.log`` entry, and two entries
in ``conn.log``.
.. code-block:: shell
$ zeek -C -r Traces/vxlan-overlapping-http-get.pcap
$ zeek-cut -m uid method host uri < http.log
uid method host uri
CpWF5etn1l2rpaLu3 GET bro.org /download/CHANGES.bro-aux.txt
$ zeek-cut -m uid service history orig_pkts resp_pkts < conn.log
uid service history orig_pkts resp_pkts
Cq2CY245oGGbibJ8k9 http ShADTadtFf 21 21
CMleDu4xANIMzePYd7 vxlan D 28 0
Note that just two of the HTTP connections are encapsulated.
That is why the VXLAN connection shows only 28 packets.
Each HTTP connection has 14 packets total, 7 in each direction. Zeek aggregates
all packets into the single HTTP connection, but only 28 of them were
transported within the VXLAN tunnel connection. Note also the ``t`` and ``T``
flags in the :zeek:field:`Conn::Info$history` field. These stand for retransmissions,
caused by Zeek not discriminating between the different HTTP connections.
The plugin we'll develop below adds the VXLAN VNI to the connection key.
As a result, Zeek will correctly report three HTTP connections, tracked
and logged separately. We'll add the VNI as
:zeek:field:`vxlan_vni` to the :zeek:see:`conn_id_ctx` record, making it available
in ``http.log`` and ``conn.log`` via the ``id.ctx.vxlan_vni`` column.
After activating the plugin Zeek tracks each HTTP connection individually and
the logs will look as follows:
.. code-block:: shell
$ zeek-cut -m uid method host uri id.ctx.vxlan_vni < http.log
uid method host uri id.ctx.vxlan_vni
CBifsS2vqGEg8Fa5ac GET bro.org /download/CHANGES.bro-aux.txt 4711
CEllEz13txeSrbGqBe GET bro.org /download/CHANGES.bro-aux.txt 4242
CRfbJw1kBBvHDQQBta GET bro.org /download/CHANGES.bro-aux.txt -
$ zeek-cut -m uid service history orig_pkts resp_pkts id.ctx.vxlan_vni < conn.log
uid service history orig_pkts resp_pkts id.ctx.vxlan_vni
CRfbJw1kBBvHDQQBta http ShADadFf 7 7 -
CEllEz13txeSrbGqBe http ShADadFf 7 7 4242
CBifsS2vqGEg8Fa5ac http ShADadFf 7 7 4711
CC6Ald2LejCS1qcDy4 vxlan D 28 0 -
Implementation
==============
Adding alternative connection keys involves implementing two classes.
First, a factory class producing ``zeek::ConnKey`` instances. This
is the class created through the added ``zeek::conn_key::Component``.
Second, a custom connection key class derived from ``zeek::ConnKey``.
Instances of this class are created by the factory. This is a typical
abstract factory pattern.
Our plugin's ``Configure()`` method follows the standard pattern of setting up
basic information about the plugin and registering our own ``ConnKey`` component.
.. literalinclude:: connkey-vxlan-fivetuple-plugin-src/src/Plugin.cc
:caption: Plugin.cc
:language: cpp
:lines: 16-
:linenos:
:tab-width: 4
Next, in the ``Factory.cc`` file, we're implementing a custom ``zeek::ConnKey`` class.
This class is named ``VxlanVniConnKey`` and inherits from ``zeek::IPBasedConnKey``.
While ``zeek::ConnKey`` is technically the base class, in this tutorial we'll
derive from ``zeek::IPBasedConnKey``.
Currently, Zeek only supports IP-based connection tracking via the
``IPBasedAnalyzer`` analyzer. This analyzer requires ``zeek::IPBasedConnKey``
instances.
.. literalinclude:: connkey-vxlan-fivetuple-plugin-src/src/Factory.cc
:caption: VxlanVniConnKey class in Factory.cc
:language: cpp
:linenos:
:lines: 18-78
:tab-width: 4
The current pattern for custom connection keys is to embed the bytes used for
the ``zeek::session::detail::Key`` as a packed struct within a ``ConnKey`` instance.
We override ``DoPopulateConnIdVal()`` to set the :zeek:field:`vxlan_vni` field
of the :zeek:see:`conn_id_ctx` record value to the extracted VXLAN VNI. A small trick
employed is that we default the most significant byte of ``key.vxlan_vni`` to 0xFF.
As a VNI has only 24 bits, this allows us to determine if a VNI was actually
extracted, or whether it remained unset.
The ``DoInit()`` implementation is the actual place for connection key customization.
This is where we extract the VXLAN VNI from packet data. To do so, we're using the relatively
new ``GetAnalyzerData()`` API of the packet analysis manager.
This API allows generic access to the raw data layers analyzed by a given packet analyzer.
For our use-case, we take the most outer VXLAN layer, if any, and extract the VNI
into ``key.vxlan_vni``.
There's no requirement to use the ``GetAnalyzerData()`` API. If the ``zeek::Packet``
instance passed to ``DoInit()`` contains the needed information, e.g. VLAN identifiers
or information from the packet's raw bytes, you can use them directly.
Specifically, ``GetAnalyzerData()`` may introduce additional overhead into the
packet path that you can avoid if the information is readily available
elsewhere.
Using other Zeek APIs to determine connection key information is of course
also possible.
The next part shown concerns the ``Factory`` class itself. The
``DoConnKeyFromVal()`` method contains logic to produce a ``VxlanVniConnKey``
instance from an existing :zeek:see:`conn_id` record.
This is needed in order for the :zeek:see:`lookup_connection` builtin function to work properly.
The implementation re-uses the ``DoConnKeyFromVal()`` implementation of the
default ``fivetuple::Factory`` that our factory inherits from to extract the
classic five-tuple information.
.. literalinclude:: connkey-vxlan-fivetuple-plugin-src/src/Factory.cc
:caption: Factory class in Factory.cc
:language: cpp
:linenos:
:lines: 80-103
:tab-width: 4
Calling the ``fivetuple::Factory::DoConnKeyFromVal()`` in turn calls our
own factory's ``DoNewConnKey()`` method through virtual dispatch. Since our
factory overrides this method to always return a ``VxlanVniConnKey`` instance,
the static cast later is safe.
Last, the plugin's ``__load__.zeek`` file is shown. It includes the extension
of the :zeek:see:`conn_id_ctx` identifier by the :zeek:field:`vxlan_vni` field.
.. literalinclude:: connkey-vxlan-fivetuple-plugin-src/scripts/__load__.zeek
:caption: The conn_id redefinition in __load__.zeek
:language: zeek
:linenos:
:tab-width: 4
Using the custom Connection Key
===============================
After installing the plugin, the new connection key implementation can be
selected by redefining the script-level :zeek:see:`ConnKey::factory` variable.
This could also be done in a separate script, but we do it directly on the
command-line for simplicity. The ``ConnKey::CONNKEY_VXLAN_VNI_FIVETUPLE`` is
registered in Zeek during the plugin's ``AddComponent()`` call during
``Configure()``, where the component has the name ``VXLAN_VNI_FIVETUPLE``.
.. code-block:: shell
$ zeek -C -r Traces/vxlan-overlapping-http-get.pcap ConnKey::factory=ConnKey::CONNKEY_VXLAN_VNI_FIVETUPLE
Viewing the ``conn.log`` now shows three separate HTTP connections,
two of which have a ``vxlan_vni`` value set in their logs.
.. code-block:: shell
$ zeek-cut -m uid service history orig_pkts resp_pkts id.ctx.vxlan_vni < conn.log
uid service history orig_pkts resp_pkts id.ctx.vxlan_vni
CRfbJw1kBBvHDQQBta http ShADadFf 7 7 -
CEllEz13txeSrbGqBe http ShADadFf 7 7 4242
CBifsS2vqGEg8Fa5ac http ShADadFf 7 7 4711
CC6Ald2LejCS1qcDy4 vxlan D 28 0 -
Pretty cool, isn't it?

View file

@ -0,0 +1,9 @@
cmake_minimum_required(VERSION 3.15 FATAL_ERROR)

project(ZeekPluginConnKeyVxlanVniFivetuple)

include(ZeekPlugin)

# Build the ConnKey plugin from its two C++ translation units and ship the
# script that redefines conn_id_ctx with the vxlan_vni field.
zeek_add_plugin(
    Zeek
    ConnKey_Vxlan_Vni_Fivetuple
    SOURCES
        src/Factory.cc
        src/Plugin.cc
    SCRIPT_FILES
        scripts/__load__.zeek)

View file

@ -0,0 +1,26 @@
Copyright (c) 2025 by the Zeek Project. All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
3. Neither the name of the copyright holder nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View file

@ -0,0 +1,23 @@
#
# Convenience Makefile providing a few common top-level targets.
#

# Directory in which ./configure generates the CMake build tree.
cmake_build_dir=build
# Host platform string, e.g. "linux-x86_64".
# NOTE(review): not referenced by the targets below — presumably kept for
# packaging conventions; confirm before removing.
arch=`uname -s | tr A-Z a-z`-`uname -m`

# Default target: compile the plugin inside the CMake build directory.
all: build-it

build-it:
	( cd $(cmake_build_dir) && make )

# Install the built plugin into the configured install root.
install:
	( cd $(cmake_build_dir) && make install )

# Remove build products but keep the CMake configuration.
clean:
	( cd $(cmake_build_dir) && make clean )

# Remove the whole build tree; requires re-running ./configure afterwards.
distclean:
	rm -rf $(cmake_build_dir)

# Run the plugin's test suite (assumes a tests/ directory with a Makefile).
test:
	make -C tests

View file

@ -0,0 +1 @@
0.1.0

View file

@ -0,0 +1,193 @@
#!/bin/sh
#
# Wrapper for viewing/setting options that the plugin's CMake
# scripts will recognize.
#
# Don't edit this. Edit configure.plugin to add plugin-specific options.
#

set -e

# Remember the exact invocation so it can be replayed via ./config.status.
command="$0 $*"

# Quote $(dirname "$0") so the script works from a path containing spaces.
if [ -e "$(dirname "$0")/configure.plugin" ]; then
    # Include custom additions.
    . "$(dirname "$0")/configure.plugin"
fi

# Print usage (plus any plugin-specific options) to stderr and exit non-zero.
usage() {
    cat 1>&2 <<EOF
Usage: $0 [OPTIONS]

Plugin Options:
    --cmake=PATH               Path to CMake binary
    --zeek-dist=DIR            Path to Zeek source tree
    --install-root=DIR         Path where to install plugin into
    --with-binpac=DIR          Path to BinPAC installation root
    --with-broker=DIR          Path to Broker installation root
    --with-bifcl=PATH          Path to bifcl executable
    --enable-debug             Compile in debugging mode
    --disable-cpp-tests        Don't build C++ unit tests
EOF

    if type plugin_usage >/dev/null 2>&1; then
        plugin_usage 1>&2
    fi

    echo
    exit 1
}

# Function to append a CMake cache entry definition to the
# CMakeCacheEntries variable
# $1 is the cache entry variable name
# $2 is the cache entry variable type
# $3 is the cache entry variable value
append_cache_entry() {
    CMakeCacheEntries="$CMakeCacheEntries -D $1:$2=$3"
}

# set defaults
builddir=build
zeekdist=""
installroot="default"
zeek_plugin_begin_opts=""
CMakeCacheEntries=""

while [ $# -ne 0 ]; do
    # Split "--opt=value" arguments into their value part.
    case "$1" in
        -*=*) optarg=$(echo "$1" | sed 's/[-_a-zA-Z0-9]*=//') ;;
        *) optarg= ;;
    esac

    case "$1" in
        --help | -h)
            usage
            ;;
        --cmake=*)
            CMakeCommand=$optarg
            ;;
        --zeek-dist=*)
            # Normalize to an absolute path; quoted so spaces survive.
            zeekdist=$(cd "$optarg" && pwd)
            ;;
        --install-root=*)
            installroot=$optarg
            ;;
        --with-binpac=*)
            append_cache_entry BinPAC_ROOT_DIR PATH "$optarg"
            binpac_root=$optarg
            ;;
        --with-broker=*)
            append_cache_entry BROKER_ROOT_DIR PATH "$optarg"
            broker_root=$optarg
            ;;
        --with-bifcl=*)
            append_cache_entry BifCl_EXE PATH "$optarg"
            ;;
        --enable-debug)
            append_cache_entry BRO_PLUGIN_ENABLE_DEBUG BOOL true
            ;;
        --disable-cpp-tests)
            zeek_plugin_begin_opts="DISABLE_CPP_TESTS;$zeek_plugin_begin_opts"
            ;;
        *)
            # Give configure.plugin a chance to consume unknown options.
            if type plugin_option >/dev/null 2>&1; then
                plugin_option "$1" && shift && continue
            fi

            echo "Invalid option '$1'. Try $0 --help to see available options."
            exit 1
            ;;
    esac
    shift
done

if [ -z "$CMakeCommand" ]; then
    # prefer cmake3 over "regular" cmake (cmake == cmake2 on RHEL)
    if command -v cmake3 >/dev/null 2>&1; then
        CMakeCommand="cmake3"
    elif command -v cmake >/dev/null 2>&1; then
        CMakeCommand="cmake"
    else
        echo "This plugin requires CMake, please install it first."
        echo "Then you may use this script to configure the CMake build."
        echo "Note: pass --cmake=PATH to use cmake in non-standard locations."
        exit 1
    fi
fi

if [ -z "$zeekdist" ]; then
    # No source tree given: fall back to an installed Zeek via zeek-config.
    if type zeek-config >/dev/null 2>&1; then
        zeek_config="zeek-config"
    else
        echo "Either 'zeek-config' must be in PATH or '--zeek-dist=<path>' used"
        exit 1
    fi

    append_cache_entry BRO_CONFIG_PREFIX PATH "$(${zeek_config} --prefix)"
    append_cache_entry BRO_CONFIG_INCLUDE_DIR PATH "$(${zeek_config} --include_dir)"
    append_cache_entry BRO_CONFIG_PLUGIN_DIR PATH "$(${zeek_config} --plugin_dir)"
    append_cache_entry BRO_CONFIG_LIB_DIR PATH "$(${zeek_config} --lib_dir)"
    append_cache_entry BRO_CONFIG_CMAKE_DIR PATH "$(${zeek_config} --cmake_dir)"
    append_cache_entry CMAKE_MODULE_PATH PATH "$(${zeek_config} --cmake_dir)"

    build_type=$(${zeek_config} --build_type)

    if [ "$build_type" = "debug" ]; then
        append_cache_entry BRO_PLUGIN_ENABLE_DEBUG BOOL true
    fi

    if [ -z "$binpac_root" ]; then
        append_cache_entry BinPAC_ROOT_DIR PATH "$(${zeek_config} --binpac_root)"
    fi

    if [ -z "$broker_root" ]; then
        append_cache_entry BROKER_ROOT_DIR PATH "$(${zeek_config} --broker_root)"
    fi
else
    if [ ! -e "$zeekdist/zeek-path-dev.in" ]; then
        echo "$zeekdist does not appear to be a valid Zeek source tree."
        exit 1
    fi

    # BRO_DIST is the canonical/historical name used by plugin CMake scripts
    # ZEEK_DIST doesn't serve a function at the moment, but set/provided anyway
    append_cache_entry BRO_DIST PATH "$zeekdist"
    append_cache_entry ZEEK_DIST PATH "$zeekdist"
    append_cache_entry CMAKE_MODULE_PATH PATH "$zeekdist/cmake"
fi

if [ "$installroot" != "default" ]; then
    mkdir -p "$installroot"
    append_cache_entry BRO_PLUGIN_INSTALL_ROOT PATH "$installroot"
fi

if [ -n "$zeek_plugin_begin_opts" ]; then
    append_cache_entry ZEEK_PLUGIN_BEGIN_OPTS STRING "$zeek_plugin_begin_opts"
fi

if type plugin_addl >/dev/null 2>&1; then
    plugin_addl
fi

echo "Build Directory : $builddir"
echo "Zeek Source Directory : $zeekdist"

mkdir -p "$builddir"
cd "$builddir"

# NOTE: $CMakeCacheEntries is intentionally left unquoted so that each
# "-D NAME:TYPE=VALUE" entry is passed as separate arguments; this template
# therefore cannot support cache values that themselves contain spaces.
"$CMakeCommand" $CMakeCacheEntries ..

echo "# This is the command used to configure this build" >config.status
echo "$command" >>config.status
chmod u+x config.status

View file

@ -0,0 +1,3 @@
# Extend the connection context record so the VXLAN Network Identifier (VNI)
# becomes part of conn_id and appears as the "id.ctx.vxlan_vni" log column.
redef record conn_id_ctx += {
	## VNI of the outermost VXLAN tunnel layer carrying this connection.
	## Remains unset for connections that were not VXLAN-encapsulated.
	vxlan_vni: count &log &optional;
};

View file

@ -0,0 +1,105 @@
// See the file "COPYING" in the main distribution directory for copyright.
#include "Factory.h"
#include <memory>
#include "zeek/ID.h"
#include "zeek/Val.h"
#include "zeek/iosource/Packet.h"
#include "zeek/packet_analysis/Analyzer.h"
#include "zeek/packet_analysis/Manager.h"
#include "zeek/packet_analysis/protocol/ip/conn_key/IPBasedConnKey.h"
#include "zeek/packet_analysis/protocol/ip/conn_key/fivetuple/Factory.h"
#include "zeek/util-types.h"
namespace zeek::conn_key::vxlan_vni_fivetuple {
// ConnKey that extends the classic IP five-tuple with the VNI of the
// outermost VXLAN tunnel layer, so identical inner flows carried in
// different VXLAN segments are tracked as distinct connections.
class VxlanVniConnKey : public zeek::IPBasedConnKey {
public:
    VxlanVniConnKey() {
        // Ensure padding holes in the key struct are filled with zeroes.
        memset(static_cast<void*>(&key), 0, sizeof(key));
    }

    // Give the IP-based analyzer machinery access to the embedded five-tuple
    // portion of the key.
    detail::PackedConnTuple& PackedTuple() override { return key.tuple; }
    const detail::PackedConnTuple& PackedTuple() const override { return key.tuple; }

protected:
    // Expose the whole packed struct (five-tuple + VNI) as the raw bytes
    // used for session-table hashing.
    zeek::session::detail::Key DoSessionKey() const override {
        return {reinterpret_cast<const void*>(&key), sizeof(key), session::detail::Key::CONNECTION_KEY_TYPE};
    }

    // Populate conn_id/ctx record values from this key: sets ctx$vxlan_vni
    // when a VNI was extracted, otherwise removes it (keys get reused).
    void DoPopulateConnIdVal(zeek::RecordVal& conn_id, zeek::RecordVal& ctx) override {
        // Base class populates conn_id fields (orig_h, orig_p, resp_h, resp_p)
        zeek::IPBasedConnKey::DoPopulateConnIdVal(conn_id, ctx);

        if ( conn_id.GetType() != id::conn_id )
            return;

        if ( (key.vxlan_vni & 0xFF000000) == 0 ) // High-bits unset: Have VNI
            ctx.Assign(GetVxlanVniOffset(), static_cast<zeek_uint_t>(key.vxlan_vni));
        else
            ctx.Remove(GetVxlanVniOffset());
    }

    // Extract VNI from most outer VXLAN layer.
    void DoInit(const Packet& pkt) override {
        static const auto& analyzer = zeek::packet_mgr->GetAnalyzer("VXLAN");

        // Set the high-bits: This is needed because keys can get reused.
        // A 24-bit VNI never has these bits set, so 0xFF000000 marks "no VNI".
        key.vxlan_vni = 0xFF000000;

        if ( ! analyzer || ! analyzer->IsEnabled() )
            return;

        auto spans = zeek::packet_mgr->GetAnalyzerData(analyzer);
        if ( spans.empty() || spans[0].size() < 8 )
            return;

        // The 24-bit VNI occupies bytes 4-6 of the VXLAN header (network order).
        key.vxlan_vni = spans[0][4] << 16 | spans[0][5] << 8 | spans[0][6];
    }

    // Cached offset of vxlan_vni within conn_id_ctx; negative if the field
    // is missing (i.e. the plugin's script-side redef was not loaded).
    static int GetVxlanVniOffset() {
        static const auto& conn_id_ctx = zeek::id::find_type<zeek::RecordType>("conn_id_ctx");
        static int vxlan_vni_offset = conn_id_ctx->FieldOffset("vxlan_vni");
        return vxlan_vni_offset;
    }

private:
    friend class Factory;

    struct {
        struct detail::PackedConnTuple tuple;
        uint32_t vxlan_vni;
    } __attribute__((packed, aligned)) key; // packed and aligned due to usage for hashing
};
// Produce a fresh, zero-initialized VxlanVniConnKey for a new connection.
zeek::ConnKeyPtr Factory::DoNewConnKey() const { return std::make_unique<VxlanVniConnKey>(); }
// Rebuild a VxlanVniConnKey from a script-level conn_id record value, e.g.
// when lookup_connection() is invoked. The five-tuple part is handled by
// the parent class; here we only splice in the VNI from conn_id$ctx.
zeek::expected<zeek::ConnKeyPtr, std::string> Factory::DoConnKeyFromVal(const zeek::Val& v) const {
    if ( v.GetType() != id::conn_id )
        return zeek::unexpected<std::string>{"unexpected value type"};

    // The parent implementation calls our DoNewConnKey() via virtual
    // dispatch, so the produced key is guaranteed to be a VxlanVniConnKey
    // and the static_cast below is safe.
    auto ck = zeek::conn_key::fivetuple::Factory::DoConnKeyFromVal(v);
    if ( ! ck.has_value() )
        return ck;

    // Validate the record layout before using any field offsets.
    int vxlan_vni_offset = VxlanVniConnKey::GetVxlanVniOffset();
    if ( vxlan_vni_offset < 0 )
        return zeek::unexpected<std::string>{"missing vxlan_vni field"};

    static int ctx_offset = id::conn_id->FieldOffset("ctx");
    auto* k = static_cast<VxlanVniConnKey*>(ck.value().get());
    auto* ctx = v.AsRecordVal()->GetFieldAs<zeek::RecordVal>(ctx_offset);

    if ( ctx->HasField(vxlan_vni_offset) )
        k->key.vxlan_vni = ctx->GetFieldAs<zeek::CountVal>(vxlan_vni_offset);
    else
        // No VNI in the record: mirror DoInit()'s "no VNI" marker so this
        // key hashes identically to one built from a non-tunneled packet.
        k->key.vxlan_vni = 0xFF000000;

    return ck;
}
} // namespace zeek::conn_key::vxlan_vni_fivetuple

View file

@ -0,0 +1,18 @@
#pragma once
#include "zeek/ConnKey.h"
#include "zeek/packet_analysis/protocol/ip/conn_key/fivetuple/Factory.h"
namespace zeek::conn_key::vxlan_vni_fivetuple {

// Factory producing VXLAN-VNI-aware connection keys. It inherits all
// classic five-tuple handling from the default fivetuple::Factory and only
// swaps out the concrete ConnKey type it instantiates.
class Factory : public zeek::conn_key::fivetuple::Factory {
public:
    // Instantiation callback handed to the conn_key::Component registration.
    static zeek::conn_key::FactoryPtr Instantiate() { return std::make_unique<Factory>(); }

private:
    // Returns a VxlanVniConnKey instance.
    zeek::ConnKeyPtr DoNewConnKey() const override;

    // Rebuilds a key from a script-level conn_id record value, e.g. for
    // the lookup_connection() builtin.
    zeek::expected<zeek::ConnKeyPtr, std::string> DoConnKeyFromVal(const zeek::Val& v) const override;
};

} // namespace zeek::conn_key::vxlan_vni_fivetuple

View file

@ -0,0 +1,26 @@
#include "Plugin.h"
#include <zeek/conn_key/Component.h>
#include "Factory.h"
namespace plugin {
namespace Zeek_ConnKey_Vxlan_Vni_Fivetuple {
// Global plugin instance; discovered by Zeek's plugin machinery at load time.
Plugin plugin;
}
} // namespace plugin

using namespace plugin::Zeek_ConnKey_Vxlan_Vni_Fivetuple;

// Describe the plugin and register the ConnKey component. The component
// name "VXLAN_VNI_FIVETUPLE" surfaces in script-land as the enum value
// ConnKey::CONNKEY_VXLAN_VNI_FIVETUPLE usable for ConnKey::factory.
zeek::plugin::Configuration Plugin::Configure() {
    zeek::plugin::Configuration config;
    config.name = "Zeek::ConnKey_Vxlan_Vni_Fivetuple";
    config.description = "ConnKey implementation using the most outer VXLAN VNI";
    config.version = {0, 1, 0};

    AddComponent(new zeek::conn_key::Component("VXLAN_VNI_FIVETUPLE",
                                               zeek::conn_key::vxlan_vni_fivetuple::Factory::Instantiate));

    return config;
}

View file

@ -0,0 +1,17 @@
#pragma once
#include <zeek/plugin/Plugin.h>
namespace plugin {
namespace Zeek_ConnKey_Vxlan_Vni_Fivetuple {

// Plugin providing a VXLAN-VNI-aware connection key implementation.
class Plugin : public zeek::plugin::Plugin {
protected:
    // Overridden from zeek::plugin::Plugin; registers the ConnKey component.
    zeek::plugin::Configuration Configure() override;
};

extern Plugin plugin;

} // namespace Zeek_ConnKey_Vxlan_Vni_Fivetuple
} // namespace plugin

View file

@ -0,0 +1,3 @@
build
*.log
.state

View file

@ -0,0 +1,9 @@
cmake_minimum_required(VERSION 3.15 FATAL_ERROR)

project(ZeekPluginEventLatency)

include(ZeekPlugin)

# Build the EventLatency plugin from its single C++ source and ship the
# script that registers the WALLCLOCK_TIMESTAMP event metadata.
zeek_add_plugin(
    Zeek
    EventLatency
    SOURCES
        src/Plugin.cc
    SCRIPT_FILES
        scripts/__load__.zeek)

View file

@ -0,0 +1,26 @@
Copyright (c) 2025 by the Zeek Project. All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
3. Neither the name of the copyright holder nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View file

@ -0,0 +1,23 @@
#
# Convenience Makefile providing a few common top-level targets.
#

# Directory in which ./configure generates the CMake build tree.
cmake_build_dir=build
# Host platform string, e.g. "linux-x86_64".
# NOTE(review): not referenced by the targets below — presumably kept for
# packaging conventions; confirm before removing.
arch=`uname -s | tr A-Z a-z`-`uname -m`

# Default target: compile the plugin inside the CMake build directory.
all: build-it

build-it:
	( cd $(cmake_build_dir) && make )

# Install the built plugin into the configured install root.
install:
	( cd $(cmake_build_dir) && make install )

# Remove build products but keep the CMake configuration.
clean:
	( cd $(cmake_build_dir) && make clean )

# Remove the whole build tree; requires re-running ./configure afterwards.
distclean:
	rm -rf $(cmake_build_dir)

# Run the plugin's test suite (assumes a tests/ directory with a Makefile).
test:
	make -C tests

View file

@ -0,0 +1 @@
0.1.0

View file

@ -0,0 +1,193 @@
#!/bin/sh
#
# Wrapper for viewing/setting options that the plugin's CMake
# scripts will recognize.
#
# Don't edit this. Edit configure.plugin to add plugin-specific options.
#

set -e

# Remember the exact invocation so it can be replayed via ./config.status.
command="$0 $*"

# Quote $(dirname "$0") so the script works from a path containing spaces.
if [ -e "$(dirname "$0")/configure.plugin" ]; then
    # Include custom additions.
    . "$(dirname "$0")/configure.plugin"
fi

# Print usage (plus any plugin-specific options) to stderr and exit non-zero.
usage() {
    cat 1>&2 <<EOF
Usage: $0 [OPTIONS]

Plugin Options:
    --cmake=PATH               Path to CMake binary
    --zeek-dist=DIR            Path to Zeek source tree
    --install-root=DIR         Path where to install plugin into
    --with-binpac=DIR          Path to BinPAC installation root
    --with-broker=DIR          Path to Broker installation root
    --with-bifcl=PATH          Path to bifcl executable
    --enable-debug             Compile in debugging mode
    --disable-cpp-tests        Don't build C++ unit tests
EOF

    if type plugin_usage >/dev/null 2>&1; then
        plugin_usage 1>&2
    fi

    echo
    exit 1
}

# Function to append a CMake cache entry definition to the
# CMakeCacheEntries variable
# $1 is the cache entry variable name
# $2 is the cache entry variable type
# $3 is the cache entry variable value
append_cache_entry() {
    CMakeCacheEntries="$CMakeCacheEntries -D $1:$2=$3"
}

# set defaults
builddir=build
zeekdist=""
installroot="default"
zeek_plugin_begin_opts=""
CMakeCacheEntries=""

while [ $# -ne 0 ]; do
    # Split "--opt=value" arguments into their value part.
    case "$1" in
        -*=*) optarg=$(echo "$1" | sed 's/[-_a-zA-Z0-9]*=//') ;;
        *) optarg= ;;
    esac

    case "$1" in
        --help | -h)
            usage
            ;;
        --cmake=*)
            CMakeCommand=$optarg
            ;;
        --zeek-dist=*)
            # Normalize to an absolute path; quoted so spaces survive.
            zeekdist=$(cd "$optarg" && pwd)
            ;;
        --install-root=*)
            installroot=$optarg
            ;;
        --with-binpac=*)
            append_cache_entry BinPAC_ROOT_DIR PATH "$optarg"
            binpac_root=$optarg
            ;;
        --with-broker=*)
            append_cache_entry BROKER_ROOT_DIR PATH "$optarg"
            broker_root=$optarg
            ;;
        --with-bifcl=*)
            append_cache_entry BifCl_EXE PATH "$optarg"
            ;;
        --enable-debug)
            append_cache_entry BRO_PLUGIN_ENABLE_DEBUG BOOL true
            ;;
        --disable-cpp-tests)
            zeek_plugin_begin_opts="DISABLE_CPP_TESTS;$zeek_plugin_begin_opts"
            ;;
        *)
            # Give configure.plugin a chance to consume unknown options.
            if type plugin_option >/dev/null 2>&1; then
                plugin_option "$1" && shift && continue
            fi

            echo "Invalid option '$1'. Try $0 --help to see available options."
            exit 1
            ;;
    esac
    shift
done

if [ -z "$CMakeCommand" ]; then
    # prefer cmake3 over "regular" cmake (cmake == cmake2 on RHEL)
    if command -v cmake3 >/dev/null 2>&1; then
        CMakeCommand="cmake3"
    elif command -v cmake >/dev/null 2>&1; then
        CMakeCommand="cmake"
    else
        echo "This plugin requires CMake, please install it first."
        echo "Then you may use this script to configure the CMake build."
        echo "Note: pass --cmake=PATH to use cmake in non-standard locations."
        exit 1
    fi
fi

if [ -z "$zeekdist" ]; then
    # No source tree given: fall back to an installed Zeek via zeek-config.
    if type zeek-config >/dev/null 2>&1; then
        zeek_config="zeek-config"
    else
        echo "Either 'zeek-config' must be in PATH or '--zeek-dist=<path>' used"
        exit 1
    fi

    append_cache_entry BRO_CONFIG_PREFIX PATH "$(${zeek_config} --prefix)"
    append_cache_entry BRO_CONFIG_INCLUDE_DIR PATH "$(${zeek_config} --include_dir)"
    append_cache_entry BRO_CONFIG_PLUGIN_DIR PATH "$(${zeek_config} --plugin_dir)"
    append_cache_entry BRO_CONFIG_LIB_DIR PATH "$(${zeek_config} --lib_dir)"
    append_cache_entry BRO_CONFIG_CMAKE_DIR PATH "$(${zeek_config} --cmake_dir)"
    append_cache_entry CMAKE_MODULE_PATH PATH "$(${zeek_config} --cmake_dir)"

    build_type=$(${zeek_config} --build_type)

    if [ "$build_type" = "debug" ]; then
        append_cache_entry BRO_PLUGIN_ENABLE_DEBUG BOOL true
    fi

    if [ -z "$binpac_root" ]; then
        append_cache_entry BinPAC_ROOT_DIR PATH "$(${zeek_config} --binpac_root)"
    fi

    if [ -z "$broker_root" ]; then
        append_cache_entry BROKER_ROOT_DIR PATH "$(${zeek_config} --broker_root)"
    fi
else
    if [ ! -e "$zeekdist/zeek-path-dev.in" ]; then
        echo "$zeekdist does not appear to be a valid Zeek source tree."
        exit 1
    fi

    # BRO_DIST is the canonical/historical name used by plugin CMake scripts
    # ZEEK_DIST doesn't serve a function at the moment, but set/provided anyway
    append_cache_entry BRO_DIST PATH "$zeekdist"
    append_cache_entry ZEEK_DIST PATH "$zeekdist"
    append_cache_entry CMAKE_MODULE_PATH PATH "$zeekdist/cmake"
fi

if [ "$installroot" != "default" ]; then
    mkdir -p "$installroot"
    append_cache_entry BRO_PLUGIN_INSTALL_ROOT PATH "$installroot"
fi

if [ -n "$zeek_plugin_begin_opts" ]; then
    append_cache_entry ZEEK_PLUGIN_BEGIN_OPTS STRING "$zeek_plugin_begin_opts"
fi

if type plugin_addl >/dev/null 2>&1; then
    plugin_addl
fi

echo "Build Directory : $builddir"
echo "Zeek Source Directory : $zeekdist"

mkdir -p "$builddir"
cd "$builddir"

# NOTE: $CMakeCacheEntries is intentionally left unquoted so that each
# "-D NAME:TYPE=VALUE" entry is passed as separate arguments; this template
# therefore cannot support cache values that themselves contain spaces.
"$CMakeCommand" $CMakeCacheEntries ..

echo "# This is the command used to configure this build" >config.status
echo "$command" >>config.status
chmod u+x config.status

View file

@ -0,0 +1,11 @@
module EventLatency;

redef enum EventMetadata::ID += {
	## Identifier for the absolute time at which Zeek published this event.
	## The numeric value 10001000 must be unique and consistent across the
	## whole deployment; values below 200 are reserved for Zeek itself.
	WALLCLOCK_TIMESTAMP = 10001000,
};

event zeek_init()
	{
	# Register the identifier with type "time" so metadata on received
	# remote events is deserialized correctly; assert so a conflicting
	# registration fails loudly at startup.
	assert EventMetadata::register(WALLCLOCK_TIMESTAMP, time);
	}

View file

@ -0,0 +1 @@
# Empty

View file

@ -0,0 +1,65 @@
#include "Plugin.h"
#include <zeek/Event.h>
#include <zeek/Val.h>
#include <zeek/cluster/Backend.h>
#include <zeek/plugin/Plugin.h>
#include <zeek/telemetry/Manager.h>
namespace plugin {
namespace Zeek_EventLatency {
// Global plugin instance; discovered by Zeek's plugin machinery at load time.
Plugin plugin;
}
} // namespace plugin

using namespace plugin::Zeek_EventLatency;
// Describe the plugin and opt into the two hooks used for latency tracking:
// HOOK_PUBLISH_EVENT (stamp outgoing events) and HOOK_QUEUE_EVENT (measure
// incoming ones).
zeek::plugin::Configuration Plugin::Configure() {
    zeek::plugin::Configuration config;
    config.name = "Zeek::EventLatency";
    config.description = "Track remote event latencies";
    config.version = {0, 1, 0};

    EnableHook(zeek::plugin::HOOK_PUBLISH_EVENT);
    EnableHook(zeek::plugin::HOOK_QUEUE_EVENT);

    return config;
}
// Create the latency histogram once scripts are loaded. The bounds are the
// upper bucket limits in seconds (0.2ms through 2ms in 0.2ms steps).
void Plugin::InitPostScript() {
    double bounds[] = {0.0002, 0.0004, 0.0006, 0.0008, 0.0010, 0.0012, 0.0014, 0.0016, 0.0018, 0.0020};
    histogram =
        zeek::telemetry_mgr->HistogramInstance("zeek", "cluster_event_latency_seconds", {}, bounds, "event latency");
}
// Invoked when an event is published to a cluster topic. Attaches the
// current wallclock time as WALLCLOCK_TIMESTAMP metadata; returning true
// lets the publish proceed. The backend/topic parameters are unused here.
bool Plugin::HookPublishEvent(zeek::cluster::Backend& backend, const std::string& topic,
                              zeek::cluster::detail::Event& event) {
    static const auto& wallclock_id = zeek::id::find_val<zeek::EnumVal>("EventLatency::WALLCLOCK_TIMESTAMP");

    // real=true selects wallclock time rather than network time.
    auto now_val = zeek::make_intrusive<zeek::TimeVal>(zeek::util::current_time(/*real=*/true));

    if ( ! event.AddMetadata(wallclock_id, now_val) )
        zeek::reporter->FatalError("failed to add wallclock timestamp metadata");

    return true;
}
// Invoked for every event about to be queued. For remote events, reads the
// sender's WALLCLOCK_TIMESTAMP metadata and records the observed latency in
// the histogram. Returning false means the plugin does not take over the
// event and normal queuing continues.
bool Plugin::HookQueueEvent(zeek::Event* event) {
    static const auto& wallclock_id = zeek::id::find_val<zeek::EnumVal>("EventLatency::WALLCLOCK_TIMESTAMP");

    // Locally raised events carry no remote timestamp; skip them.
    if ( event->Source() == zeek::util::detail::SOURCE_LOCAL )
        return false;

    auto timestamps = event->MetadataValues(wallclock_id);
    if ( timestamps->Size() > 0 ) {
        double remote_ts = timestamps->ValAt(0)->AsTime();
        auto now = zeek::util::current_time(/*real=*/true);

        // Clamp at zero: clock skew between nodes can make the difference
        // negative even though the event genuinely traveled forward in time.
        auto latency = std::max(0.0, now - remote_ts);
        histogram->Observe(latency);
    }
    else
        // Remote event without our metadata, e.g. sent by a node that does
        // not have this plugin loaded.
        zeek::reporter->Warning("missing wallclock timestamp metadata");

    return false;
}

View file

@ -0,0 +1,29 @@
#pragma once
#include <zeek/plugin/Plugin.h>
#include <zeek/telemetry/Histogram.h>
namespace plugin {
namespace Zeek_EventLatency {

// Plugin measuring cluster event latency: outgoing events are stamped with
// a wallclock timestamp (as event metadata) and, on receipt, the difference
// to the local wallclock is recorded in a telemetry histogram.
class Plugin : public zeek::plugin::Plugin {
protected:
    // Overridden from zeek::plugin::Plugin.
    zeek::plugin::Configuration Configure() override;
    void InitPostScript() override;

    // Stamps outgoing events with the current wallclock time.
    bool HookPublishEvent(zeek::cluster::Backend& backend, const std::string& topic,
                          zeek::cluster::detail::Event& event) override;

    // Computes and records the latency of received remote events.
    bool HookQueueEvent(zeek::Event* event) override;

private:
    // Histogram of observed latencies in seconds; created in InitPostScript().
    zeek::telemetry::HistogramPtr histogram;
};

extern Plugin plugin;

} // namespace Zeek_EventLatency
} // namespace plugin

View file

@ -0,0 +1,103 @@
.. _event-metadata-plugin:
=====================
Event Metadata Plugin
=====================
.. versionadded:: 8.0
Zeek's plugin API allows adding metadata to Zeek events. In the Zeek-script
layer, the :zeek:see:`EventMetadata::current` and :zeek:see:`EventMetadata::current_all`
functions can be used to introspect metadata attached to events. In a Zeek cluster,
metadata is transported via remote events for consumption by other Zeek nodes.
This section describes the functionality in form of a tutorial. We'll
be using custom event metadata to track the latency of Zeek events in a
cluster and expose them as a Prometheus histogram.
If you're unfamiliar with plugin development, head over to the
:ref:`Writing Plugins <writing-plugins>` section. For more information
about telemetry and Prometheus, see also the :ref:`Telemetry framework's <framework-telemetry>`
documentation.
Registering Metadata
====================
Initially, we make Zeek's core aware of the metadata to attach to events. This
requires two steps.
First, redefining the :zeek:see:`EventMetadata::ID` enumeration with our
custom enumeration value ``WALLCLOCK_TIMESTAMP``. This is our metadata identifier.
Its value represents the Unix timestamps when an event was published.
Second, registering the metadata identifier with Zeek's :zeek:see:`time` type
by calling :zeek:see:`EventMetadata::register` in a :zeek:see:`zeek_init` handler.
This instructs Zeek to convert metadata items in received remote events with
identifier ``10001000`` to a :zeek:see:`time` value.
For simplicity, the second step is done in the plugin's ``scripts/__load__.zeek`` file
that's loaded automatically when Zeek loads the plugin.
.. literalinclude:: event-metadata-plugin-src/scripts/__load__.zeek
:caption: __load__.zeek
:language: zeek
:linenos:
:tab-width: 4
The ``10001000`` represents the metadata identifier for serialization purposes. It
needs to be unique and have a defined meaning and consistent type for a given Zeek
deployment. Metadata identifiers below ``200`` are reserved for Zeek's internal use.
Users are free to choose any other value. Zeek will fail to start or fail to
register the type in the case of conflicting identifiers in third-party packages.
Implementing the Plugin
=======================
Next, we implement the ``InitPostScript()``, ``HookPublishEvent()`` and
``HookQueueEvent()`` methods in our plugin.
In the ``InitPostScript()`` method, a histogram instance is initialized using
Zeek's telemetry manager with hard-coded bounds. These define buckets for latency
monitoring.
The ``HookPublishEvent()`` method adds ``WALLCLOCK_TIMESTAMP`` metadata with
the current time to the event, while the ``HookQueueEvent()`` method extracts
the sender's timestamp and computes the latency based on its own local time.
Finally, the latency is recorded with the histogram by calling ``Observe()``.
.. literalinclude:: event-metadata-plugin-src/src/Plugin.cc
:caption: Plugin.cc
:language: cpp
:linenos:
:lines: 28-
:tab-width: 4
Resulting Prometheus Metrics
============================
Deploying the plugin outlined above in a cluster and querying the manager's
metrics endpoint presents the following result::
$ curl -s localhost:10001/metrics | grep '^zeek_cluster_event_latency'
zeek_cluster_event_latency_seconds_count{endpoint="manager"} 11281
zeek_cluster_event_latency_seconds_sum{endpoint="manager"} 7.960928916931152
zeek_cluster_event_latency_seconds_bucket{endpoint="manager",le="0.0002"} 37
zeek_cluster_event_latency_seconds_bucket{endpoint="manager",le="0.0004"} 583
zeek_cluster_event_latency_seconds_bucket{endpoint="manager",le="0.0005999999999999999"} 3858
zeek_cluster_event_latency_seconds_bucket{endpoint="manager",le="0.0008"} 7960
zeek_cluster_event_latency_seconds_bucket{endpoint="manager",le="0.001"} 10185
zeek_cluster_event_latency_seconds_bucket{endpoint="manager",le="0.0012"} 10957
zeek_cluster_event_latency_seconds_bucket{endpoint="manager",le="0.0014"} 11239
zeek_cluster_event_latency_seconds_bucket{endpoint="manager",le="0.0016"} 11269
zeek_cluster_event_latency_seconds_bucket{endpoint="manager",le="0.0018"} 11279
zeek_cluster_event_latency_seconds_bucket{endpoint="manager",le="0.002"} 11281
zeek_cluster_event_latency_seconds_bucket{endpoint="manager",le="+Inf"} 11281
This example indicates that there were a total of 11281 latencies observed,
the summed-up latency was around 8 seconds, 37 events had a latency less than or
equal to 0.2 milliseconds, 583 had a latency less than or equal to 0.4 milliseconds,
and none took more than 2 milliseconds.
This sort of data is usually scraped and ingested by a `Prometheus server <https://prometheus.io/>`_ and
then visualized using `Grafana <https://grafana.com/>`_.

View file

@ -0,0 +1,46 @@
#!/bin/bash
#
# Copyright (c) 2020-2023 by the Zeek Project. See LICENSE for details.
#
# Tool to update autogenerated docs that require external files. Must be
# run manually and requires access to the Spicy TFTP analyzer.

# Abort on any command failure or use of an unset variable.
set -o errexit
set -o nounset

if [ $# != 1 ]; then
    echo "usage: $(basename "$0") <spicy-tftp-repo>"
    exit 1
fi

TFTP=$1

if [ ! -d "${TFTP}"/analyzer ]; then
    echo "${TFTP} does not seem to point to a spicy-tftp repository."
    exit 1
fi

# Resolve key locations relative to this script's position in the tree.
ZEEK="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)/../../.."
DOC="${ZEEK}/doc"
SPICY="${ZEEK}/auxil/spicy"
SPICYDOC="${ZEEK}/build/auxil/spicy/bin/spicy-doc"
AUTOGEN_FINAL="${DOC}/devel/spicy/autogen"

if [ ! -x "${SPICYDOC}" ]; then
    >&2 echo "Warning: Could not find spicy-doc in build directory, aborting"
    exit 0
fi

# Regenerate the Spicy-side function reference for the Zeek module.
"${SPICY}/doc/scripts/autogen-spicy-lib" functions zeek <"${ZEEK}/scripts/spicy/zeek.spicy" >"${AUTOGEN_FINAL}/zeek-functions.spicy"

# Copy some static files over.
cp "${TFTP}"/scripts/main.zeek "${AUTOGEN_FINAL}"/tftp.zeek
cp "${TFTP}"/analyzer/tftp.spicy "${AUTOGEN_FINAL}"/tftp.spicy
cp "${TFTP}"/analyzer/tftp.evt "${AUTOGEN_FINAL}"/tftp.evt

# Copy some files from the Zeek source tree so that zeek-docs remains standalone for CI.
cp "${ZEEK}/scripts/base/frameworks/spicy/init-bare.zeek" "${AUTOGEN_FINAL}/"
cp "${ZEEK}/scripts/base/frameworks/spicy/init-framework.zeek" "${AUTOGEN_FINAL}/"
cp "${ZEEK}/auxil/spicy/doc/scripts/spicy-pygments.py" "${DOC}/ext"

View file

@ -0,0 +1,38 @@
module Spicy;
export {
# doc-options-start
## Constant for testing if Spicy is available.
const available = T;
## Show output of Spicy print statements.
const enable_print = F &redef;
## Record and display profiling information, if compiled into analyzer.
const enable_profiling = F &redef;
## abort() instead of throwing HILTI exceptions.
const abort_on_exceptions = F &redef;
## Include backtraces when reporting unhandled exceptions.
const show_backtraces = F &redef;
## Maximum depth of recursive file analysis (Spicy analyzers only).
const max_file_depth: count = 5 &redef;
# doc-options-end
# doc-types-start
## Result type for :zeek:see:`Spicy::resource_usage`. The values reflect resource
## usage as reported by the Spicy runtime system.
type ResourceUsage: record {
user_time : interval; ##< user CPU time of the Zeek process
system_time :interval; ##< system CPU time of the Zeek process
memory_heap : count; ##< memory allocated on the heap by the Zeek process
num_fibers : count; ##< number of fibers currently in use
max_fibers: count; ##< maximum number of fibers ever in use
max_fiber_stack_size: count; ##< maximum fiber stack size ever in use
cached_fibers: count; ##< number of fibers currently cached
};
# doc-types-end
}

View file

@ -0,0 +1,85 @@
# doc-common-start
module Spicy;
export {
# doc-functions-start
## Enable a specific Spicy protocol analyzer if not already active. If this
## analyzer replaces a standard analyzer, that one will automatically be
## disabled.
##
## tag: analyzer to toggle
##
## Returns: true if the operation succeeded
global enable_protocol_analyzer: function(tag: Analyzer::Tag) : bool;
## Disable a specific Spicy protocol analyzer if not already inactive. If
## this analyzer replaces a standard analyzer, that one will automatically
## be re-enabled.
##
## tag: analyzer to toggle
##
## Returns: true if the operation succeeded
global disable_protocol_analyzer: function(tag: Analyzer::Tag) : bool;
## Enable a specific Spicy file analyzer if not already active. If this
## analyzer replaces a standard analyzer, that one will automatically be
## disabled.
##
## tag: analyzer to toggle
##
## Returns: true if the operation succeeded
global enable_file_analyzer: function(tag: Files::Tag) : bool;
## Disable a specific Spicy file analyzer if not already inactive. If
## this analyzer replaces a standard analyzer, that one will automatically
## be re-enabled.
##
## tag: analyzer to toggle
##
## Returns: true if the operation succeeded
global disable_file_analyzer: function(tag: Files::Tag) : bool;
## Returns current resource usage as reported by the Spicy runtime system.
global resource_usage: function() : ResourceUsage;
# doc-functions-end
}
# Marked with &is_used to suppress complaints when there aren't any
# Spicy file analyzers loaded, and hence this event can't be generated.
event spicy_analyzer_for_mime_type(a: Files::Tag, mt: string) &is_used
{
Files::register_for_mime_type(a, mt);
}
# Marked with &is_used to suppress complaints when there aren't any
# Spicy protocol analyzers loaded, and hence this event can't be generated.
event spicy_analyzer_for_port(a: Analyzer::Tag, p: port) &is_used
{
Analyzer::register_for_port(a, p);
}
# Implementations of the exported API; each wrapper forwards to the
# corresponding Spicy built-in function.
function enable_protocol_analyzer(tag: Analyzer::Tag) : bool
{
return Spicy::__toggle_analyzer(tag, T);
}
function disable_protocol_analyzer(tag: Analyzer::Tag) : bool
{
return Spicy::__toggle_analyzer(tag, F);
}
function enable_file_analyzer(tag: Files::Tag) : bool
{
return Spicy::__toggle_analyzer(tag, T);
}
function disable_file_analyzer(tag: Files::Tag) : bool
{
return Spicy::__toggle_analyzer(tag, F);
}
function resource_usage() : ResourceUsage
{
return Spicy::__resource_usage();
}

View file

@ -0,0 +1,16 @@
# Copyright (c) 2021 by the Zeek Project. See LICENSE for details.
#
# Note: When line numbers change in this file, update the documentation that
# pulls it in by line range. (Deliberately no comments added below for that reason.)
protocol analyzer spicy::TFTP over UDP:
parse with TFTP::Packet,
port 69/udp;
import TFTP;
on TFTP::Request if ( is_read ) -> event tftp::read_request($conn, $is_orig, self.filename, self.mode);
on TFTP::Request if ( ! is_read ) -> event tftp::write_request($conn, $is_orig, self.filename, self.mode);
on TFTP::Data -> event tftp::data($conn, $is_orig, self.num, self.data);
on TFTP::Acknowledgement -> event tftp::ack($conn, $is_orig, self.num);
on TFTP::Error -> event tftp::error($conn, $is_orig, self.code, self.msg);

View file

@ -0,0 +1,95 @@
# Copyright (c) 2021 by the Zeek Project. See LICENSE for details.
#
# Trivial File Transfer Protocol
#
# Specs from https://tools.ietf.org/html/rfc1350
module TFTP;
import spicy;
# Common header for all messages:
#
# 2 bytes
# ---------------
# | TFTP Opcode |
# ---------------
public type Packet = unit {
# public top-level entry point for parsing
op: uint16 &convert=Opcode($$);
# Dispatch on the opcode to the per-message unit.
# NOTE(review): there is no default case, so an unknown opcode aborts
# parsing — presumably intended to signal a protocol violation; confirm.
switch (self.op) {
Opcode::RRQ -> rrq: Request(True);
Opcode::WRQ -> wrq: Request(False);
Opcode::DATA -> data: Data;
Opcode::ACK -> ack: Acknowledgement;
Opcode::ERROR -> error: Error;
};
};
# TFTP supports five types of packets [...]:
#
# opcode operation
# 1 Read request (RRQ)
# 2 Write request (WRQ)
# 3 Data (DATA)
# 4 Acknowledgment (ACK)
# 5 Error (ERROR)
type Opcode = enum {
RRQ = 0x01,
WRQ = 0x02,
DATA = 0x03,
ACK = 0x04,
ERROR = 0x05,
};
# Figure 5-1: RRQ/WRQ packet
#
# 2 bytes string 1 byte string 1 byte
# ------------------------------------------------
# | Opcode | Filename | 0 | Mode | 0 |
# ------------------------------------------------
# The is_read parameter records whether this is an RRQ (True) or WRQ
# (False); it is not used for parsing but drives the event dispatch in
# the accompanying .evt file.
type Request = unit(is_read: bool) {
filename: bytes &until=b"\x00";
mode: bytes &until=b"\x00";
on %done {
# A complete request parsed; confirm the analyzer to Zeek.
spicy::accept_input();
}
};
# Figure 5-2: DATA packet
#
# 2 bytes 2 bytes n bytes
# ----------------------------------
# | Opcode | Block # | Data |
# ----------------------------------
type Data = unit {
num: uint16;
data: bytes &eod;
};
# Figure 5-3: ACK packet
#
# 2 bytes 2 bytes
# ---------------------
# | Opcode | Block # |
# ---------------------
type Acknowledgement = unit {
num: uint16;
};
# Figure 5-4: ERROR packet
#
# 2 bytes 2 bytes string 1 byte
# -----------------------------------------
# | Opcode | ErrorCode | ErrMsg | 0 |
# -----------------------------------------
type Error = unit {
code: uint16;
msg: bytes &until=b"\x00";
};

View file

@ -0,0 +1,162 @@
# Copyright (c) 2021 by the Zeek Project. See LICENSE for details.
module TFTP;
export {
redef enum Log::ID += { LOG };
type Info: record {
## Timestamp for when the request happened.
ts: time &log;
## Unique ID for the connection.
uid: string &log;
## The connection's 4-tuple of endpoint addresses/ports.
id: conn_id &log;
## True for write requests, False for read requests.
wrq: bool &log;
## File name of request.
fname: string &log;
## Mode of request.
mode: string &log;
## UID of the data connection, if one was seen.
uid_data: string &optional &log;
## Number of bytes sent.
size: count &default=0 &log;
## Highest block number sent.
block_sent: count &default=0 &log;
## Highest block number acknowledged.
block_acked: count &default=0 &log;
## Any error code encountered.
error_code: count &optional &log;
## Any error message encountered.
error_msg: string &optional &log;
# Set to block number of final piece of data once received.
final_block: count &optional;
# Set to true once logged.
done: bool &default=F;
};
## Event that can be handled to access the TFTP logging record.
global log_tftp: event(rec: Info);
}
# Maps a partial data connection ID to the request's Info record.
global expected_data_conns: table[addr, port, addr] of Info;
# Attach the per-transaction TFTP state to the connection record.
redef record connection += {
tftp: Info &optional;
};
# Create the tftp.log stream at startup.
event zeek_init() &priority=5
{
Log::create_stream(TFTP::LOG, [$columns = Info, $ev = log_tftp, $path="tftp"]);
}
# Write the connection's TFTP record to the log stream, unless there is
# no such record or it has already been written.
function log_pending(c: connection)
	{
	if ( c?$tftp && ! c$tftp$done )
		{
		Log::write(TFTP::LOG, c$tftp);
		c$tftp$done = T;
		}
	}
# Set up logging state for a new read/write request and prepare for the
# server's follow-up data connection, which arrives from a different
# source port.
function init_request(c: connection, is_orig: bool, fname: string, mode: string, is_read: bool)
{
# A new request on the same 5-tuple starts a new transaction; flush any
# state still pending from a previous one.
log_pending(c);
local info: Info;
info$ts = network_time();
info$uid = c$uid;
info$id = c$id;
info$fname = fname;
info$mode = mode;
info$wrq = (! is_read);
c$tftp = info;
# The data will come in from a different source port.
Analyzer::schedule_analyzer(c$id$resp_h, c$id$orig_h, c$id$orig_p, Analyzer::ANALYZER_SPICY_TFTP, 1min);
expected_data_conns[c$id$resp_h, c$id$orig_p, c$id$orig_h] = info;
}
# When the scheduled TFTP analyzer attaches to the data connection, link
# that connection to the original request's Info record and tag the service.
event scheduled_analyzer_applied(c: connection, a: Analyzer::Tag) &priority=10
	{
	# Shortcut for the connection ID; previously declared but unused.
	local id = c$id;

	if ( [id$orig_h, id$resp_p, id$resp_h] in expected_data_conns )
		{
		c$tftp = expected_data_conns[id$orig_h, id$resp_p, id$resp_h];
		# Record the data connection's UID in the request's log record.
		c$tftp$uid_data = c$uid;
		add c$service["spicy_tftp_data"];
		}
	}
# Entry point for read requests (RRQ).
event tftp::read_request(c: connection, is_orig: bool, fname: string, mode: string)
{
init_request(c, is_orig, fname, mode, T);
}
# Entry point for write requests (WRQ).
event tftp::write_request(c: connection, is_orig: bool, fname: string, mode: string)
{
init_request(c, is_orig, fname, mode, F);
}
# Track DATA packets: accumulate transferred bytes and remember the highest
# block number seen. A short block (< 512 bytes) marks the end of the
# transfer, per the TFTP spec.
event tftp::data(c: connection, is_orig: bool, block_num: count, data: string)
{
if ( ! c?$tftp || c$tftp$done )
return;
local info = c$tftp;
if ( block_num <= info$block_sent )
# Duplicate (or previous gap; we don't track that)
return;
info$size += |data|;
info$block_sent = block_num;
if ( |data| < 512 )
# Last block, per spec.
info$final_block = block_num;
}
# Track ACK packets and finish the transaction once the final block is
# acknowledged.
#
# Bug fix: the original assigned info$block_acked = block_num *before* the
# duplicate check, which made "block_num <= info$block_acked" always true:
# the handler always returned early, the final-block check was unreachable,
# and regressing block numbers could overwrite the high-water mark.
event tftp::ack(c: connection, is_orig: bool, block_num: count)
	{
	if ( ! c?$tftp || c$tftp$done )
		return;

	local info = c$tftp;

	if ( block_num <= info$block_acked )
		# Duplicate (or previous gap, we don't track that)
		return;

	info$block_acked = block_num;

	# If it's an ack for the last block, we're done.
	if ( info?$final_block && info$final_block == block_num )
		log_pending(c);
	}
# An ERROR packet terminates the transaction: record the error details and
# log the record immediately.
event tftp::error(c: connection, is_orig: bool, code: count, msg: string)
	{
	if ( ! c?$tftp || c$tftp$done )
		return;

	c$tftp$error_code = code;
	c$tftp$error_msg = msg;
	log_pending(c);
	}
# Flush any still-unlogged TFTP state when the connection's state expires.
# log_pending() itself already checks for missing or already-logged state,
# so the previously duplicated guard here was redundant.
event connection_state_remove(c: connection)
	{
	log_pending(c);
	}

View file

@ -0,0 +1,736 @@
.. _spicy_confirm_protocol:
.. rubric:: ``function zeek::confirm_protocol()``
[Deprecated] Triggers a DPD protocol confirmation for the current connection.
This function has been deprecated and will be removed. Use ``spicy::accept_input``
instead, which will have the same effect with Zeek.
.. _spicy_reject_protocol:
.. rubric:: ``function zeek::reject_protocol(reason: string)``
[Deprecated] Triggers a DPD protocol violation for the current connection.
This function has been deprecated and will be removed. Use ``spicy::decline_input``
instead, which will have the same effect with Zeek.
.. _spicy_weird:
.. rubric:: ``function zeek::weird(id: string, addl: string = "") : &cxxname="zeek::spicy::rt::weird";``
Reports a "weird" to Zeek. This should be used with similar semantics as in
Zeek: something quite unexpected happening at the protocol level, which however
does not prevent us from continuing to process the connection.
id: the name of the weird, which (just like in Zeek) should be a *static*
string identifying the situation reported (e.g., ``unexpected_command``).
addl: additional information to record along with the weird
.. _spicy_is_orig:
.. rubric:: ``function zeek::is_orig() : bool``
Returns true if we're currently parsing the originator side of a connection.
.. _spicy_uid:
.. rubric:: ``function zeek::uid() : string``
Returns the current connection's UID.
.. _spicy_conn_id:
.. rubric:: ``function zeek::conn_id() : tuple<orig_h: addr, orig_p: port, resp_h: addr, resp_p: port>``
Returns the current connection's 4-tuple ID to make IP address and port information available.
.. _spicy_flip_roles:
.. rubric:: ``function zeek::flip_roles()``
Instructs Zeek to flip the directionality of the current connection.
.. _spicy_number_packets:
.. rubric:: ``function zeek::number_packets() : uint64``
Returns the number of packets seen so far on the current side of the current connection.
.. _spicy_has_analyzer:
.. rubric:: ``function zeek::has_analyzer(analyzer: string, if_enabled: bool = True) : bool``
Checks if there is a Zeek analyzer of a given name.
analyzer: the Zeek-side name of the analyzer to check for
if_enabled: if true, only checks for analyzers that are enabled
Returns: true if an analyzer of that name exists (and, if ``if_enabled`` is set, is enabled).
.. _spicy_analyzer_type:
.. rubric:: ``function zeek::analyzer_type(analyzer: string, if_enabled: bool = True) : AnalyzerType``
Returns the type of a Zeek analyzer of a given name.
analyzer: the Zeek-side name of the analyzer to check
if_enabled: if true, only checks for analyzers that are enabled
Returns the type of the analyzer if it exists, or ``Undef`` if it does not.
.. _spicy_protocol_begin:
.. rubric:: ``function zeek::protocol_begin(analyzer: optional<string>, protocol: spicy::Protocol = spicy::Protocol::TCP)``
Adds a Zeek-side child protocol analyzer to the current connection.
If the same analyzer was added previously with `protocol_handle_get_or_create` or
`protocol_begin` with same argument, and not closed with `protocol_handle_close`
or `protocol_end`, no new analyzer will be added.
See `protocol_handle_get_or_create` for lifetime and error semantics.
analyzer: type of analyzer to instantiate, specified through its Zeek-side
name (similar to what Zeek's signature action `enable` takes)
protocol: the transport-layer protocol that the analyzer uses; only TCP is
currently supported here
Note: For backwards compatibility, the analyzer argument can be left unset to add
a DPD analyzer. This use is deprecated, though; use the single-argument version of
`protocol_begin` for that instead.
.. _spicy_protocol_begin_2:
.. rubric:: ``function zeek::protocol_begin(protocol: spicy::Protocol = spicy::Protocol::TCP)``
Adds a Zeek-side DPD child protocol analyzer performing dynamic protocol detection
on subsequently provided data.
If the same DPD analyzer was added previously with `protocol_handle_get_or_create` or
`protocol_begin` with same argument, and not closed with `protocol_handle_close`
or `protocol_end`, no new analyzer will be added.
See `protocol_handle_get_or_create` for lifetime and error semantics.
protocol: the transport-layer protocol on which to perform protocol detection;
only TCP is currently supported here
.. _spicy_protocol_handle_get_or_create:
.. rubric:: ``function zeek::protocol_handle_get_or_create(analyzer: string, protocol: spicy::Protocol = spicy::Protocol::TCP) : ProtocolHandle``
Gets a handle to a Zeek-side child protocol analyzer for the current connection.
If no such child exists yet it will be added; otherwise a handle to the
existing child protocol analyzer will be returned.
This function will return an error if:
- not called from a protocol analyzer, or
- the requested child protocol analyzer is of unknown type or not supported by the requested transport protocol, or
- creation of a child analyzer of the requested type was prevented by a
previous call of `disable_analyzer` with `prevent=T`
By default, any newly created child protocol analyzer will remain alive
until Zeek expires the current connection's state. Alternatively, one
can call `protocol_handle_close` or `protocol_end` to delete the analyzer
earlier.
analyzer: type of analyzer to get or instantiate, specified through its Zeek-side
name (similar to what Zeek's signature action `enable` takes).
protocol: the transport-layer protocol that the analyzer uses; only TCP is
currently supported here
.. _spicy_protocol_data_in:
.. rubric:: ``function zeek::protocol_data_in(is_orig: bool, data: bytes, protocol: spicy::Protocol = spicy::Protocol::TCP)``
Forwards protocol data to all previously instantiated Zeek-side child protocol analyzers of a given transport-layer.
is_orig: true to feed the data to the child's originator side, false for the responder
data: chunk of data to forward to child analyzer
protocol: the transport-layer protocol of the children to forward to; only TCP is currently supported here
.. _spicy_protocol_data_in_2:
.. rubric:: ``function zeek::protocol_data_in(is_orig: bool, data: bytes, h: ProtocolHandle)``
Forwards protocol data to a specific previously instantiated Zeek-side child analyzer.
is_orig: true to feed the data to the child's originator side, false for the responder
data: chunk of data to forward to child analyzer
h: handle to the child analyzer to forward data into
.. _spicy_protocol_gap:
.. rubric:: ``function zeek::protocol_gap(is_orig: bool, offset: uint64, len: uint64, h: optional<ProtocolHandle> = Null)``
Signals a gap in input data to all previously instantiated Zeek-side child protocol analyzers.
is_orig: true to signal gap to the child's originator side, false for the responder
offset: start offset of gap in input stream
len: size of gap
h: optional handle to the child analyzer signal a gap to, else signal to all child analyzers
.. _spicy_protocol_end:
.. rubric:: ``function zeek::protocol_end()``
Signals end-of-data to all previously instantiated Zeek-side child protocol
analyzers and removes them.
.. _spicy_protocol_handle_close:
.. rubric:: ``function zeek::protocol_handle_close(handle: ProtocolHandle)``
Signals end-of-data to the given child analyzer and removes it.
The given handle must be live, i.e., it must not have been used in a
previous protocol_handle_close call, and must not have been live when
protocol_end was called. If the handle is not live a runtime error will
be triggered.
handle: handle to the child analyzer to remove
.. _spicy_file_begin:
.. rubric:: ``function zeek::file_begin(mime_type: optional<string> = Null, fuid: optional<string> = Null) : string``
Signals the beginning of a file to Zeek's file analysis, associating it with the current connection.
Optionally, a mime type can be provided. It will be passed on to Zeek's file analysis framework.
Optionally, a file ID can be provided. It will be passed on to Zeek's file analysis framework.
Returns the Zeek-side file ID of the new file.
This function creates a new Zeek file analyzer that will remain alive until
either `file_end` gets called, or Zeek eventually expires the analyzer
through a timeout. (As Zeek does not tie a file analyzer's lifetime to any
connection, it may survive the termination of the current connection.)
.. _spicy_fuid:
.. rubric:: ``function zeek::fuid() : string``
Returns the current file's FUID.
.. _spicy_terminate_session:
.. rubric:: ``function zeek::terminate_session()``
Terminates the currently active Zeek-side session, flushing all state. Any
subsequent activity will start a new session from scratch. This can only be
called from inside a protocol analyzer.
.. _spicy_skip_input:
.. rubric:: ``function zeek::skip_input()``
Tells Zeek to skip sending any further input data to the current analyzer.
This is supported for protocol and file analyzers.
.. _spicy_file_set_size:
.. rubric:: ``function zeek::file_set_size(size: uint64, fid: optional<string> = Null)``
Signals the expected size of a file to Zeek's file analysis.
size: expected size of file
fid: Zeek-side ID of the file to operate on; if not given, the file started by the most recent file_begin() will be used
.. _spicy_file_data_in:
.. rubric:: ``function zeek::file_data_in(data: bytes, fid: optional<string> = Null)``
Passes file content on to Zeek's file analysis.
data: chunk of raw data to pass into analysis
fid: Zeek-side ID of the file to operate on; if not given, the file started by the most recent file_begin() will be used
.. _spicy_file_data_in_at_offset:
.. rubric:: ``function zeek::file_data_in_at_offset(data: bytes, offset: uint64, fid: optional<string> = Null)``
Passes file content at a specific offset on to Zeek's file analysis.
data: chunk of raw data to pass into analysis
offset: position in file where data starts
fid: Zeek-side ID of the file to operate on; if not given, the file started by the most recent file_begin() will be used
.. _spicy_file_gap:
.. rubric:: ``function zeek::file_gap(offset: uint64, len: uint64, fid: optional<string> = Null)``
Signals a gap in a file to Zeek's file analysis.
offset: position in file where gap starts
len: size of gap
fid: Zeek-side ID of the file to operate on; if not given, the file started by the most recent file_begin() will be used
.. _spicy_file_end:
.. rubric:: ``function zeek::file_end(fid: optional<string> = Null)``
Signals the end of a file to Zeek's file analysis.
fid: Zeek-side ID of the file to operate on; if not given, the file started by the most recent file_begin() will be used
.. _spicy_forward_packet:
.. rubric:: ``function zeek::forward_packet(identifier: uint32)``
Inside a packet analyzer, forwards what data remains after parsing the top-level unit
on to another analyzer. The index specifies the target, per the current dispatcher table.
.. _spicy_network_time:
.. rubric:: ``function zeek::network_time() : time``
Gets the network time from Zeek.
.. _spicy_get_address:
.. rubric:: ``function zeek::get_address(id: string) : addr``
Returns the value of a global Zeek script variable of Zeek type ``addr``.
Throws an exception if there's no such Zeek of that name, or if it's not of
the expected type.
id: fully-qualified name of the global Zeek variable to retrieve
.. _spicy_get_bool:
.. rubric:: ``function zeek::get_bool(id: string) : bool``
Returns the value of a global Zeek script variable of Zeek type ``bool``.
Throws an exception if there's no such Zeek of that name, or if it's not of
the expected type.
id: fully-qualified name of the global Zeek variable to retrieve
.. _spicy_get_count:
.. rubric:: ``function zeek::get_count(id: string) : uint64``
Returns the value of a global Zeek script variable of Zeek type ``count``.
Throws an exception if there's no such Zeek of that name, or if it's not of
the expected type.
id: fully-qualified name of the global Zeek variable to retrieve
.. _spicy_get_double:
.. rubric:: ``function zeek::get_double(id: string) : real``
Returns the value of a global Zeek script variable of Zeek type ``double``.
Throws an exception if there's no such Zeek of that name, or if it's not of
the expected type.
id: fully-qualified name of the global Zeek variable to retrieve
.. _spicy_get_enum:
.. rubric:: ``function zeek::get_enum(id: string) : string``
Returns the value of a global Zeek script variable of Zeek type ``enum``.
The value is returned as a string containing the enum's label name, without
any scope. Throws an exception if there's no such Zeek of that name, or if
it's not of the expected type.
id: fully-qualified name of the global Zeek variable to retrieve
.. _spicy_get_int:
.. rubric:: ``function zeek::get_int(id: string) : int64``
Returns the value of a global Zeek script variable of Zeek type ``int``.
Throws an exception if there's no such Zeek of that name, or if it's not of
the expected type.
id: fully-qualified name of the global Zeek variable to retrieve
.. _spicy_get_interval:
.. rubric:: ``function zeek::get_interval(id: string) : interval``
Returns the value of a global Zeek script variable of Zeek type
``interval``. Throws an exception if there's no such Zeek of that name, or
if it's not of the expected type.
id: fully-qualified name of the global Zeek variable to retrieve
.. _spicy_get_port:
.. rubric:: ``function zeek::get_port(id: string) : port``
Returns the value of a global Zeek script variable of Zeek type ``port``.
Throws an exception if there's no such Zeek of that name, or if it's not of
the expected type.
id: fully-qualified name of the global Zeek variable to retrieve
.. _spicy_get_record:
.. rubric:: ``function zeek::get_record(id: string) : ZeekRecord``
Returns the value of a global Zeek script variable of Zeek type ``record``.
The value is returned as an opaque handle to the record, which can be used
with the ``zeek::record_*()`` functions to access the record's fields.
Throws an exception if there's no such Zeek of that name, or if it's not of
the expected type.
id: fully-qualified name of the global Zeek variable to retrieve
.. _spicy_get_set:
.. rubric:: ``function zeek::get_set(id: string) : ZeekSet``
Returns the value of a global Zeek script variable of Zeek type ``set``. The
value is returned as an opaque handle to the set, which can be used with the
``zeek::set_*()`` functions to access the set's content. Throws an exception
if there's no such Zeek of that name, or if it's not of the expected type.
id: fully-qualified name of the global Zeek variable to retrieve
.. _spicy_get_string:
.. rubric:: ``function zeek::get_string(id: string) : bytes``
Returns the value of a global Zeek script variable of Zeek type ``string``.
The string's value is returned as a Spicy ``bytes`` value. Throws an
exception if there's no such Zeek of that name, or if it's not of the
expected type.
id: fully-qualified name of the global Zeek variable to retrieve
.. _spicy_get_subnet:
.. rubric:: ``function zeek::get_subnet(id: string) : network``
Returns the value of a global Zeek script variable of Zeek type ``subnet``.
Throws an exception if there's no such Zeek of that name, or if it's not of
the expected type.
id: fully-qualified name of the global Zeek variable to retrieve
.. _spicy_get_table:
.. rubric:: ``function zeek::get_table(id: string) : ZeekTable``
Returns the value of a global Zeek script variable of Zeek type ``table``.
The value is returned as an opaque handle to the set, which can be used with
the ``zeek::set_*()`` functions to access the set's content. Throws an
exception if there's no such Zeek of that name, or if it's not of the
expected type.
id: fully-qualified name of the global Zeek variable to retrieve
.. _spicy_get_time:
.. rubric:: ``function zeek::get_time(id: string) : time``
Returns the value of a global Zeek script variable of Zeek type ``time``.
Throws an exception if there's no such Zeek of that name, or if it's not of
the expected type.
id: fully-qualified name of the global Zeek variable to retrieve
.. _spicy_get_vector:
.. rubric:: ``function zeek::get_vector(id: string) : ZeekVector``
Returns the value of a global Zeek script variable of Zeek type ``vector``.
The value is returned as an opaque handle to the vector, which can be used
with the ``zeek::vector_*()`` functions to access the vector's content.
Throws an exception if there's no such Zeek of that name, or if it's not of
the expected type.
id: fully-qualified name of the global Zeek variable to retrieve
.. _spicy_get_value:
.. rubric:: ``function zeek::get_value(id: string) : ZeekVal``
Returns an opaque handle to a global Zeek script variable. The handle can be
used with the ``zeek::as_*()`` functions to access the variable's value.
Throws an exception if there's no Zeek variable of that name.
.. _spicy_as_address:
.. rubric:: ``function zeek::as_address(v: ZeekVal) : addr``
Returns a Zeek ``addr`` value referenced by an opaque handle. Throws an
exception if the referenced value is not of the expected type.
.. _spicy_as_bool:
.. rubric:: ``function zeek::as_bool(v: ZeekVal) : bool``
Returns a Zeek ``bool`` value referenced by an opaque handle. Throws an
exception if the referenced value is not of the expected type.
.. _spicy_as_count:
.. rubric:: ``function zeek::as_count(v: ZeekVal) : uint64``
Returns a Zeek ``count`` value referenced by an opaque handle. Throws an
exception if the referenced value is not of the expected type.
.. _spicy_as_double:
.. rubric:: ``function zeek::as_double(v: ZeekVal) : real``
Returns a Zeek ``double`` value referenced by an opaque handle. Throws an
exception if the referenced value is not of the expected type.
.. _spicy_as_enum:
.. rubric:: ``function zeek::as_enum(v: ZeekVal) : string``
Returns a Zeek ``enum`` value referenced by an opaque handle. Throws an
exception if the referenced value is not of the expected type.
.. _spicy_as_int:
.. rubric:: ``function zeek::as_int(v: ZeekVal) : int64``
Returns a Zeek ``int`` value referenced by an opaque handle. Throws an
exception if the referenced value is not of the expected type.
.. _spicy_as_interval:
.. rubric:: ``function zeek::as_interval(v: ZeekVal) : interval``
Returns a Zeek ``interval`` value referenced by an opaque handle. Throws an
exception if the referenced value is not of the expected type.
.. _spicy_as_port:
.. rubric:: ``function zeek::as_port(v: ZeekVal) : port``
Returns a Zeek ``port`` value referenced by an opaque handle. Throws an
exception if the referenced value is not of the expected type.
.. _spicy_as_record:
.. rubric:: ``function zeek::as_record(v: ZeekVal) : ZeekRecord``
Returns a Zeek ``record`` value referenced by an opaque handle. Throws an
exception if the referenced value is not of the expected type.
.. _spicy_as_set:
.. rubric:: ``function zeek::as_set(v: ZeekVal) : ZeekSet``
Returns a Zeek ``set`` value referenced by an opaque handle. Throws an
exception if the referenced value is not of the expected type.
.. _spicy_as_string:
.. rubric:: ``function zeek::as_string(v: ZeekVal) : bytes``
Returns a Zeek ``string`` value referenced by an opaque handle. The string's
value is returned as a Spicy ``bytes`` value. Throws an exception if the
referenced value is not of the expected type.
.. _spicy_as_subnet:
.. rubric:: ``function zeek::as_subnet(v: ZeekVal) : network``
Returns a Zeek ``subnet`` value referenced by an opaque handle. Throws an
exception if the referenced value is not of the expected type.
.. _spicy_as_table:
.. rubric:: ``function zeek::as_table(v: ZeekVal) : ZeekTable``
Returns a Zeek ``table`` value referenced by an opaque handle. Throws an
exception if the referenced value is not of the expected type.
.. _spicy_as_time:
.. rubric:: ``function zeek::as_time(v: ZeekVal) : time``
Returns a Zeek ``time`` value referenced by an opaque handle. Throws an
exception if the referenced value is not of the expected type.
.. _spicy_as_vector:
.. rubric:: ``function zeek::as_vector(v: ZeekVal) : ZeekVector``
Returns a Zeek ``vector`` value referenced by an opaque handle. Throws an
exception if the referenced value is not of the expected type.
.. _spicy_set_contains:
.. rubric:: ``function zeek::set_contains(id: string, v: any) : bool``
Returns true if a Zeek set contains a given value. Throws an exception if
the given ID does not exist, or does not have the expected type.
id: fully-qualified name of the global Zeek set to check
v: value to check for, which must be of the Spicy-side equivalent of the set's key type
.. _spicy_set_contains_2:
.. rubric:: ``function zeek::set_contains(s: ZeekSet, v: any) : bool``
Returns true if a Zeek set contains a given value. Throws an exception if
the set does not have the expected type.
s: opaque handle to the Zeek set, as returned by other functions
v: value to check for, which must be of the Spicy-side equivalent of the set's key type
.. _spicy_table_contains:
.. rubric:: ``function zeek::table_contains(id: string, v: any) : bool``
Returns true if a Zeek table contains a given value. Throws an exception if
the given ID does not exist, or does not have the expected type.
id: fully-qualified name of the global Zeek table to check
v: value to check for, which must be of the Spicy-side equivalent of the table's key type
.. _spicy_table_contains_2:
.. rubric:: ``function zeek::table_contains(t: ZeekTable, v: any) : bool``
Returns true if a Zeek table contains a given value. Throws an exception if
the table does not have the expected type.
t: opaque handle to the Zeek table, as returned by other functions
v: value to check for, which must be of the Spicy-side equivalent of the table's key type
.. _spicy_table_lookup:
.. rubric:: ``function zeek::table_lookup(id: string, v: any) : optional<ZeekVal>``
Returns the value associated with a key in a Zeek table. Returns an error
result if the key does not exist in the table. Throws an exception if the
given table ID does not exist, or does not have the expected type.
id: fully-qualified name of the global Zeek table to check
v: value to lookup, which must be of the Spicy-side equivalent of the table's key type
.. _spicy_table_lookup_2:
.. rubric:: ``function zeek::table_lookup(t: ZeekTable, v: any) : optional<ZeekVal>``
Returns the value associated with a key in a Zeek table. Returns an error
result if the key does not exist in the table. Throws an exception if the
table does not have the expected type.
t: opaque handle to the Zeek table, as returned by other functions
v: value to lookup, which must be of the Spicy-side equivalent of the table's key type
.. _spicy_record_has_value:
.. rubric:: ``function zeek::record_has_value(id: string, field: string) : bool``
Returns true if a Zeek record provides a value for a given field. This
includes fields with ``&default`` values. Throws an exception if the given ID
does not exist, or does not have the expected type.
id: fully-qualified name of the global Zeek record to check
field: name of the field to check
.. _spicy_record_has_value_2:
.. rubric:: ``function zeek::record_has_value(r: ZeekRecord, field: string) : bool``
Returns true if a Zeek record provides a value for a given field.
This includes fields with ``&default`` values.
r: opaque handle to the Zeek record, as returned by other functions
field: name of the field to check
.. _spicy_record_has_field:
.. rubric:: ``function zeek::record_has_field(id: string, field: string) : bool``
Returns true if the type of a Zeek record has a field of a given name.
Throws an exception if the given ID does not exist, or does not have the
expected type.
id: fully-qualified name of the global Zeek record to check
field: name of the field to check
.. _spicy_record_has_field_2:
.. rubric:: ``function zeek::record_has_field(r: ZeekRecord, field: string) : bool``
Returns true if the type of a Zeek record has a field of a given name.
r: opaque handle to the Zeek record, as returned by other functions
field: name of the field to check
.. _spicy_record_field:
.. rubric:: ``function zeek::record_field(id: string, field: string) : ZeekVal``
Returns a field's value from a Zeek record. Throws an exception if the given
ID does not exist, or does not have the expected type; or if there's no such
field in the record type, or if the field does not have a value.
id: fully-qualified name of the global Zeek record to check
field: name of the field to retrieve
.. _spicy_record_field_2:
.. rubric:: ``function zeek::record_field(r: ZeekRecord, field: string) : ZeekVal``
Returns a field's value from a Zeek record. Throws an exception if the given
record does not have such a field, or if the field does not have a value.
r: opaque handle to the Zeek record, as returned by other functions
field: name of the field to retrieve
.. _spicy_vector_index:
.. rubric:: ``function zeek::vector_index(id: string, index: uint64) : ZeekVal``
Returns the value of an index in a Zeek vector. Throws an exception if the
given ID does not exist, or does not have the expected type; or if the index
is out of bounds.
id: fully-qualified name of the global Zeek vector to check
index: index of the element to retrieve
.. _spicy_vector_index_2:
.. rubric:: ``function zeek::vector_index(v: ZeekVector, index: uint64) : ZeekVal``
Returns the value of an index in a Zeek vector. Throws an exception if the
index is out of bounds.
v: opaque handle to the Zeek vector, as returned by other functions
index: index of the element to retrieve
.. _spicy_vector_size:
.. rubric:: ``function zeek::vector_size(id: string) : uint64``
Returns the size of a Zeek vector. Throws an exception if the given ID does
not exist, or does not have the expected type.
id: fully-qualified name of the global Zeek vector to check
.. _spicy_vector_size_2:
.. rubric:: ``function zeek::vector_size(v: ZeekVector) : uint64``
Returns the size of a Zeek vector.
v: opaque handle to the Zeek vector, as returned by other functions

View file

@ -0,0 +1,5 @@
protocol analyzer spicy::MyHTTP over TCP:
parse originator with MyHTTP::RequestLine,
port 12345/tcp;
on MyHTTP::RequestLine -> event MyHTTP::request_line($conn, self.method, self.uri, self.version.number);

View file

@ -0,0 +1,26 @@
# @TEST-EXEC: echo "GET /index.html HTTP/1.0" | spicy-driver %INPUT >output
# @TEST-EXEC: btest-diff output
module MyHTTP;
const Token = /[^ \t\r\n]+/;
const WhiteSpace = /[ \t]+/;
const NewLine = /\r?\n/;
type Version = unit {
: /HTTP\//;
number: /[0-9]+\.[0-9]+/;
};
public type RequestLine = unit {
method: Token;
: WhiteSpace;
uri: Token;
: WhiteSpace;
version: Version;
: NewLine;
on %done {
print self.method, self.uri, self.version.number;
}
};

View file

@ -0,0 +1,4 @@
event MyHTTP::request_line(c: connection, method: string, uri: string, version: string)
{
print fmt("Zeek saw from %s: %s %s %s", c$id$orig_h, method, uri, version);
}

Binary file not shown.

View file

@ -0,0 +1,37 @@
function schedule_tftp_analyzer(id: conn_id)
{
# Schedule the TFTP analyzer for the expected next packet coming in on different
# ports. We know that it will be exchanged between same IPs and reuse the
# originator's port. "Spicy_TFTP" is the Zeek-side name of the TFTP analyzer
# (generated from "Spicy::TFTP" in tftp.evt).
Analyzer::schedule_analyzer(id$resp_h, id$orig_h, id$orig_p, Analyzer::ANALYZER_SPICY_TFTP, 1min);
}
event tftp::read_request(c: connection, is_orig: bool, filename: string, mode: string)
{
print "TFTP read request", c$id, filename, mode;
schedule_tftp_analyzer(c$id);
}
event tftp::write_request(c: connection, is_orig: bool, filename: string, mode: string)
{
print "TFTP write request", c$id, filename, mode;
schedule_tftp_analyzer(c$id);
}
# Add handlers for other packet types so that we see their events being generated.
event tftp::data(c: connection, is_orig: bool, block_num: count, data: string)
{
print "TFTP data", block_num, data;
}
event tftp::ack(c: connection, is_orig: bool, block_num: count)
{
print "TFTP ack", block_num;
}
event tftp::error(c: connection, is_orig: bool, code: count, msg: string)
{
print "TFTP error", code, msg;
}

View file

@ -0,0 +1,7 @@
protocol analyzer spicy::TFTP over UDP:
parse with TFTP::Packet,
port 69/udp;
import TFTP;
on TFTP::Request -> event tftp::request($conn, $is_orig, self.filename, self.mode);

View file

@ -0,0 +1,4 @@
event tftp::request(c: connection, is_orig: bool, filename: string, mode: string)
{
print "TFTP request", c$id, is_orig, filename, mode;
}

View file

@ -0,0 +1,7 @@
protocol analyzer spicy::TFTP over UDP:
parse with TFTP::Packet,
port 69/udp;
import TFTP;
on TFTP::Request -> event tftp::request($conn);

View file

@ -0,0 +1,4 @@
event tftp::request(c: connection)
{
print "TFTP request", c$id;
}

View file

@ -0,0 +1,9 @@
event tftp::read_request(c: connection, is_orig: bool, filename: string, mode: string)
{
print "TFTP read request", c$id, is_orig, filename, mode;
}
event tftp::write_request(c: connection, is_orig: bool, filename: string, mode: string)
{
print "TFTP write request", c$id, is_orig, filename, mode;
}

88
doc/devel/spicy/faq.rst Normal file
View file

@ -0,0 +1,88 @@
===
FAQ
===
.. _faq_zeek_install_spicy_and_plugin_to_use_parsers:
.. rubric:: Do I need to install Spicy and/or a Zeek plugin to use Spicy parsers in Zeek?
If you're using Zeek >= 5.0 with a default build configuration,
there's nothing else you need to install. After installing Zeek, the
same folder containing the ``zeek`` binary will also have the relevant
Spicy tools, such as ``spicyc`` (provided by Spicy) and ``spicyz``
(provided by Zeek). To double check that the Spicy support is indeed
available, look for ``Zeek::Spicy`` in the output of ``zeek -N``::
# zeek -N
<...>
Zeek::Spicy - Support for Spicy parsers (``*.spicy``, ``*.evt``, ``*.hlto``) (built-in)
Note that it remains possible to build Zeek against an external Spicy
installation, or even without any Spicy support at all. Look at Zeek's
``configure`` for corresponding options.
.. note::
For some historic background: Zeek 5.0 started bundling Spicy, as well
as the former Zeek plugin for Spicy, so that now nothing else needs to
be installed separately anymore to use Spicy parsers. Since Zeek 6.0,
the code for that former plugin has further moved into Zeek itself,
and is now maintained directly by the Zeek developers.
.. _faq_zeek_spicy_dpd_support:
.. rubric:: Does Spicy support *Dynamic Protocol Detection (DPD)*?
Yes, see the :ref:`corresponding section <spicy_dpd>` on how to add it
to your analyzers.
.. _faq_zeek_layer2_analyzer:
.. rubric:: Can I write a Layer 2 protocol analyzer with Spicy?
Yes, you can. In Zeek terminology a layer 2 protocol analyzer is a packet
analyzer, see the :ref:`corresponding section <spicy_packet_analyzer>` on how
to declare such an analyzer.
.. _faq_zeek_print_statements_no_effect:
.. rubric:: I have ``print`` statements in my Spicy grammar, why do I not see any output when running Zeek?
Zeek by default disables the output of Spicy-side ``print``
statements. To enable them, add ``Spicy::enable_print=T`` to the Zeek
command line (or ``redef Spicy::enable_print=T;`` to a Zeek script
that you are loading).
.. _faq_zeek_tcp_analyzer_not_all_messages_recognized:
.. rubric:: My analyzer recognizes only one or two TCP packets even though there are more in the input.
In Zeek, a Spicy analyzer parses the sending and receiving sides of a TCP
connection each according to the given Spicy grammar. This means that
if more than one message can be sent per side the grammar needs to
allow for that. For example, if the grammar parses messages of the
protocol as ``Message``, the top-level parsing unit given in the EVT
file needs to be able to parse a list of messages ``Message[]``.
One way to express this is to introduce a parser which wraps messages
of the protocol in an :spicylink:`anonymous field
<programming/parsing.html#anonymous-fields>`.
.. warning:: Since in general the number of messages exchanged over a TCP
connection is unbounded, an anonymous field should be used. If a named field
was used instead the parser would need to store all messages over the
connection which would lead to unbounded memory growth.
.. code-block:: spicy
type Message = unit {
# Fields for messages of the protocol.
};
# Parser used e.g., in EVT file.
public type Messages = unit {
: Message[];
};

View file

@ -0,0 +1,118 @@
===============
Getting Started
===============
Spicy's own :spicylink:`Getting Started <getting-started.html>` guide
uses the following Spicy code to parse a simple HTTP request line:
.. literalinclude:: examples/my-http.spicy
:lines: 4-
:caption: my-http.spicy
:language: spicy
While the Spicy documentation goes on to show :spicylink:`how to use
this to parse corresponding data from the command line
<getting-started.html#a-simple-parser>`, here we will instead leverage
the ``RequestLine`` parser to build a proof-of-concept protocol
analyzer for Zeek. While this all remains simplified here, the
following, more in-depth :ref:`spicy_tutorial` demonstrates how
to build a complete analyzer for a real protocol.
.. rubric:: Preparations
Because Zeek works from network packets, we first need a packet trace
with the payload we want to parse. We can't just use a normal HTTP
session as our simple parser wouldn't go further than just the first
line of the protocol exchange and then bail out with an error. So
instead, for our example we create a custom packet trace with a TCP
connection that carries just a single HTTP request line as its
payload::
# tcpdump -i lo0 -w request-line.pcap port 12345 &
# nc -l 12345 &
# echo "GET /index.html HTTP/1.0" | nc localhost 12345
# killall tcpdump nc
This gets us :download:`this trace file <examples/request-line.pcap>`.
.. _example_spicy_my_http_adding_analyzer:
.. rubric:: Adding a Protocol Analyzer
Now we can go ahead and add a new protocol analyzer to Zeek. We
already got the Spicy grammar to parse our connection's payload, it's
in ``my-http.spicy``. In order to use this with Zeek, we have two
additional things to do: (1) We need to let Zeek know about our new
protocol analyzer, including when to use it; and (2) we need to define
at least one Zeek event that we want our parser to generate, so that
we can then write a Zeek script working with the information that it
extracts.
We do both of these by creating an additional control file for Zeek:
.. literalinclude:: examples/my-http.evt
:caption: my-http.evt
:linenos:
:language: spicy-evt
The first block (lines 1-3) tells Zeek that we have a new protocol
analyzer to provide. The analyzer's Zeek-side name is
``spicy::MyHTTP``, and it's meant to run on top of TCP connections
(line 1). Lines 2-3 then provide Zeek with more specifics: The entry
point for originator-side payload is the ``MyHTTP::RequestLine`` unit
type that our Spicy grammar defines (line 2); and we want Zeek to
activate our analyzer for all connections with a responder port of
12345 (which, of course, matches the packet trace we created).
The second block (line 5) tells Zeek that we want to
define one event. On the left-hand side of that line we give the unit
that is to trigger the event. The right-hand side defines its name and
arguments. What we are saying here is that every time a ``RequestLine``
line has been fully parsed, we'd like a ``MyHTTP::request_line`` event
to go to Zeek. Each event instance will come with four parameters:
Three of them are the values of corresponding unit fields, accessed
just through normal Spicy expressions (inside an event argument
expression, ``self`` refers to the unit instance that has led to the
generation of the current event). The first parameter, ``$conn``, is a
"magic" keyword that passes the Zeek-side
connection ID (``conn_id``) to the event.
Now we got everything in place that we need for our new protocol
analyzer---except for a Zeek script actually doing something with the
information we are parsing. Let's use this:
.. literalinclude:: examples/my-http.zeek
:caption: my-http.zeek
:language: zeek
You see a Zeek event handler for the event that we just defined,
having the expected signature of four parameters matching the types of
the parameter expressions that the ``*.evt`` file specifies. The
handler's body then just prints out what it gets.
.. _example_spicy_my_http:
Finally we can put together our pieces by compiling the Spicy grammar and the
EVT file into an HLTO file with ``spicyz``, and by pointing Zeek at the produced
file and the analyzer-specific Zeek scripts::
# spicyz my-http.spicy my-http.evt -o my-http.hlto
# zeek -Cr request-line.pcap my-http.hlto my-http.zeek
Zeek saw from 127.0.0.1: GET /index.html 1.0
When Zeek starts up here the Spicy integration registers a protocol analyzer to
the entry point of our Spicy grammar as specified in the EVT file. It then
begins processing the packet trace as usual, now activating our new analyzer
whenever it sees a TCP connection on port 12345. Accordingly, the
``MyHTTP::request_line`` event gets generated once the parser gets to process
the session's payload. The Zeek event handler then executes and prints the
output we would expect.
.. note::
By default, Zeek suppresses any output from Spicy-side
``print`` statements. You can add ``Spicy::enable_print=T`` to the
command line to see it. In the example above, you would then get
an additional line of output: ``GET, /index.html, 1.0``.

73
doc/devel/spicy/index.rst Normal file
View file

@ -0,0 +1,73 @@
============================
Writing Analyzers with Spicy
============================
:spicylink:`Spicy <index.html>` is a parser generator that makes it
easy to create robust C++ parsers for network protocols, file formats,
and more. Zeek supports integrating Spicy analyzers so that one can
create Zeek protocol, packet and file analyzers. This section digs
into how that integration works. We begin with a short "Getting
Started" guide showing you the basics of using Spicy with Zeek,
followed by an in-depth tutorial on adding a complete protocol
analyzer to Zeek. The final part consists of a reference section
documenting everything the Spicy integration supports.
While this documentation walks through all the bits and pieces that an
analyzer consists of, there's an easy way to get started when writing
a new analyzer from scratch: the `Zeek package manager
<https://docs.zeek.org/projects/package-manager>`_ can create analyzer
scaffolding for you that includes an initial Spicy grammar
(``*.spicy``), Zeek integration glue code (``*.evt``; see below) and a
corresponding CMake build setup. To create that scaffolding, use the
package manager's ``create`` command and pass one of
``--features=spicy-protocol-analyzer``,
``--features=spicy-packet-analyzer``, or
``--features=spicy-file-analyzer`` to create a Zeek protocol, packet,
or file analyzer, respectively. See :ref:`the tutorial
<zkg_create_package>` for more on this.
Note that Zeek itself installs the grammars of its builtin Spicy
analyzers for potential reuse. For example, the `Finger grammar
<https://github.com/zeek/zeek/blob/master/src/analyzer/protocol/finger/finger.spicy>`_
gets installed to ``<PREFIX>/share/spicy/finger/finger.spicy``. It can
be used in custom code by importing it with ``import Finger from
finger;``.
.. toctree::
:maxdepth: 2
:caption: Table of Contents
installation
getting-started
tutorial
reference
faq
.. note::
This documentation focuses on writing *external* Spicy analyzers
that you can load into Zeek at startup. Zeek also comes with the
infrastructure to build Spicy analyzers directly into the
executable itself, just like traditional built-in analyzers. We
will document this more as we're converting more of Zeek's built-in
analyzers over to Spicy. For now, we recommend looking at one of
the existing built-in Spicy analyzers (Syslog, Finger) as examples.
.. _spicy_terminology:
Terminology
===========
A word on terminology: In Zeek, the term "analyzer" generally refers
to a component that processes a particular protocol ("protocol
analyzer"), file format ("file analyzer"), or low-level packet
structure ("packet analyzer"). "Processing" here means more than just
parsing content: An analyzer controls when it wants to be used (e.g.,
with connections on specific ports, or with files of a specific MIME
type); what events to generate for Zeek's scripting layer; and how to
handle any errors occurring during parsing. While Spicy itself focuses
just on the parsing part, Spicy makes it possible to provide the
remaining pieces to Zeek, turning a Spicy parser into a full Zeek
analyzer. That's what we refer to as a "Spicy (protocol/file/packet)
analyzer" for Zeek.

View file

@ -0,0 +1,18 @@
.. _spicy_installation:
Installation
============
Since Zeek version 5.0, support for Spicy is built right into Zeek by
default. To confirm that Spicy is indeed available, you can inspect
the output of ``zeek -N``::
# zeek -N Zeek::Spicy
Zeek::Spicy - Support for Spicy parsers (*.hlto) (built-in)
It remains possible to build Zeek against an external Spicy
installation through Zeek's ``configure`` option
``--with-spicy=PATH``, where ``PATH`` points to the Spicy installation
directory. In that case, you also need to ensure that the Spicy tools
(e.g., ``spicyc``, ``spicy-config``) are available in ``PATH``.

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,441 @@
.. _spicy_tutorial:
Tutorial
========
This tutorial walks through the integration of a simple TFTP analyzer
into Zeek. This discussion continues the example from
:spicylink:`Spicy's own tutorial <tutorial/index.html>` that develops
the TFTP grammar, now focusing on how to use it with Zeek. Please go
through that Spicy tutorial first before continuing here.
To turn a Spicy-side grammar into a Zeek analyzer, we need to provide
Zeek with a description of how to employ it. There are two parts to
that: Telling Zeek when to activate the analyzer, and defining events
to generate. In addition, we will need a Zeek-side script to do
something with our new TFTP events. We will walk through this in the
following, starting with the mechanics of compiling the Spicy analyzer
for Zeek. While we will build up the files involved individually
first, see the :ref:`final section <zkg_create_package>` for how the
Zeek package manager, *zkg*, can be used to bootstrap a new Zeek
package with a skeleton of everything needed for an analyzer.
Before proceeding, make sure that your Zeek comes with Spicy support
built-in---which is the default since Zeek version 5.0::
# zeek -N Zeek::Spicy
Zeek::Spicy - Support for Spicy parsers (*.hlto) (built-in)
You should also have ``spicyz`` in your ``PATH``::
# which spicyz
/usr/local/zeek/bin/spicyz
.. note::
There are a number of pieces involved in creating a full Zeek
analyzer, in particular if you want to distribute it as a Zeek
package. To help you get started with that, Zeek's package manager
can create a skeleton Spicy package by running::
# zkg create --features=spicy-protocol-analyzer --packagedir <packagedir>
The generated files mark places that will need manual editing with
``TODO``. See the :ref:`tutorial <zkg_create_package>` for more on
this.
Compiling the Analyzer
----------------------
Zeek comes with a tool :ref:`spicyz <spicyz>` that compiles Spicy
analyzers into binary code that Zeek can load through a Spicy plugin.
The following command line produces a binary object file ``tftp.hlto``
containing the executable analyzer code:
.. code::
# spicyz -o tftp.hlto tftp.spicy
Below, we will prepare an additional interface definition file
``tftp.evt`` that describes the analyzer's integration into Zeek. We
will need to give that to ``spicyz`` as well, and our full
compilation command hence becomes:
.. code::
# spicyz -o tftp.hlto tftp.spicy tftp.evt
When starting Zeek, we add ``tftp.hlto`` to its command line:
.. code::
# zeek -r tftp_rrq.pcap tftp.hlto
Activating the Analyzer
-----------------------
In *Getting Started*, :ref:`we already saw
<example_spicy_my_http_adding_analyzer>` how to inform Zeek about a new
protocol analyzer. We follow the same scheme here and put the
following into ``tftp.evt``, the analyzer definition file:
.. literalinclude:: autogen/tftp.evt
:lines: 5-7
:language: spicy-evt
The first line provides our analyzer with a Zeek-side name
(``spicy::TFTP``) and also tells Zeek that we are adding an
application analyzer on top of UDP (``over UDP``). ``TFTP::Packet``
provides the top-level entry point for parsing both sides of a TFTP
connection. Furthermore, we want Zeek to automatically activate our
analyzer for all sessions on UDP port 69 (i.e., TFTP's well known
port). See :ref:`spicy_evt_analyzer_setup` for more details on defining
such a ``protocol analyzer`` section.
.. note::
We use the ``port`` attribute in the ``protocol analyzer`` section
mainly for convenience; it's not the only way to define the
well-known ports. For a production analyzer, it's more idiomatic
to use a Zeek script instead; see :ref:`this note
<zeek_init_instead_of_port>` for more information.
With this in place, we can already employ the analyzer inside Zeek. It
will not generate any events yet, but we can at least see the output of
the ``on %done { print self; }`` hook that still remains part of the
grammar from earlier:
.. code::
# zeek -r tftp_rrq.pcap tftp.hlto Spicy::enable_print=T
[$opcode=Opcode::RRQ, $rrq=[$filename=b"rfc1350.txt", $mode=b"octet"], $wrq=(not set), $data=(not set), $ack=(not set), $error=(not set)]
As by default, the Zeek plugin does not show the output of Spicy-side
``print`` statements, we added ``Spicy::enable_print=T`` to the
command line to turn that on. We see that Zeek took care of the
lower network layers, extracted the UDP payload from the Read Request,
and passed that into our Spicy parser. (If you want to learn more about
the internals of what is happening here, there are a couple of kinds of
:ref:`debug output available <spicy_debugging>`.)
You might be wondering why there is only one line of output, even
though there are multiple TFTP packets in our pcap trace. Shouldn't
the ``print`` execute multiple times? Yes, it should, but it does not
currently: Due to some intricacies of the TFTP protocol, our analyzer
gets to see only the first packet for now. We will fix this later. For
now, we focus on the Read Request packet that the output above shows.
Defining Events
---------------
The core task of any Zeek analyzer is to generate events for Zeek
scripts to process. For binary protocols, events will often correspond
pretty directly to data units specified by their specifications---and
TFTP is no exception. We start with an event for Read/Write Requests
by adding this definition to ``tftp.evt``:
.. literalinclude:: examples/tftp-single-request.evt
:lines: 5-7
:language: spicy-evt
The first line makes our Spicy TFTP grammar available to the rest of
the file. The line ``on ...`` defines one event: Every time a
``Request`` unit will be parsed, we want to receive an event
``tftp::request`` with one parameter: the connection it belongs to.
Here, ``$conn`` is a reserved identifier that will turn into the
standard `connection record
<https://docs.zeek.org/en/current/scripts/base/init-bare.zeek.html#type-connection>`_
record on the Zeek side.
Now we need a Zeek event handler for our new event. Let's put this
into ``tftp.zeek``:
.. literalinclude:: examples/tftp-single-request.zeek
:language: zeek
Running Zeek then gives us:
.. code::
# spicyz -o tftp.hlto tftp.spicy tftp.evt
# zeek -r tftp_rrq.pcap tftp.hlto tftp.zeek
TFTP request, [orig_h=192.168.0.253, orig_p=50618/udp, resp_h=192.168.0.10, resp_p=69/udp]
Let's extend the event signature a bit by passing further arguments:
.. literalinclude:: examples/tftp-single-request-more-args.evt
:lines: 5-7
:language: spicy-evt
This shows how each parameter gets specified as a Spicy expression:
``self`` refers to the instance currently being parsed (``self``), and
``self.filename`` retrieves the value of its ``filename`` field.
``$is_orig`` is another reserved ID that turns into a boolean that
will be true if the event has been triggered by originator-side
traffic. On the Zeek side, our event now has the following signature:
.. literalinclude:: examples/tftp-single-request-more-args.zeek
:language: zeek
.. code::
# spicyz -o tftp.hlto tftp.spicy tftp.evt
# zeek -r tftp_rrq.pcap tftp.hlto tftp.zeek
TFTP request, [orig_h=192.168.0.253, orig_p=50618/udp, resp_h=192.168.0.10, resp_p=69/udp], T, rfc1350.txt, octet
Going back to our earlier discussion of Read vs Write Requests, we do
not yet make that distinction with the ``request`` event that we are
sending to Zeek-land. However, since we had introduced the ``is_read``
unit parameter, we can easily separate the two by gating event
generation through an additional ``if`` condition:
.. literalinclude:: autogen/tftp.evt
:lines: 11-12
:language: spicy-evt
This now defines two separate events, each being generated only for
the corresponding value of ``is_read``. Let's try it with a new
``tftp.zeek``:
.. literalinclude:: examples/tftp-two-requests.zeek
:language: zeek
.. code::
# spicyz -o tftp.hlto tftp.spicy tftp.evt
# zeek -r tftp_rrq.pcap tftp.hlto tftp.zeek
TFTP read request, [orig_h=192.168.0.253, orig_p=50618/udp, resp_h=192.168.0.10, resp_p=69/udp], T, rfc1350.txt, octet
If we look at the :file:`conn.log` that Zeek produces during this run, we
will see that the ``service`` field is not filled in yet. That's
because our analyzer does not yet confirm to Zeek that it has been
successful in parsing the content. To do that, we can call a library
function that Spicy makes available once we have successfully parsed a
request: :spicylink:`spicy::accept_input
<programming/library.html#spicy-accept-input>`. That function signals
the host application---i.e., Zeek in our case---that the parser is
processing the expected protocol.
First, we need to make sure the Spicy standard library is imported
in ``tftp.spicy``, so that we will have its functions available:
.. code::
import spicy;
With that, our request looks like this now:
.. code-block::
type Request = unit(is_read: bool) {
filename: bytes &until=b"\x00";
mode: bytes &until=b"\x00";
on %done { spicy::accept_input(); }
};
Let's try it again:
.. code::
# spicyz -o tftp.hlto tftp.spicy tftp.evt
# zeek -r tftp_rrq.pcap tftp.hlto tftp.zeek
TFTP read request, [orig_h=192.168.0.253, orig_p=50618/udp, resp_h=192.168.0.10, resp_p=69/udp], T, rfc1350.txt, octet
# cat conn.log
[...]
1367411051.972852 C1f7uj4uuv6zu2aKti 192.168.0.253 50618 192.168.0.10 69 udp spicy_tftp - - - S0 - - 0 D 1 48 0 0 -
[...]
Now the service field says TFTP! (There will be a 2nd connection in
the log that we are not showing here; see the next section on that).
Turning to the other TFTP packet types, it is straight-forward to add
events for them as well. The following is our complete ``tftp.evt``
file:
.. literalinclude:: autogen/tftp.evt
:lines: 5-
:language: spicy-evt
Detour: Zeek vs. TFTP
---------------------
We noticed above that Zeek seems to be seeing only a single TFTP
packet from our input trace, even though ``tcpdump`` shows that the
pcap file contains multiple different types of packets. The reason
becomes clear once we look more closely at the UDP ports that are in
use:
.. code::
# tcpdump -ttnr tftp_rrq.pcap
1367411051.972852 IP 192.168.0.253.50618 > 192.168.0.10.69: 20 RRQ "rfc1350.txtoctet" [tftp]
1367411052.077243 IP 192.168.0.10.3445 > 192.168.0.253.50618: UDP, length 516
1367411052.081790 IP 192.168.0.253.50618 > 192.168.0.10.3445: UDP, length 4
1367411052.086300 IP 192.168.0.10.3445 > 192.168.0.253.50618: UDP, length 516
1367411052.088961 IP 192.168.0.253.50618 > 192.168.0.10.3445: UDP, length 4
1367411052.088995 IP 192.168.0.10.3445 > 192.168.0.253.50618: UDP, length 516
[...]
Turns out that only the first packet is using the well-known TFTP port
69/udp, whereas all the subsequent packets use ephemeral ports. Due to
the port difference, Zeek believes it is seeing two independent
network connections, and it does not associate TFTP with the second
one at all due to its lack of the well-known port (neither does
``tcpdump``!). Zeek's connection log confirms this by showing two
separate entries:
.. code::
# cat conn.log
1367411051.972852 CH3xFz3U1nYI1Dp1Dk 192.168.0.253 50618 192.168.0.10 69 udp spicy_tftp - - - S0 - - 0 D 1 48 0 0 -
1367411052.077243 CfwsLw2TaTIeo3gE9g 192.168.0.10 3445 192.168.0.253 50618 udp - 0.181558 24795 196 SF - - 0 Dd 49 26167 49 1568 -
Switching the ports for subsequent packets is a quirk in TFTP that
resembles similar behaviour in standard FTP, where data connections
get set up separately as well. Fortunately, Zeek provides a built-in
function to designate a specific analyzer for an anticipated future
connection. We can call that function when we see the initial request:
.. literalinclude:: examples/tftp-schedule-analyzer.zeek
:language: zeek
.. code::
# spicyz -o tftp.hlto tftp.spicy tftp.evt
# zeek -r tftp_rrq.pcap tftp.hlto tftp.zeek
TFTP read request, [orig_h=192.168.0.253, orig_p=50618/udp, resp_h=192.168.0.10, resp_p=69/udp], rfc1350.txt, octet
TFTP data, 1, \x0a\x0a\x0a\x0a\x0a\x0aNetwork Working Group [...]
TFTP ack, 1
TFTP data, 2, B Official Protocol\x0a Standards" for the [...]
TFTP ack, 2
TFTP data, 3, protocol was originally designed by Noel Chia [...]
TFTP ack, 3
TFTP data, 4, r mechanism was suggested by\x0a PARC's EFT [...]
TFTP ack, 4
[...]
Now we are seeing all the packets as we would expect.
Zeek Script
-----------
Analyzers normally come along with a Zeek-side script that implements
a set of standard base functionality, such as recording activity into
a protocol specific log file. These scripts provide handlers for the
analyzers' events, and collect and correlate their activity as
desired. We have created such :download:`a script for TFTP
<autogen/tftp.zeek>`, based on the events that our Spicy analyzer
generates. Once we add that to the Zeek command line, we will see a
new :file:`tftp.log`:
.. code::
# spicyz -o tftp.hlto tftp.spicy tftp.evt
# zeek -r tftp_rrq.pcap tftp.hlto tftp.zeek
# cat tftp.log
#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p wrq fname mode uid_data size block_sent block_acked error_code error_msg
1367411051.972852 CKWH8L3AIekSHYzBU 192.168.0.253 50618 192.168.0.10 69 F rfc1350.txt octet ClAr3P158Ei77Fql8h 24599 49 49 - -
The TFTP script also labels the second session as TFTP data by
adding a corresponding entry to the ``service`` field inside the
Zeek-side connection record. With that, we are now seeing this in
:file:`conn.log`:
.. code::
1367411051.972852 ChbSfq3QWKuNirt9Uh 192.168.0.253 50618 192.168.0.10 69 udp spicy_tftp - - - S0 - -0 D 1 48 0 0 -
1367411052.077243 CowFQj20FHHduhHSYk 192.168.0.10 3445 192.168.0.253 50618 udp spicy_tftp_data 0.181558 24795 196 SF -- 0 Dd 49 26167 49 1568 -
The TFTP script ends up being a bit more complex than one would expect
for such a simple protocol. That's because it tracks the two related
connections (initial request and follow-up traffic on a different
port), and combines them into a single TFTP transaction for logging.
Since there is nothing Spicy-specific in that Zeek script, we skip
discussing it here in more detail.
.. _zkg_create_package:
Creating a Zeek Package
-----------------------
We have now assembled all the parts needed for providing a new
analyzer to Zeek. By adding a few further pieces, we can wrap that
analyzer into a full *Zeek package* for others to install easily
through *zkg*. To help create that wrapping, *zkg* provides a template
for instantiating a skeleton analyzer package as a starting point. The
skeleton comes in three different flavors, depending on which kind of
analyzer you want to create: protocol, file, or packet analyzer.
In each case, it creates all the necessary files along with the
appropriate directory layout, and even includes a couple of
standard test cases.
To create the scaffolding for our TFTP analyzer, execute the following
command and provide the requested information::
# zkg create --features spicy-protocol-analyzer --packagedir spicy-tftp
"package-template" requires a "name" value (the name of the package, e.g. "FooBar" or "spicy-http"):
name: spicy-tftp
"package-template" requires a "analyzer" value (name of the Spicy analyzer, which typically corresponds to the protocol/format being parsed (e.g. "HTTP", "PNG")):
analyzer: TFTP
"package-template" requires a "protocol" value (transport protocol for the analyzer to use: TCP or UDP):
protocol: UDP
"package-template" requires a "unit_orig" value (name of the top-level Spicy parsing unit for the originator side of the connection (e.g. "Request")):
unit_orig: Packet
"package-template" requires a "unit_resp" value (name of the top-level Spicy parsing unit for the responder side of the connection (e.g. "Reply"); may be the same as originator side):
unit_resp: Packet
The above creates the following files (skipping anything related to
``.git``)::
spicy-tftp/CMakeLists.txt
spicy-tftp/COPYING
spicy-tftp/README
spicy-tftp/analyzer/CMakeLists.txt
spicy-tftp/analyzer/tftp.evt
spicy-tftp/analyzer/tftp.spicy
spicy-tftp/cmake/FindSpicyPlugin.cmake
spicy-tftp/scripts/__load__.zeek
spicy-tftp/scripts/dpd.sig
spicy-tftp/scripts/main.zeek
spicy-tftp/testing/Baseline/tests.run-pcap/conn.log
spicy-tftp/testing/Baseline/tests.run-pcap/output
spicy-tftp/testing/Baseline/tests.standalone/
spicy-tftp/testing/Baseline/tests.standalone/output
spicy-tftp/testing/Baseline/tests.trace/output
spicy-tftp/testing/Baseline/tests.trace/tftp.log
spicy-tftp/testing/Files/random.seed
spicy-tftp/testing/Makefile
spicy-tftp/testing/Scripts/README
spicy-tftp/testing/Scripts/diff-remove-timestamps
spicy-tftp/testing/Scripts/get-zeek-env
spicy-tftp/testing/Traces/tcp-port-12345.pcap
spicy-tftp/testing/Traces/udp-port-12345.pcap
spicy-tftp/testing/btest.cfg
spicy-tftp/testing/tests/availability.zeek
spicy-tftp/testing/tests/standalone.spicy
spicy-tftp/testing/tests/trace.zeek
spicy-tftp/zkg.meta
Note the ``*.evt``, ``*.spicy``, ``*.zeek`` files: they correspond to
the files we created for TFTP in the preceding sections; we can just
move our versions in there. Furthermore, the generated scaffolding
marks places with ``TODO`` that need manual editing: use ``git grep
TODO`` inside the ``spicy-tftp`` directory to find them. We won't go
through all the specific customizations for TFTP here, but for
reference you can find the full TFTP package as created from the *zkg*
template on `GitHub <https://github.com/zeek/spicy-tftp>`_.
If instead of a protocol analyzer, you'd like to create a file or
packet analyzer, run zkg with ``--features spicy-file-analyzer`` or
``--features spicy-packet-analyzer``, respectively. The generated
skeleton will be suitably adjusted then.

317
doc/devel/websocket-api.rst Normal file
View file

@ -0,0 +1,317 @@
.. _websocket-api:
.. _websocat: https://github.com/vi/websocat
======================================
Interacting with Zeek using WebSockets
======================================
Introduction
============
Usually, Zeek produces protocol logs consumed by external applications. These
external applications might be SIEMs, real-time streaming analysis platforms
or basic archival processes compressing logs for long term storage.
Certain use-cases require interacting and influencing Zeek's runtime behavior
outside of static configuration via ``local.zeek``.
The classic :ref:`framework-input` and :ref:`framework-configuration` can be
leveraged for runtime configuration of Zeek as well as triggering arbitrary
events or script execution via option handlers. These frameworks are mostly
file- or process-based and may feel a bit unusual in environments where creation
of files is uncommon or even impossible due to separation of concerns. In many
of today's environments, interacting using HTTP-based APIs or other remote
interfaces is more common.
.. note::
As an aside, if you need more flexibility than the WebSocket API offers today,
an alternative could be to use :ref:`javascript` within Zeek. This opens the
possibility to run a separate HTTP or a totally different Node.js based server
within a Zeek process for quick experimentation and evaluation of other
approaches.
Background and Setup
====================
Since Zeek 5.0, Zeek allows connections from external clients over WebSocket.
This allows these clients to interact with Zeek's publish-subscribe layer and
exchange Zeek events with other Zeek nodes.
Initially, this implementation resided in the Broker subsystem.
With Zeek 8.0, most of the implementation has been moved into core Zeek
itself with the v1 serialization format remaining in Broker.
WebSocket clients may subscribe to a fixed set of topics and will receive
Zeek events matching these topics that Zeek cluster nodes, but also other
WebSocket clients, publish.
With Zeek 8.0, Zeekctl has received support to interact with Zeek cluster nodes
using the WebSocket protocol. If you're running a Zeekctl based cluster and
want to experiment with WebSocket functionality, add ``UseWebSocket = 1`` to
your ``zeekctl.cfg``:
.. code-block:: ini
# zeekctl.cfg
...
UseWebSocket = 1
This will essentially add the following snippet, enabling a WebSocket server
on the Zeek manager:
.. code-block:: zeek
:caption: websocket.zeek
event zeek_init()
{
if ( Cluster::local_node_type() == Cluster::MANAGER )
{
Cluster::listen_websocket([
$listen_addr=127.0.0.1,
$listen_port=27759/tcp,
]);
}
}
To verify that the WebSocket API is functional in your deployment use, for example,
`websocat`_ as a quick check.
.. code-block:: shell
$ echo '[]' | websocat ws://127.0.0.1:27759/v1/messages/json
{"type":"ack","endpoint":"3eece35d-9f94-568d-861c-6a16c433e090-websocket-2","version":"8.0.0-dev.684"}
Zeek's ``cluster.log`` file will also have an entry for the WebSocket client connection.
The empty array in the command specifies the client's subscriptions, in this case none.
Version 1
=========
The currently implemented protocol is accessible at ``/v1/messages/json``.
The `data representation <https://docs.zeek.org/projects/broker/en/current/web-socket.html#data-representation>`_
is documented in detail within the Broker project. Note that this format is a
direct translation of Broker's binary format into JSON, resulting in a fairly
tight coupling between WebSocket clients and the corresponding Zeek scripts.
Most prominent is the representation of record values as vectors instead
of objects, which makes the protocol sensitive to the reordering of record
fields or the introduction of optional ones.
.. note::
We're looking into an iteration of the format. If you have feedback or
would like to contribute, please reach out on the usual community channels.
Handshake and Acknowledgement
-----------------------------
The first message after a WebSocket connection has been established originates
from the client. This message is a JSON array of strings that represent the
topics the WebSocket client wishes to subscribe to.
Zeek replies with an acknowledgement message that's a JSON object or an error.
Events
------
After the acknowledgement, WebSocket clients receive all events arriving on
topics they have subscribed to.
.. code-block:: shell
$ websocat ws://127.0.0.1:27759/v1/messages/json
["zeek.test"]
{"type":"ack","endpoint":"d955d990-ad8a-5ed4-8bc5-bee252d4a2e6-websocket-0","version":"8.0.0-dev.684"}
{"type":"data-message","topic":"zeek.test","@data-type":"vector","data":[{"@data-type":"count","data":1},{"@data-type":"count","data":1},{"@data-type":"vector","data":[{"@data-type":"string","data":"hello"},{"@data-type":"vector","data":[{"@data-type":"count","data":3}]},{"@data-type":"vector","data":[]}]}]}
The received messages, again, are encoded in Broker's JSON format. Above ``data-message``
represents an event received on topic ``zeek.test``. The event's name is ``hello``.
This event has a single argument of type :zeek:type:`count`. In the example above
its value is ``3``.
To send events, WebSocket clients similarly encode their event representation
to Broker's JSON format and send them as `text data frames <https://datatracker.ietf.org/doc/html/rfc6455#section-5.6>`_.
X-Application-Name Header
-------------------------
When a WebSocket client includes an ``X-Application-Name`` HTTP header in
the initial WebSocket Handshake's GET request, that header's value is available
in the :zeek:see:`Cluster::websocket_client_added` event's ``endpoint`` argument (see :zeek:see:`Cluster::EndpointInfo`).
The header's value will also be included in ``cluster.log`` messages.
Additionally, if the cluster telemetry for WebSocket clients is set to
:zeek:see:`Cluster::Telemetry::VERBOSE` or :zeek:see:`Cluster::Telemetry::DEBUG`
via :zeek:see:`Cluster::Telemetry::websocket_metrics`, the header's value is
included as ``app`` label in metrics exposed by the :ref:`framework-telemetry`.
As of Zeek 8.0, a WebSocket client will be rejected if the header is set, but
its value doesn't match ``[-/_.=:*@a-zA-Z0-9]+``.
Language Bindings
-----------------
Note that it's possible to use any language that offers WebSocket bindings.
The ones listed below mostly add a bit of convenience features around the
initial Handshake message, error handling and serializing Zeek events and
values into the Broker-specific serialization format.
For example, using the Node.js `builtin WebSocket functionality <https://nodejs.org/en/learn/getting-started/websocket>`_,
the ``websocat`` example from above can be reproduced as follows:
.. code-block:: javascript
:caption: client.js
// client.js
const socket = new WebSocket('ws://192.168.122.107:27759/v1/messages/json');
socket.addEventListener('open', event => {
socket.send('["zeek.test"]');
});
socket.addEventListener('message', event => {
console.log('Message from server: ', event.data);
});
.. code-block:: shell
$ node ./client.js
Message from server: {"type":"ack","endpoint":"2e951b0c-3ca4-504c-ae8a-5d3750fec588-websocket-10","version":"8.0.0-dev.684"}
Message from server: {"type":"data-message","topic":"zeek.test","@data-type":"vector","data":[{"@data-type":"count","data":1},{"@data-type":"count","data":1},{"@data-type":"vector","data":[{"@data-type":"string","data":"hello"},{"@data-type":"vector","data":[{"@data-type":"count","data":374}]},{"@data-type":"vector","data":[]}]}]}
Golang
^^^^^^
* `Zeek Broker websocket interface library for Golang <https://github.com/corelight/go-zeek-broker-ws>`_ (not an official Zeek project)
Rust
^^^^
* `Rust types for interacting with Zeek over WebSocket <https://github.com/bbannier/zeek-websocket-rs>`_ (not an official Zeek project)
Python
^^^^^^
There are no ready-to-use Python libraries available, but the third-party
`websockets <https://github.com/python-websockets/websockets>`_ package
makes it easy to get started quickly.
You may take inspiration from `zeek-client's implementation <https://github.com/zeek/zeek-client>`_
or the `small helper library <https://raw.githubusercontent.com/zeek/zeek/refs/heads/master/testing/btest/Files/ws/wstest.py>`_ used by various of Zeek's own tests for the
WebSocket API.
Zeekctl similarly ships a `light implementation <https://github.com/zeek/zeekctl/blob/93459b37c3deab4bec9e886211672024fa3e4759/ZeekControl/events.py#L159>`_
using the ``websockets`` library to implement its ``netstats`` and ``print`` commands.
Outgoing Connections
====================
For some deployment scenarios, Zeek only offering a WebSocket server can be cumbersome.
Concretely, when multiple independent Zeek clusters interact with
a single instance of a remote API. For instance, this could be needed for
configuring a central firewall.
In such scenarios, it is more natural for Zeek to connect out to the
remote API, rather than the remote API connecting to the Zeek cluster.
For these use-cases, the current suggestion is to run a WebSocket bridge between
a Zeek cluster and the remote API. One concrete tool that can be used
for this purpose is `websocat`_.
.. note::
This topic has previously been discussed elsewhere. The following
`GitHub issue <https://github.com/zeek/zeek/issues/3597>`_ and
`discussion <https://github.com/zeek/zeek/discussions/4768>`_
provide more background and details.
Example Architecture
--------------------
.. figure:: ../images/websocket-api/one-api-many-zeek.svg
:width: 300
Multiple Zeek instances and a single remote API
The following proposal decouples the components using a WebSocket
bridge for every Zeek cluster. This ensures that the depicted remote API
does not need knowledge about an arbitrary number of Zeek clusters.
.. figure:: ../images/websocket-api/one-api-many-zeek-ws-bridge.svg
:width: 300
Multiple Zeek instances and a single remote API with WebSocket bridges.
Example Implementation
----------------------
Assuming the depicted remote API provides a WebSocket server as well,
it is possible to use ``websocat`` as the bridge directly.
The crux for the remote API is that upon a new WebSocket client connection,
the first message is the topic array that the remote API wishes to subscribe
to on a Zeek cluster.
Putting these pieces together, the following JavaScript script presents the
remote API, implemented using the `ws library <https://github.com/websockets/ws?tab=readme-ov-file>`_.
It accepts WebSocket clients on port 8080 and sends the topic array as the first message
containing just ``zeek.bridge.test``. Thereafter, it simply echoes all incoming
WebSocket messages.
.. literalinclude:: websocket-api/server.js
:caption: server.js
:language: javascript
The Zeek side starts a WebSocket server on port 8000 and regularly publishes
a ``hello`` event to the ``zeek.bridge.test`` topic.
.. literalinclude:: websocket-api/server.zeek
:caption: server.zeek
:language: zeek
These two servers can now be connected by running ``websocat`` as follows:
.. code-block:: shell
# In terminal 1 (use node if your Zeek has no JavaScript support)
$ zeek server.js
# In terminal 2
$ zeek server.zeek
# In terminal 3
$ while true; do websocat --text -H='X-Application-Name: client1' ws://localhost:8000/v1/messages/json ws://localhost:8080 || sleep 0.1 ; done
The first few lines of output in terminal 1 should then look as follows:
.. code-block:: shell
# zeek server.js
client1: connected, sending topics array ["zeek.bridge.test"]
client1: received: {"type":"ack","endpoint":"9089e06b-8d33-5585-ad79-4f7f6348754e-websocket-135","version":"8.1.0-dev.91"}
client1: received: {"type":"data-message","topic":"zeek.bridge.test","@data-type":"vector","data":[{"@data-type":"count","data":1},{"@data-type":"count","data":1},{"@data-type":"vector","data":[{"@data-type":"string","data":"hello"},{"@data-type":"vector","data":[{"@data-type":"count","data":1792}]},{"@data-type":"vector","data":[]}]}]}
...
If you require synchronization between the Zeek instance and the remote API, this
is best achieved with events once the connection between the remote API and the
Zeek cluster is established.
Alternative Approaches
----------------------
Since v21, Node.js contains a built-in `WebSocket client <https://nodejs.org/en/learn/getting-started/websocket>`_,
making it possible to use vanilla :ref:`javascript` within
Zeek to establish outgoing WebSocket connections, too.
The ``websocat`` tool provides more flexibility, for example allowing
WebSocket messages to be forwarded to external commands, which in turn could
use HTTP POST requests to reach an external API.

View file

@ -0,0 +1,23 @@
// server.js
//
// Minimal stand-in for a "remote API": accepts WebSocket clients on port
// 8080, immediately sends them the topic array to subscribe to on the Zeek
// side, and logs every message received thereafter.
import WebSocket, { WebSocketServer } from 'ws';

const wss = new WebSocketServer({ port: 8080 });

wss.on('connection', function connection(ws, req) {
    // Per-connection state: the subscription topics and the client's
    // self-reported application name (X-Application-Name header).
    const topics = ['zeek.bridge.test'];
    const app = req.headers['x-application-name'] || '<unknown application>';
    ws.zeek = {
        app: app,
        topics: topics,
    };

    ws.on('error', console.error);
    ws.on('close', () => { console.log('%s: gone', ws.zeek.app); });
    ws.on('message', (data) => {
        console.log('%s: received: %s', ws.zeek.app, data);
    });

    // First message must be the topics array (the Zeek WebSocket handshake).
    console.log(`${app}: connected, sending topics array ${JSON.stringify(topics)}`);
    ws.send(JSON.stringify(topics));
});

View file

@ -0,0 +1,15 @@
# Event published over the WebSocket bridge; carries a monotonically
# increasing counter as its single argument.
global hello: event(c : count);

# Counter sent with each `hello` event.
global c = 0;

# Publish `hello` to the bridge topic once a second, rescheduling itself.
event tick()
	{
	Cluster::publish("zeek.bridge.test", hello, ++c);
	schedule 1.0sec { tick() };
	}

event zeek_init()
	{
	# Accept WebSocket clients (e.g. websocat bridges) on localhost:8000.
	Cluster::listen_websocket([$listen_addr=127.0.0.1, $listen_port=8000/tcp]);
	event tick();
	}

41
doc/ext/literal-emph.py Normal file
View file

@ -0,0 +1,41 @@
import re
import sphinx
from docutils import nodes
# This extension adds a 'literal-emph' directive that operates the same
# as the 'code-block' directive except that it additionally understands
# the **strong emphasis** markup, allowing custom rendering of it to be
# substituted in the final literal block (e.g. HTML adds <strong> elements).
# Adding " (no-emph)" to the end of a line within the 'literal-emph' content
# disables substitutions for that line.
class LiteralEmphNode(nodes.General, nodes.Element):
    """Marker node wrapping a literal block whose ``**strong**`` markup
    should be substituted during HTML rendering (see depart_litemph_node)."""

    pass
class LiteralEmph(sphinx.directives.code.CodeBlock):
    """Directive behaving exactly like ``code-block``, but wrapping the
    rendered output in a LiteralEmphNode so emphasis markup can be
    post-processed by the HTML writer."""

    def run(self):
        wrapper = LiteralEmphNode()
        wrapper += super().run()
        return [wrapper]
def visit_litemph_node(self, node):
    """HTML visitor for LiteralEmphNode: nothing to emit on entry; all work
    happens in depart_litemph_node once the block has been rendered."""
    pass
def depart_litemph_node(self, node):
    """HTML departure handler for LiteralEmphNode.

    Rewrites the most recently emitted body chunk: every ``**text**`` span
    becomes ``<strong>text</strong>``, except on lines ending in " (no-emph)";
    that trailing marker itself is then stripped from such lines.
    """
    rendered = self.body[-1]
    emphasized = re.sub(
        r"\*\*(.*?)\*\*(?!.* \(no-emph\)\n)", r"<strong>\1</strong>", rendered
    )
    self.body[-1] = re.sub(r"(.*) \(no-emph\)\n", r"\1\n", emphasized)
def setup(app):
    """Sphinx extension entry point: register the ``literal-emph``
    directive and its node with the HTML render handlers."""
    app.add_directive("literal-emph", LiteralEmph)
    app.add_node(LiteralEmphNode, html=(visit_litemph_node, depart_litemph_node))
    return {
        "parallel_read_safe": True,
    }

391
doc/ext/spicy-pygments.py Normal file
View file

@ -0,0 +1,391 @@
# Copyright (c) 2020-now by the Zeek Project. See LICENSE for details.
from pygments.lexer import RegexLexer, bygroups, include, words
from pygments.token import (
Comment,
Keyword,
Name,
Number,
Operator,
Punctuation,
String,
Text,
)
from sphinx.highlighting import lexers
def setup(app):
    """Sphinx extension entry point: make the Spicy lexers available to
    the highlighter under the ``spicy`` and ``spicy-evt`` language names."""
    for alias, lexer_class in (("spicy", SpicyLexer), ("spicy-evt", SpicyEvtLexer)):
        lexers[alias] = lexer_class()
    return {
        "parallel_read_safe": True,
        "parallel_write_safe": True,
    }
class SpicyLexer(RegexLexer):
    """
    For `Spicy <https://github.com/zeek/spicy>`_ grammars.
    """

    name = "Spicy"
    aliases = ["spicy"]
    filenames = ["*.spicy"]

    # Regex fragments reused when assembling the token rules below.
    _hex = r"[0-9a-fA-F]"
    _float = r"((\d*\.?\d+)|(\d+\.?\d*))([eE][-+]?\d+)?"
    _h = r"[A-Za-z0-9][-A-Za-z0-9]*"
    _id = r"[a-zA-Z_][a-zA-Z_0-9]*"

    # NOTE: rule/include order matters in a RegexLexer -- earlier entries
    # win when patterns overlap.
    tokens = {
        "root": [
            include("whitespace"),
            include("comments"),
            include("directives"),
            include("attributes"),
            include("hooks"),
            include("properties"),
            include("types"),
            include("modules"),
            include("keywords"),
            include("literals"),
            include("operators"),
            include("punctuation"),
            include("function-call"),
            include("identifiers"),
        ],
        "whitespace": [
            (r"\n", Text),
            (r"\s+", Text),
            (r"\\\n", Text),
        ],
        "comments": [
            (r"#.*$", Comment),
        ],
        # Preprocessor-style conditionals (@if/@else/@endif).
        "directives": [(r"(@(if|else|endif))\b", Comment.Preproc)],
        # Unit/field attributes, all introduced by '&'.
        "attributes": [
            (
                words(
                    (
                        "bit-order",
                        "byte-order",
                        "chunked",
                        "convert",
                        "count",
                        "cxxname",
                        "default",
                        "eod",
                        "internal",
                        "ipv4",
                        "ipv6",
                        "length",
                        "max-size",
                        "no-emit",
                        "nosub",
                        "on-heap",
                        "optional",
                        "originator",
                        "parse-at",
                        "parse-from",
                        "priority",
                        "requires",
                        "responder",
                        "size",
                        "static",
                        "synchronize",
                        "transient",
                        "try",
                        "type",
                        "until",
                        "until-including",
                        "while",
                        "have_prototype",
                    ),
                    prefix=r"&",
                    suffix=r"\b",
                ),
                Keyword.Pseudo,
            ),
        ],
        # "on <hook-name>" declarations; first rule handles namespaced names.
        "hooks": [
            (
                rf"(on)(\s+)(({_id}::)+%?{_id}(\.{_id})*)",
                bygroups(Keyword, Text, Name.Function),
            ),
            (rf"(on)(\s+)(%?{_id}(\.{_id})*)", bygroups(Keyword, Text, Name.Function)),
        ],
        "properties": [
            # Like an ID, but allow hyphenation ('-')
            (r"%[a-zA-Z_][a-zA-Z_0-9-]*", Name.Attribute),
        ],
        "types": [
            (
                words(
                    (
                        "any",
                        "addr",
                        "bitfield",
                        "bool",
                        "bytes",
                        "__library_type",
                        "iterator",
                        "const_iterator",
                        "int8",
                        "int16",
                        "int32",
                        "int64",
                        "uint8",
                        "uint16",
                        "uint32",
                        "uint64",
                        "enum",
                        "interval",
                        "interval_ns",
                        "list",
                        "map",
                        "optional",
                        "port",
                        "real",
                        "regexp",
                        "set",
                        "sink",
                        "stream",
                        "view",
                        "string",
                        "time",
                        "time_ns",
                        "tuple",
                        "unit",
                        "vector",
                        "void",
                        "function",
                        "struct",
                    ),
                    prefix=r"\b",
                    suffix=r"\b",
                ),
                Keyword.Type,
            ),
            # "type Foo::Bar" declarations: highlight the declared name.
            (
                rf"\b(type)(\s+)((?:{_id})(?:::(?:{_id}))*)\b",
                bygroups(Keyword, Text, Name.Class),
            ),
        ],
        "modules": [
            # "import <id> from <path>"
            (
                rf"\b(import)(\s+)({_id})(\s+)(from)(\s+)(\S+)\b",
                bygroups(
                    Keyword.Namespace,
                    Text,
                    Name.Namespace,
                    Text,
                    Keyword.Namespace,
                    Text,
                    Name.Namespace,
                ),
            ),
            # "module <id>" / "import <id>"
            (
                rf"\b(module|import)(\s+)({_id})\b",
                bygroups(Keyword.Namespace, Text, Name.Namespace),
            ),
        ],
        "keywords": [
            (
                words(
                    ("global", "const", "local", "var", "public", "private", "inout"),
                    prefix=r"\b",
                    suffix=r"\b",
                ),
                Keyword.Declaration,
            ),
            (
                words(
                    (
                        "print",
                        "add",
                        "delete",
                        "stop",
                        "unset",
                        "assert",
                        "assert-exception",
                        "new",
                        "cast",
                        "begin",
                        "end",
                        "type",
                        "attribute",
                        "on",
                        "priority",
                        "if",
                        "else",
                        "switch",
                        "case",
                        "default",
                        "try",
                        "catch",
                        "break",
                        "return",
                        "continue",
                        "while",
                        "for",
                        "foreach",
                        "module",
                        "import",
                        "export",
                        "from",
                    ),
                    prefix=r"\b",
                    suffix=r"\b",
                ),
                Keyword,
            ),
        ],
        "literals": [
            (r'b?"', String, "string"),
            # Not the greatest match for patterns, but generally helps
            # disambiguate between start of a pattern and just a division
            # operator.
            (r"/(?=.*/)", String.Regex, "regex"),
            (r"\b(True|False|None|Null)\b", Keyword.Constant),
            # Port
            (r"\b\d{1,5}/(udp|tcp)\b", Number),
            # IPv4 Address
            (
                r"\b(25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[0-9]{1,2})\.(25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[0-9]{1,2})\.(25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[0-9]{1,2})\.(25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[0-9]{1,2})\b",
                Number,
            ),
            # IPv6 Address (not 100% correct: that takes more effort)
            (
                r"\[([0-9a-fA-F]{0,4}:){2,7}([0-9a-fA-F]{0,4})?((25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[0-9]{1,2})\.(25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[0-9]{1,2})\.(25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[0-9]{1,2})\.(25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[0-9]{1,2}))?\]",
                Number,
            ),
            # Numeric
            (rf"\b0[xX]{_hex}+\b", Number.Hex),
            (rf"\b{_float}\b", Number.Float),
            (r"\b(\d+)\b", Number.Integer),
        ],
        "operators": [
            (r"[$][$]", Name.Builtin.Pseudo),  # just-parsed-element
            (r"[$]\d+", Name.Builtin.Pseudo),  # capture-group
            (r"\b(in)\b", Operator.Word),
            (r"[-+*=&|<>.]{2}", Operator),
            (r"[-+*/=!><]=", Operator),
            (r"[?][.]", Operator),
            (r"[.][?]", Operator),
            (r"[-][>]", Operator),
            (r"[!][<>]", Operator),
            (r"[!%*/+<=>~|&^-]", Operator),
            # Technically, colons are often used for punctuation/separation.
            # E.g. field name/type separation.
            (r"[?:]", Operator),
        ],
        "punctuation": [
            (r"[{}()\[\],;:.]", Punctuation),
        ],
        # Identifier (possibly namespaced) immediately followed by '('.
        "function-call": [
            (rf"\b((?:{_id})(?:::(?:{_id}))*)(?=\s*\()", Name.Function),
        ],
        "identifiers": [
            (r"\b(self)\b", Name.Builtin.Pseudo),
            (r"([a-zA-Z_]\w*)(::)", bygroups(Name, Punctuation)),
            (r"[a-zA-Z_]\w*", Name),
        ],
        # Inside a (byte-)string literal; handles escapes and %-format codes.
        "string": [
            (r"\\.", String.Escape),
            (r"%-?[0-9]*(\.[0-9]+)?[DTdxsefg]", String.Escape),
            (r'"', String, "#pop"),
            (r".", String),
        ],
        # Inside a /.../ regex literal.
        "regex": [
            (r"\\.", String.Escape),
            (r"/", String.Regex, "#pop"),
            (r".", String.Regex),
        ],
    }
class SpicyEvtLexer(RegexLexer):
    """
    For `Spicy <https://github.com/zeek/spicy>`_ Zeek interface definitions.

    Most token states are shared with :class:`SpicyLexer`; only the
    keywords and identifiers differ for ``*.evt`` files.
    """

    name = "SpicyEvt"
    aliases = ["spicy-evt"]
    filenames = ["*.evt"]

    _id = r"[a-zA-Z_][a-zA-Z_0-9]*"

    tokens = {
        "root": [
            include("whitespace"),
            include("comments"),
            include("directives"),
            include("hooks"),
            include("modules"),
            include("keywords"),
            include("literals"),
            include("operators"),
            include("punctuation"),
            include("function-call"),
            include("identifiers"),
        ],
        # Reuse the Spicy states where the syntax is identical.
        "whitespace": SpicyLexer.tokens["whitespace"],
        "comments": SpicyLexer.tokens["comments"],
        "directives": SpicyLexer.tokens["directives"],
        "hooks": SpicyLexer.tokens["hooks"],
        "modules": SpicyLexer.tokens["modules"],
        "keywords": [
            # "analyzer/with/replaces <Name>": highlight the analyzer name.
            (
                rf"\b(analyzer|with|replaces)(\s+)({_id}(::{_id})*)",
                bygroups(Keyword, Text, Name.Class),
            ),
            (
                words(("protocol", "packet", "file"), prefix=r"\b", suffix=r"\b"),
                Keyword.Type,
            ),
            (
                words(
                    ("port", "event", "parse", "over", "mime-type"),
                    prefix=r"\b",
                    suffix=r"\b",
                ),
                Keyword,
            ),
            # Fix: ("cast") is just the string "cast" -- words() would
            # iterate it character by character and match the single
            # letters c/a/s/t as keywords. A one-element tuple is needed.
            (words(("cast",), prefix=r"\b", suffix=r"\b"), Keyword),
            (
                words(
                    (
                        "if",
                        "else",
                        "switch",
                        "case",
                        "default",
                        "try",
                        "catch",
                        "break",
                        "return",
                        "continue",
                        "while",
                        "for",
                        "foreach",
                    ),
                    prefix=r"\b",
                    suffix=r"\b",
                ),
                Keyword,
            ),
        ],
        "literals": SpicyLexer.tokens["literals"],
        "operators": SpicyLexer.tokens["operators"],
        "punctuation": SpicyLexer.tokens["punctuation"],
        "function-call": SpicyLexer.tokens["function-call"],
        "identifiers": [
            (r"\b(ZEEK_VERSION)\b", Name.Builtin),
            (r"\b(self)\b", Name.Builtin.Pseudo),
            # Built-in evt variables usable in event expressions.
            (r"[$](conn|file|is_orig)", Name.Builtin.Pseudo),
            (r"([a-zA-Z_]\w*)(::)", bygroups(Name, Punctuation)),
            (r"[a-zA-Z_]\w*", Name),
        ],
        "string": SpicyLexer.tokens["string"],
        "regex": SpicyLexer.tokens["regex"],
    }

597
doc/ext/zeek.py Normal file
View file

@ -0,0 +1,597 @@
"""
The Zeek domain for Sphinx.
"""
import collections
def setup(Sphinx):
    """Sphinx extension entry point: install the ``zeek`` domain, the
    ``see`` node/directive, and the event handlers they rely on."""
    Sphinx.add_domain(ZeekDomain)
    Sphinx.add_node(see)
    Sphinx.add_directive_to_domain("zeek", "see", SeeDirective)
    for event_name, handler in (
        ("object-description-transform", object_description_transform),
        ("doctree-resolved", process_see_nodes),
    ):
        Sphinx.connect(event_name, handler)
    return {
        "parallel_read_safe": True,
    }
from sphinx import addnodes, version_info
from sphinx.directives import ObjectDescription
from sphinx.domains import Domain, Index, ObjType
from sphinx.locale import _
from sphinx.roles import XRefRole
from sphinx.util import docfields, logging
from sphinx.util.nodes import make_refnode
logger = logging.getLogger(__name__)
from docutils import nodes
from docutils.parsers.rst import Directive, directives
class see(nodes.General, nodes.Element):
    """Placeholder node created by the ``.. zeek:see::`` directive; it is
    replaced by cross-reference text once the doctree is resolved (see
    process_see_nodes)."""

    # Target names to link to. NOTE(review): mutable class-level default;
    # SeeDirective.run() assigns a fresh list per instance, so this only
    # acts as a fallback -- confirm nothing mutates it in place.
    refs = []
class SeeDirective(Directive):
    """Implements ``.. zeek:see::``: collects whitespace-separated target
    names from the directive content into a ``see`` placeholder node."""

    has_content = True

    def run(self):
        placeholder = see("")
        placeholder.refs = " ".join(self.content).split()
        return [placeholder]
def make_index_tuple(indextype, indexentry, targetname, targetname2):
    """Build an index-node entry tuple, appending the extra ``None`` key
    field required by Sphinx >= 1.4 while staying backwards compatible
    with older Sphinx releases."""
    entry = (indextype, indexentry, targetname, targetname2)
    if version_info >= (1, 4, 0, "", 0):
        entry += (None,)
    return entry
def object_description_transform(app, domain, objtype, contentnode):
    """
    Add all collected record fields as a "Fields" field to a ZeekType.

    Connected to Sphinx's ``object-description-transform`` event; only
    ``type`` objects in the ``zeek`` domain are handled.
    """
    if domain != "zeek" or objtype != "type":
        return

    # Name of the type whose description is currently being rendered.
    type_name = app.env.ref_context["zeek:type"]
    record_fields = app.env.domaindata["zeek"].get("fields", {}).get(type_name)

    if not record_fields:
        return

    # NOTE(review): assumes the first child of the content node is the
    # field list to extend -- confirm this holds for all ZeekType content.
    field_list = contentnode[0]

    name = nodes.field_name("", _("Fields"))
    body = nodes.field_body("")

    # Each collected field contributes its index entry and signature node.
    for field_name, record_field in record_fields.items():
        body += record_field["idx"]
        body += record_field["signode"]

    field_list.append(nodes.field("", name, body))
def process_see_nodes(app, doctree, fromdocname):
    """Replace each ``see`` placeholder node with a "See also:" paragraph.

    Known targets become cross-references; unknown targets are kept as
    plain text and a warning is logged. Connected to Sphinx's
    ``doctree-resolved`` event.
    """
    for node in doctree.traverse(see):
        content = []
        para = nodes.paragraph()
        para += nodes.Text("See also:", "See also:")
        for name in node.refs:
            # Separate entries with ", "; the first entry gets a plain
            # space after "See also:".
            join_str = " "
            if name != node.refs[0]:
                join_str = ", "
            link_txt = join_str + name

            if name not in app.env.domaindata["zeek"]["idtypes"]:
                # Just create the text and issue warning
                logger.warning(
                    '%s: unknown target for ".. zeek:see:: %s"',
                    fromdocname,
                    name,
                    location=node,
                )
                para += nodes.Text(link_txt, link_txt)
            else:
                # Create a reference
                typ = app.env.domaindata["zeek"]["idtypes"][name]
                todocname = app.env.domaindata["zeek"]["objects"][(typ, name)]

                newnode = nodes.reference("", "")
                innernode = nodes.literal(_(name), _(name), classes=["xref"])
                newnode["refdocname"] = todocname
                newnode["refuri"] = app.builder.get_relative_uri(fromdocname, todocname)
                # Anchor format matches the targets generated by
                # ZeekGeneric.add_target_and_index (e.g. "type-<name>").
                newnode["refuri"] += "#" + typ + "-" + name
                newnode.append(innernode)
                para += nodes.Text(join_str, join_str)
                para += newnode

        content.append(para)
        node.replace_self(content)
class ZeekGeneric(ObjectDescription):
    """Base directive for documenting Zeek objects.

    Handles anchor/index registration for each described object and,
    when a ``source-code`` option is present, linking the signature to
    the object's source on the configured code-browsing URL.
    """

    option_spec = {"source-code": directives.unchanged}

    def __init__(self, *args, **kwargs):
        # NOTE(review): super(ObjectDescription, self) skips
        # ObjectDescription's own __init__ and calls its parent's instead;
        # confirm this is intentional and not a typo for super().__init__.
        super(ObjectDescription, self).__init__(*args, **kwargs)
        # NOTE(review): assumes the directive's options dict is the third
        # positional argument -- verify against the docutils Directive API.
        options = args[2]
        self.code_url = None

        if "source-code" in options and "zeek-code-url" in self.env.config:
            base_url = self.env.config["zeek-code-url"]
            # Option value format: "<path> <start-line> <end-line>".
            path, start, end = options["source-code"].split()
            path_parts = path.split("/")
            file_name = path_parts[-1]

            # Don't have anything to link to for BIFs
            if not file_name.endswith(".bif.zeek"):
                self.code_url = f"{base_url}/scripts/{path}#L{start}-L{end}"

    def get_obj_name(self):
        # Object-type string used in anchors and index entries; subclasses
        # may override.
        return self.objtype

    def update_type_map(self, idname):
        # Record this identifier's object type so cross-references (e.g.
        # zeek:see) can later resolve the name to the right target.
        if "idtypes" not in self.env.domaindata["zeek"]:
            self.env.domaindata["zeek"]["idtypes"] = {}

        self.env.domaindata["zeek"]["idtypes"][idname] = self.get_obj_name()

    def process_signode(self, name, sig, signode, targetname):
        # Attach the anchor name to the signature node and register it as
        # an explicit link target in the document.
        signode["names"].append(targetname)
        signode["ids"].append(targetname)
        signode["first"] = not self.names
        self.state.document.note_explicit_target(signode)

    def add_target_and_index(self, name, sig, signode):
        # Anchor format "<objtype>-<name>", e.g. "type-connection"; must
        # stay in sync with the "#<typ>-<name>" refuri in process_see_nodes.
        targetname = self.get_obj_name() + "-" + name

        if targetname not in self.state.document.ids:
            self.process_signode(name, sig, signode, targetname)

            objects = self.env.domaindata["zeek"]["objects"]
            key = (self.get_obj_name(), name)

            # NOTE(review): duplicates of id/type/field objects are
            # tolerated without a warning -- presumably because redefs
            # legitimately re-describe them; confirm.
            if (
                key in objects
                and self.get_obj_name() != "id"
                and self.get_obj_name() != "type"
                and self.get_obj_name() != "field"
            ):
                logger.warning(
                    "%s: duplicate description of %s %s, other instance in %s %s",
                    self.env.docname,
                    self.get_obj_name(),
                    name,
                    self.env.doc2path(objects[key]),
                    self.lineno,
                )

            objects[key] = self.env.docname
            self.update_type_map(name)

        indextext = self.get_index_text(name)

        if indextext:
            self.indexnode["entries"].append(
                make_index_tuple("single", indextext, targetname, targetname)
            )

    def get_index_text(self, name):
        # Text shown in the general index, e.g. "connection (type)".
        return _("%s (%s)") % (name, self.get_obj_name())

    def handle_signature(self, sig, signode):
        # Render the signature; link it to the source code when a code URL
        # was derived in __init__, otherwise emit a plain name node.
        if self.code_url:
            signode += nodes.reference(
                sig, sig, refuri=self.code_url, reftitle="View Source Code"
            )
            # Could embed snippets directly, but would probably want to clean
            # up how it's done: don't use an external script, figure out why
            # tab/indentation is broken, toggle snippet visibility on mouse
            # hover or other explicit button/link, fix the colors/theming...
            # But for now, leaving this commented out as an example and quick
            # way of checking that the code ranges that Zeekygen outputs are
            # sensible.
            # import urllib
            # snippet_target = urllib.parse.quote(self.code_url, '')
            # snippet_url = 'https://emgithub.com/embed.js'
            # snippet_url += f'?target={snippet_target}'
            # snippet_url += '&style=github'
            # snippet_url += '&showLineNumbers=on'
            # snippet_url += '&showBorder=on'
            # snippet_url += '&ts=4'
            # rawnode = nodes.raw('', f'<script src="{snippet_url}"></script>',
            #                     format='html')
            # signode += rawnode
        else:
            signode += addnodes.desc_name("", sig)

        return sig
class ZeekNamespace(ZeekGeneric):
    """Directive for a Zeek namespace (script module)."""

    def add_target_and_index(self, name, sig, signode):
        """Register the namespace and add two index entries for it."""
        target_id = self.get_obj_name() + "-" + name

        # Only turn the signature into an explicit target once per document.
        if target_id not in self.state.document.ids:
            signode["names"].append(target_id)
            signode["ids"].append(target_id)
            signode["first"] = not self.names
            self.state.document.note_explicit_target(signode)

        obj_map = self.env.domaindata["zeek"]["objects"]
        obj_map[(self.get_obj_name(), name)] = self.env.docname
        self.update_type_map(name)

        # One entry under the namespace's own name, and one grouped under
        # the generic "namespaces" index heading.
        entries = self.indexnode["entries"]
        entries.append(
            make_index_tuple("single", self.get_index_text(name), target_id, target_id)
        )
        entries.append(
            make_index_tuple("single", f"namespaces; {sig}", target_id, target_id)
        )

    def get_index_text(self, name):
        """Return the index entry text for the namespace."""
        return _("%s (namespace); %s") % (name, self.env.docname)

    def handle_signature(self, sig, signode):
        """Render the namespace name verbatim."""
        signode += addnodes.desc_name("", sig)
        return sig
class ZeekEnum(ZeekGeneric):
    """Directive for a Zeek enum value.

    The signature is expected to look like "<value> <enum type>".
    Values of the Notice::Type enum are additionally collected so that
    the ZeekNotices index can list them.
    """

    def add_target_and_index(self, name, sig, signode):
        """Register the enum value and file it in the index."""
        targetname = self.get_obj_name() + "-" + name

        if targetname not in self.state.document.ids:
            self.process_signode(name, sig, signode, targetname)

        objects = self.env.domaindata["zeek"]["objects"]
        key = (self.get_obj_name(), name)
        objects[key] = self.env.docname
        self.update_type_map(name)

        # indextext = self.get_index_text(name)
        # self.indexnode['entries'].append(make_index_tuple('single', indextext,
        #                                  targetname, targetname))

        # m[0] is the enum value, m[1] its enclosing enum type.
        m = sig.split()

        if len(m) < 2:
            logger.warning(
                "%s: zeek:enum directive missing argument(s)", self.env.docname
            )
            return

        # Collect Notice::Type values for the notice index (ZeekNotices).
        if m[1] == "Notice::Type":
            if "notices" not in self.env.domaindata["zeek"]:
                self.env.domaindata["zeek"]["notices"] = []

            self.env.domaindata["zeek"]["notices"].append(
                (m[0], self.env.docname, targetname)
            )

        self.indexnode["entries"].append(
            make_index_tuple(
                "single", f"{m[1]} (enum values); {m[0]}", targetname, targetname
            )
        )

    def handle_signature(self, sig, signode):
        """Render only the enum value itself (the first signature token)."""
        m = sig.split()
        name = m[0]
        signode += addnodes.desc_name("", name)
        return name
class ZeekParamField(docfields.GroupedField):
    """Grouped ":param:" doc field for Zeek identifier directives.

    ``has_arg``/``is_typed`` let the field take an argument name and an
    associated type, as in ``:param name: description``.
    """

    has_arg = True
    is_typed = True
class ZeekIdentifier(ZeekGeneric):
    """Directive for a Zeek script-level identifier.

    Supports ":param:" fields in the directive body via ZeekParamField.
    """

    zeek_param_field = ZeekParamField("param", label="Parameters", can_collapse=True)
    field_type_map = {"param": (zeek_param_field, False)}

    def get_index_text(self, name):
        # Index identifiers under their plain name, without a type suffix.
        return name

    def get_field_type_map(self):
        """Return the mapping of field names to doc-field handlers."""
        return self.field_type_map
class ZeekNative(ZeekGeneric):
    """Directive for natively-defined (non-script) Zeek objects.

    Unlike the other directives, it produces no rendered description:
    only an index entry plus a bare link target are emitted, and the
    .rst document is expected to supply the visible presentation.
    """

    def handle_signature(self, sig, signode):
        # The run() method is overridden to drop signode anyway in favor of
        # simply adding the index and a target nodes and leaving up
        # to the .rst document to explicitly add things that need to
        # be presented in the final rendering (e.g. a section header)
        self.native_name = sig
        return sig

    def process_signode(self, name, sig, signode, targetname):
        # Intentionally a no-op: no explicit target on the signature node.
        pass

    def run(self):
        """Run the base directive, keeping only its index node plus a target."""
        ns = super().run()
        index_node = ns[0]

        target_id = self.get_obj_name() + "-" + self.native_name
        target_node = nodes.target("", "", ids=[target_id])
        self.state.document.note_explicit_target(target_node)

        # Replace the description node from Sphinx with a simple target node
        return [index_node, target_node]
class ZeekKeyword(ZeekNative):
    """Directive for Zeek language keywords and "@"-directives."""

    def get_index_text(self, name):
        # "@"-prefixed names are preprocessor-style directives rather
        # than language keywords; index them accordingly.
        if name.startswith("@"):
            return _("%s (directive)") % (name)

        return _("%s (keyword)") % (name)
class ZeekAttribute(ZeekNative):
    """Directive for Zeek "&"-attributes."""

    def get_index_text(self, name):
        # Attributes are indexed with an explicit "(attribute)" suffix.
        return _("%s (attribute)") % (name)
class ZeekType(ZeekGeneric):
    """
    Put the type that's currently documented into env.ref_context
    for usage with the ZeekField directive.
    """

    def before_content(self):
        # ZeekField.handle_signature() reads this key to qualify field
        # names with the enclosing record type.
        self.env.ref_context["zeek:type"] = self.arguments[0]

    def after_content(self):
        # Pop with a default so a missing key never raises.
        self.env.ref_context.pop("zeek:type", None)
class ZeekField(ZeekGeneric):
    """Directive for a single field of a Zeek record type.

    Relies on the enclosing ZeekType directive having stored the record
    type name in ``env.ref_context["zeek:type"]``. Rather than rendering
    inline, fields are stashed in the domain data ("fields" map) keyed by
    their fully-qualified ``Type$field`` name.
    """

    def handle_signature(self, sig, signode):
        """
        The signature for .. zeek:field: currently looks like the following:

        .. zeek:field:: ts :zeek:type:`time` :zeek:attr:`&log` :zeek:attr:`&optional`

        Returns the fully-qualified name ``<record type>$<field name>``.
        """
        # At most 3 parts: field name, type markup, optional attribute markup.
        parts = sig.split(" ", 2)
        name, type_str = parts[0:2]
        record_type = self.env.ref_context["zeek:type"]
        fullname = "$".join([record_type, name])
        attrs_str = ""

        if len(parts) == 3:
            attrs_str = parts[2]

        # Name the unused "system messages" return value explicitly instead
        # of "_", which would shadow the module-level gettext alias used as
        # _( "..." ) elsewhere in this file.
        type_nodes, _messages = self.state.inline_text(type_str, -1)

        # Render as "name: type [attrs]".
        signode += addnodes.desc_name(name, name)
        signode += addnodes.desc_sig_punctuation("", ":")
        signode += addnodes.desc_sig_space()
        signode += type_nodes

        if attrs_str:
            attr_nodes, _messages = self.state.inline_text(attrs_str, -1)
            signode += addnodes.desc_sig_space()
            signode += attr_nodes

        signode["class"] = record_type
        signode["fullname"] = fullname
        return fullname

    def run(self):
        """Record the field in domain data and emit no document nodes."""
        idx, signode = super().run()

        record_type = self.env.ref_context["zeek:type"]
        fields = self.env.domaindata["zeek"].setdefault("fields", {})
        # Keep fields in declaration order per record type.
        rfields = fields.setdefault(record_type, collections.OrderedDict())
        rfields[signode[0]["fullname"]] = {
            "idx": idx,
            "signode": signode,
        }

        # The field is rendered later from domain data, not inline.
        return []
class ZeekNativeType(ZeekNative):
    """Directive for natively-defined Zeek types."""

    def get_obj_name(self):
        # As opposed to using 'native-type', just imitate 'type'.
        return "type"
class ZeekFieldXRefRole(XRefRole):
    """Cross-reference role for record fields (":zeek:field:`Type$field`")."""

    def process_link(self, env, refnode, has_explicit_title, title, target):
        """Shorten a "Type$field" title to just the field name."""
        title, target = super().process_link(
            env, refnode, has_explicit_title, title, target
        )

        record, sep, field = title.partition("$")

        # Only rewrite when the title is exactly "<record>$<field>" with
        # both halves non-empty and no further "$" in the field part.
        if sep and record and field and "$" not in field:
            title = field

        return title, target
class ZeekNotices(Index):
    """
    Index subclass to provide the Zeek notices index.
    """

    name = "noticeindex"
    localname = _("Zeek Notice Index")
    shortname = _("notices")

    def generate(self, docnames=None):
        """Group collected Notice::Type values by their module prefix."""
        domain_data = self.domain.env.domaindata["zeek"]

        if "notices" not in domain_data:
            return {}, False

        grouped = {}

        # Each collected notice is (enum value, docname, anchor).
        for notice, docname, anchor in domain_data["notices"]:
            module = notice.split("::")[0]
            grouped.setdefault(module, []).append(
                [notice, 0, docname, anchor, "", "", ""]
            )

        return sorted(grouped.items()), False
class ZeekDomain(Domain):
    """Zeek domain.

    Ties together the Zeek directives, cross-reference roles, and the
    notice index, and implements reference resolution and parallel-read
    merging over the domain data collected by the directives.
    """

    name = "zeek"
    label = "Zeek"

    object_types = {
        "type": ObjType(_("type"), "type"),
        "native-type": ObjType(_("type"), "type"),
        "namespace": ObjType(_("namespace"), "namespace"),
        "id": ObjType(_("id"), "id"),
        "keyword": ObjType(_("keyword"), "keyword"),
        "enum": ObjType(_("enum"), "enum"),
        "attr": ObjType(_("attr"), "attr"),
        "field": ObjType(_("field"), "field"),
    }

    directives = {
        "type": ZeekType,
        "native-type": ZeekNativeType,
        "namespace": ZeekNamespace,
        "id": ZeekIdentifier,
        "keyword": ZeekKeyword,
        "enum": ZeekEnum,
        "attr": ZeekAttribute,
        "field": ZeekField,
    }

    roles = {
        "type": XRefRole(),
        "namespace": XRefRole(),
        "id": XRefRole(),
        "keyword": XRefRole(),
        "enum": XRefRole(),
        "attr": XRefRole(),
        "see": XRefRole(),
        "field": ZeekFieldXRefRole(),
    }

    indices = [
        ZeekNotices,
    ]

    initial_data = {
        "objects": {},  # (objtype, name) -> docname
    }

    def clear_doc(self, docname):
        """Drop all objects that were collected from ``docname``."""
        to_delete = []

        for (typ, name), doc in self.data["objects"].items():
            if doc == docname:
                to_delete.append((typ, name))

        for typ, name in to_delete:
            del self.data["objects"][typ, name]

    def resolve_xref(self, env, fromdocname, builder, typ, target, node, contnode):
        """Resolve a ":zeek:<typ>:`<target>`" reference to a refnode."""
        objects = self.data["objects"]

        if typ == "see":
            # ":zeek:see:" doesn't encode the object type, so look the
            # type up in the idtypes map first.
            if target not in self.data["idtypes"]:
                logger.warning(
                    '%s: unknown target for ":zeek:see:`%s`"', fromdocname, target
                )
                return []

            objtype = self.data["idtypes"][target]

            return make_refnode(
                builder,
                fromdocname,
                objects[objtype, target],
                objtype + "-" + target,
                contnode,
                target + " " + objtype,
            )
        elif typ == "field" and "$" not in target:
            # :zeek:field:`x` without a record type ends up just x, no ref.
            return []
        else:
            objtypes = self.objtypes_for_role(typ)

            for objtype in objtypes:
                if (objtype, target) in objects:
                    return make_refnode(
                        builder,
                        fromdocname,
                        objects[objtype, target],
                        objtype + "-" + target,
                        contnode,
                        target + " " + objtype,
                    )
            else:
                # for/else: no candidate object type matched the target.
                logger.warning(
                    '%s: unknown target for ":zeek:%s:`%s`"',
                    fromdocname,
                    typ,
                    target,
                )

    def get_objects(self):
        """Yield Sphinx object tuples for search and inventory support."""
        for (typ, name), docname in self.data["objects"].items():
            yield name, name, typ, docname, typ + "-" + name, 1

    def merge_domaindata(self, docnames, otherdata):
        """
        Merge domaindata in multiprocess mode.

        I'm quite unclear how the objects dict works out okay in single
        process mode. For example, the file_entropy() event is defined
        in scripts/base/bif/plugins/Zeek_FileEntropy.events.bif.zeek.rst
        *and* in script-reference/autogenerated-file-analyzer-index.rst.

        The current documentation refers to the first one for :zeek:see:.
        It seems in single process mode the reading sorts filenames and
        just uses the last highest sorting one. That ends-up being the one
        in scripts/base.

        In [4]: "script-reference/autogenerated" < "scripts/base"
        Out[4]: True
        """
        for target, data in otherdata.items():
            if target == "version":
                continue
            elif hasattr(data, "items"):
                # Dict-shaped entries ("objects", "idtypes", "fields", ...).
                target_data = self.env.domaindata["zeek"].setdefault(target, {})

                # Iterate manually over the elements for debugging
                for k, v in data.items():
                    if k not in target_data:
                        target_data[k] = v
                    else:
                        # The > comparison below updates the objects domaindata
                        # to filenames that sort higher. See comment above.
                        if isinstance(v, str):
                            if v > target_data[k]:
                                target_data[k] = v
                        else:
                            # Otherwise assume it's a dict and we can merge
                            # using update()
                            target_data[k].update(v)
            elif hasattr(data, "extend"):
                # notices are a list
                target_data = self.env.domaindata["zeek"].setdefault(target, [])
                target_data.extend(data)
            else:
                raise NotImplementedError(target, type(data))

247
doc/ext/zeek_pygments.py Normal file
View file

@ -0,0 +1,247 @@
from pygments.lexer import RegexLexer, bygroups, include, words
from pygments.token import (
Comment,
Keyword,
Literal,
Name,
Number,
Operator,
Punctuation,
String,
Text,
)
def setup(Sphinx):
return {
"parallel_read_safe": True,
}
class ZeekLexer(RegexLexer):
    """
    For `Zeek <https://www.zeek.org/>`_ scripts.

    .. versionadded:: 2.5
    """

    name = "Zeek"
    aliases = ["zeek"]
    filenames = ["*.zeek"]

    # Regex fragments reused in the token table below.
    _hex = r"[0-9a-fA-F]"                            # single hex digit
    _float = r"((\d*\.?\d+)|(\d+\.?\d*))([eE][-+]?\d+)?"  # decimal float w/ optional exponent
    _h = r"[A-Za-z0-9][-A-Za-z0-9]*"                 # hostname label

    tokens = {
        # Sub-state order matters: e.g. attributes/types/keywords must be
        # tried before the generic identifier rules.
        "root": [
            include("whitespace"),
            include("comments"),
            include("directives"),
            include("attributes"),
            include("types"),
            include("keywords"),
            include("literals"),
            include("operators"),
            include("punctuation"),
            # A (possibly namespaced) identifier followed by "(" is a call.
            (
                r"\b((?:[A-Za-z_][A-Za-z_0-9]*)(?:::(?:[A-Za-z_][A-Za-z_0-9]*))*)(?=\s*\()",
                Name.Function,
            ),
            include("identifiers"),
        ],
        "whitespace": [
            (r"\n", Text),
            (r"\s+", Text),
            (r"\\\n", Text),  # line continuation
        ],
        "comments": [
            (r"#.*$", Comment),
        ],
        # "@"-prefixed preprocessor-style directives.
        "directives": [
            (r"(@(load-plugin|load-sigs|load|unload))\b.*$", Comment.Preproc),
            (
                r"(@(DEBUG|DIR|FILENAME|deprecated|if|ifdef|ifndef|else|endif))\b",
                Comment.Preproc,
            ),
            (r"(@prefixes)\s*(\+?=).*$", Comment.Preproc),
        ],
        # "&"-prefixed attributes, e.g. &redef, &log, &optional.
        "attributes": [
            (
                words(
                    (
                        "redef",
                        "priority",
                        "log",
                        "optional",
                        "default",
                        "add_func",
                        "delete_func",
                        "expire_func",
                        "read_expire",
                        "write_expire",
                        "create_expire",
                        "synchronized",
                        "persistent",
                        "rotate_interval",
                        "rotate_size",
                        "encrypt",
                        "raw_output",
                        "mergeable",
                        "error_handler",
                        "broker_allow_complex_type",
                        "is_assigned",
                        "is_used",
                        "type_column",
                        "deprecated",
                        "on_change",
                        "backend",
                        "broker_store",
                    ),
                    prefix=r"&",
                    suffix=r"\b",
                ),
                Keyword.Pseudo,
            ),
        ],
        "types": [
            # Built-in type names.
            (
                words(
                    (
                        "any",
                        "enum",
                        "record",
                        "set",
                        "table",
                        "vector",
                        "function",
                        "hook",
                        "event",
                        "addr",
                        "bool",
                        "count",
                        "double",
                        "file",
                        "int",
                        "interval",
                        "pattern",
                        "port",
                        "string",
                        "subnet",
                        "time",
                    ),
                    prefix=r"\b",
                    suffix=r"\b",
                ),
                Keyword.Type,
            ),
            # "opaque of <Type>"
            (
                r"\b(opaque)(\s+)(of)(\s+)((?:[A-Za-z_][A-Za-z_0-9]*)(?:::(?:[A-Za-z_][A-Za-z_0-9]*))*)\b",
                bygroups(Keyword.Type, Text, Operator.Word, Text, Keyword.Type),
            ),
            # "type Name: record|enum" declarations.
            (
                r"\b(type)(\s+)((?:[A-Za-z_][A-Za-z_0-9]*)(?:::(?:[A-Za-z_][A-Za-z_0-9]*))*)(\s*)(:)(\s*)\b(record|enum)\b",
                bygroups(Keyword, Text, Name.Class, Text, Operator, Text, Keyword.Type),
            ),
            # "type Name:" declarations of other types.
            (
                r"\b(type)(\s+)((?:[A-Za-z_][A-Za-z_0-9]*)(?:::(?:[A-Za-z_][A-Za-z_0-9]*))*)(\s*)(:)",
                bygroups(Keyword, Text, Name, Text, Operator),
            ),
            # "redef record|enum Name" extensions.
            (
                r"\b(redef)(\s+)(record|enum)(\s+)((?:[A-Za-z_][A-Za-z_0-9]*)(?:::(?:[A-Za-z_][A-Za-z_0-9]*))*)\b",
                bygroups(Keyword, Text, Keyword.Type, Text, Name.Class),
            ),
        ],
        "keywords": [
            (
                words(
                    (
                        "redef",
                        "export",
                        "if",
                        "else",
                        "for",
                        "while",
                        "return",
                        "break",
                        "next",
                        "continue",
                        "fallthrough",
                        "switch",
                        "default",
                        "case",
                        "add",
                        "delete",
                        "copy",
                        "when",
                        "timeout",
                        "schedule",
                    ),
                    prefix=r"\b",
                    suffix=r"\b",
                ),
                Keyword,
            ),
            (r"\b(print)\b", Keyword),
            (r"\b(global|local|const|option)\b", Keyword.Declaration),
            (
                r"\b(module)(\s+)(([A-Za-z_][A-Za-z_0-9]*)(?:::([A-Za-z_][A-Za-z_0-9]*))*)\b",
                bygroups(Keyword.Namespace, Text, Name.Namespace),
            ),
        ],
        "literals": [
            (r'"', String, "string"),
            # Not the greatest match for patterns, but generally helps
            # disambiguate between start of a pattern and just a division
            # operator.
            (r"/(?=.*/)", String.Regex, "regex"),
            (r"\b(T|F)\b", Keyword.Constant),
            # Port
            (r"\b\d{1,5}/(udp|tcp|icmp|unknown)\b", Number),
            # IPv4 Address
            (
                r"\b(25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[0-9]{1,2})\.(25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[0-9]{1,2})\.(25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[0-9]{1,2})\.(25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[0-9]{1,2})\b",
                Number,
            ),
            # IPv6 Address (not 100% correct: that takes more effort)
            (
                r"\[([0-9a-fA-F]{0,4}:){2,7}([0-9a-fA-F]{0,4})?((25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[0-9]{1,2})\.(25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[0-9]{1,2})\.(25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[0-9]{1,2})\.(25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[0-9]{1,2}))?\]",
                Number,
            ),
            # Numeric
            (r"\b0[xX]" + _hex + r"+\b", Number.Hex),
            (r"\b" + _float + r"\s*(day|hr|min|sec|msec|usec)s?\b", Literal.Date),
            (r"\b" + _float + r"\b", Number.Float),
            (r"\b(\d+)\b", Number.Integer),
            # Hostnames
            (_h + r"(\." + _h + r")+", String),
        ],
        "operators": [
            (r"[!%*/+<=>~|&^-]", Operator),
            (r"([-+=&|]{2}|[+=!><-]=)", Operator),
            (r"\b(in|as|is|of)\b", Operator.Word),
            (r"\??\$", Operator),
            # Technically, colons are often used for punctuation/separation.
            # E.g. field name/type separation.
            (r"[?:]", Operator),
        ],
        "punctuation": [
            (r"\?\$", Punctuation),
            (r"[{}()\[\],;:.]", Punctuation),
        ],
        "identifiers": [
            # Namespace-qualified prefix, then the plain identifier.
            (r"([a-zA-Z_]\w*)(::)", bygroups(Name, Punctuation)),
            (r"[a-zA-Z_]\w*", Name),
        ],
        # Inside a double-quoted string literal.
        "string": [
            (r"\\.", String.Escape),
            (r"%-?[0-9]*(\.[0-9]+)?[DTdxsefg]", String.Escape),  # fmt specifiers
            (r'"', String, "#pop"),
            (r".", String),
        ],
        # Inside a /.../ pattern literal.
        "regex": [
            (r"\\.", String.Escape),
            (r"/", String.Regex, "#pop"),
            (r".", String.Regex),
        ],
    }

644
doc/frameworks/broker.rst Normal file
View file

@ -0,0 +1,644 @@
.. _CAF: https://github.com/actor-framework/actor-framework
.. _broker-framework:
==============================
Broker Communication Framework
==============================
.. rst-class:: opening
Zeek uses the `Broker Library
<https://docs.zeek.org/projects/broker>`_ to exchange information with
other Zeek processes. Broker itself uses CAF_ (C++ Actor Framework)
internally for connecting nodes and exchanging arbitrary data over
networks. Broker then introduces, on top of CAF, a topic-based
publish/subscribe communication pattern using a data model that is
compatible to Zeek's. Broker itself can be utilized outside the
context of Zeek, with Zeek itself making use of only a few predefined
Broker message formats that represent Zeek events, log entries, etc.
In summary, Zeek's Broker framework provides basic facilities for
connecting broker-enabled peers (e.g. Zeek instances) to each other
and exchanging messages (e.g. events and logs).
Cluster Layout / API
====================
Layout / Topology
-----------------
In a Zeek cluster setup, every Zeek process is assigned a cluster role.
Such a process is then called a Zeek node, a cluster node, or just named
after the role of the process (the manager, the loggers, ...). A basic Zeek
cluster uses four different node types, enumerated in the script-level
variable :zeek:see:`Cluster::NodeType`.
- Manager
- Logger
- Worker
- Proxy
In small Zeek deployments, all nodes may run on a single host. In large
Zeek deployments, nodes may be distributed across multiple physical
systems for scaling.
Currently, a Zeek cluster contains a single Manager node. Further, connectivity
between nodes is determined statically based on their type:
- Every node connects to all loggers and the manager.
- Each worker connects to all proxies.
.. figure:: broker/cluster-layout.png
Some general suggestions as to the purpose/utilization of each node type:
- Workers: are a good first choice for doing the brunt of any work you need
done. They should be spending a lot of time performing the actual job
of parsing/analyzing incoming data from packets, so you might choose
to look at them as doing a "first pass" analysis and then deciding how
the results should be shared with other nodes in the cluster.
- Proxies: serve as intermediaries for data storage and work/calculation
offloading. Good for helping offload work or data in a scalable and
distributed way. Since any given worker is connected to all
proxies and can agree on an "arbitrary key -> proxy node" mapping
(more on that later), you can partition work or data amongst them in a
uniform manner. e.g. you might choose to use proxies as a method of
sharing non-persistent state or as a "second pass" analysis for any
work that you don't want interfering with the workers' capacity to
keep up with capturing and parsing packets. Note that the default scripts
that come with Zeek make minimal use of proxies, so if you are coming
from a previous ZeekControl deployment, you may want to try reducing down
to a single proxy node. If you come to have custom/community scripts
that utilize proxies, that would be the time to start considering scaling
up the number of proxies to meet demands.
- Manager: this node will be good at performing decisions that require a
global view of things since it is in a centralized location, connected
to everything. However, that also makes it easy to overload, so try
to use it sparingly and only for tasks that must be done in a
centralized or authoritative location. Optionally, for some
deployments, the Manager can also serve as the sole Logger.
- Loggers: these nodes should simply be spending their time writing out
logs to disk and not used for much else. In the default cluster
configuration, logs get distributed among available loggers in a
round-robin fashion, providing failover capability should any given
logger temporarily go offline.
Data Management/Sharing Strategies
==================================
There's maybe no single, best approach or pattern to use when you need a
Zeek script to store or share long-term state and data. The two
approaches that were previously used were either using the ``&synchronized``
attribute on tables/sets or by explicitly sending events to specific
nodes on which you wanted data to be stored. The former is no longer
possible, though there are several new possibilities that the new
Broker/Cluster framework offer, namely distributed data store and data
partitioning APIs.
Data Stores
-----------
Broker provides a distributed key-value store interface with optional
choice of using a persistent backend. For more detail, see
:ref:`this example <data_store_example>`.
Some ideas/considerations/scenarios when deciding whether to use
a data store for your use-case:
* If you need the full data set locally in order to achieve low-latency
queries using data store "clones" can provide that.
* If you need data that persists across restarts of Zeek processes, then
data stores can also provide that.
* If the data you want to store is complex (tables, sets, records) or
you expect to read, modify, and store back, then data stores may not
be able to provide simple, race-free methods of performing the pattern
of logic that you want.
* If the data set you want to store is excessively large, that's still
problematic even for stores that use a persistent backend as they are
implemented in a way that requires a full snapshot of the store's
contents to fit in memory (this limitation may change in the future).
Data Partitioning
-----------------
New data partitioning strategies are available using the API in
:doc:`/scripts/base/frameworks/cluster/pools.zeek`. Using that API, developers
of custom Zeek scripts can define a custom pool of nodes that best fits the
needs of their script.
One example strategy is to use Highest Random Weight (HRW) hashing to
partition data tables amongst the pool of all proxy nodes. e.g. using
:zeek:see:`Cluster::publish_hrw`. This could allow clusters to
be scaled more easily than the approach of "the entire data set gets
synchronized to all nodes" as the solution to memory limitations becomes
"just add another proxy node". It may also take away some of the
messaging load that used to be required to synchronize data sets across
all nodes.
The tradeoff of this approach is that nodes that leave the pool (due to
crashing, etc.) cause a temporary gap in the total data set until
workers start hashing keys to a new proxy node that is still alive,
causing data to now be located and updated there.
If the developer of a script expects its workload to be particularly
intensive, wants to ensure that their operations get exclusive
access to nodes, or otherwise set constraints on the number of nodes within
a pool utilized by their script, then the :zeek:see:`Cluster::PoolSpec`
structure will allow them to do that while still allowing users of that script
to override the default suggestions made by the original developer.
Broker Framework Examples
=========================
The broker framework provides basic facilities for connecting Zeek instances
to each other and exchanging messages, like events or logs.
See :doc:`/scripts/base/frameworks/broker/main.zeek` for an overview
of the main Broker API.
.. _broker_topic_naming:
Topic Naming Conventions
------------------------
All Broker-based messaging involves two components: the information you
want to send (e.g. an event w/ its arguments) along with an associated
topic name string. The topic strings are used as a filtering mechanism:
Broker uses a publish/subscribe communication pattern where peers
advertise interest in topic **prefixes** and only receive messages which
match one of their prefix subscriptions.
Broker itself supports arbitrary topic strings, however Zeek generally
follows certain conventions in choosing these topics to help avoid
conflicts and generally make them easier to remember.
As a reminder of how topic subscriptions work, subscribers advertise
interest in a topic **prefix** and then receive any messages published by a
peer to a topic name that starts with that prefix. E.g. Alice
subscribes to the "alice/dogs" prefix, then would receive the following
message topics published by Bob:
- topic "alice/dogs/corgi"
- topic "alice/dogs"
- topic "alice/dogsarecool/oratleastilikethem"
Alice would **not** receive the following message topics published by Bob:
- topic "alice/cats/siamese"
- topic "alice/cats"
- topic "alice/dog"
- topic "alice"
Note that the topics aren't required to form a slash-delimited hierarchy,
the subscription matching is purely a byte-per-byte prefix comparison.
However, Zeek scripts generally will follow a topic naming hierarchy and
any given script will make the topic names it uses apparent via some
redef'able constant in its export section. Generally topics that Zeek
scripts use will be along the lines of :samp:`zeek/{<namespace>}/{<specifics>}`
with :samp:`{<namespace>}` being the script's module name (in all-undercase).
For example, you might expect an imaginary ``Pretend`` framework to
publish/subscribe using topic names like ``zeek/pretend/my_cool_event``.
For scripts that use Broker as a means of cluster-aware analysis,
it's usually sufficient for them to make use of the topics declared
by the cluster framework. For scripts that are meant to establish
communication flows unrelated to Zeek cluster, new topics are declared
(examples being the NetControl and Control frameworks).
For cluster operation, see :doc:`/scripts/base/frameworks/cluster/main.zeek`
for a list of topics that are useful for steering published events to
the various node classes. E.g. you have the ability to broadcast
to all nodes of a given class (e.g. just workers) or just send to a
specific node within a class.
The topic names that logs get published under are a bit nuanced. In the
default cluster configuration, they are round-robin published to
explicit topic names that identify a single logger. In standalone Zeek
processes, logs get published to the topic indicated by
:zeek:see:`Broker::default_log_topic_prefix`.
For those writing their own scripts which need new topic names, a
suggestion would be to avoid prefixing any new topics/prefixes with
``zeek/`` as any changes in scripts shipping with Zeek will use that prefix
and it's better to not risk unintended conflicts. Again, it's
often less confusing to just re-use existing topic names instead
of introducing new topic names. The typical use case is writing
a cluster-enabled script, which usually just needs to route events
based upon node classes, and that already has usable topics in the
cluster framework.
Connecting to Peers
-------------------
Zeek can accept incoming connections by calling :zeek:see:`Broker::listen`.
.. literalinclude:: broker/connecting-listener.zeek
:caption: connecting-listener.zeek
:language: zeek
:linenos:
:tab-width: 4
Zeek can initiate outgoing connections by calling :zeek:see:`Broker::peer`.
.. literalinclude:: broker/connecting-connector.zeek
:caption: connecting-connector.zeek
:language: zeek
:linenos:
:tab-width: 4
In either case, connection status updates are monitored via the
:zeek:see:`Broker::peer_added` and :zeek:see:`Broker::peer_lost` events.
Remote Events
-------------
To receive remote events, you need to first subscribe to a "topic" to which
the events are being sent. A topic is just a string chosen by the sender,
and named in a way that helps organize events into various categories.
See the :ref:`topic naming conventions section <broker_topic_naming>` for
more on how topics work and are chosen.
Use the :zeek:see:`Broker::subscribe` function to subscribe to topics and
define any event handlers for events that peers will send.
.. literalinclude:: broker/events-listener.zeek
:caption: events-listener.zeek
:language: zeek
:linenos:
:tab-width: 4
To send an event, call the :zeek:see:`Broker::publish` function which you can
supply directly with the event and its arguments or give it the return value of
:zeek:see:`Broker::make_event` in case you need to send the same event/args
multiple times. When publishing events like this, local event handlers for
the event are not called, even if a matching subscription exists.
.. literalinclude:: broker/events-connector.zeek
:caption: events-connector.zeek
:language: zeek
:linenos:
:tab-width: 4
Note that the subscription model is prefix-based, meaning that if you subscribe
to the ``zeek/events`` topic prefix you would receive events that are published
to topic names ``zeek/events/foo`` and ``zeek/events/bar`` but not
``zeek/misc``.
.. note::
In prior Zeek versions, ``Broker::auto_publish`` was available to automatically
send events to peers whenever the events were called locally via the normal
event invocation syntax. When auto-publishing events, local event handlers for
the event were called in addition to sending the event to any subscribed peers.
``Broker::auto_publish`` was removed due to its
`implicit nature <https://github.com/zeek/zeek/discussions/3637>`_.
Remote Logging
--------------
.. literalinclude:: broker/testlog.zeek
:caption: testlog.zeek
:language: zeek
:linenos:
:tab-width: 4
To toggle remote logs, redef :zeek:see:`Log::enable_remote_logging`.
Use the :zeek:see:`Broker::subscribe` function to advertise interest
in logs written by peers. The topic names that Zeek uses are determined by
:zeek:see:`Broker::log_topic`.
.. literalinclude:: broker/logs-listener.zeek
:caption: logs-listener.zeek
:language: zeek
:linenos:
:tab-width: 4
.. literalinclude:: broker/logs-connector.zeek
:caption: logs-connector.zeek
:language: zeek
:linenos:
:tab-width: 4
Note that logging events are only raised locally on the node that performs
the :zeek:see:`Log::write` and not automatically published to peers.
.. _data_store_example:
Distributed Data Stores
-----------------------
See :doc:`/scripts/base/frameworks/broker/store.zeek` for an overview
of the Broker data store API.
There are two flavors of key-value data store interfaces: master and clone.
A master data store can be cloned from remote peers which may then
perform lightweight, local queries against the clone, which
automatically stays synchronized with the master store. Clones cannot
modify their content directly, instead they send modifications to the
centralized master store which applies them and then broadcasts them to
all clones.
Master stores get to choose what type of storage backend to
use. E.g. In-memory versus SQLite for persistence.
Data stores also support expiration on a per-key basis using an amount of
time relative to the entry's last modification time.
.. literalinclude:: broker/stores-listener.zeek
:caption: stores-listener.zeek
:language: zeek
:linenos:
:tab-width: 4
.. literalinclude:: broker/stores-connector.zeek
:caption: stores-connector.zeek
:language: zeek
:linenos:
:tab-width: 4
Note that all data store queries must be made within Zeek's asynchronous
``when`` statements and must specify a timeout block.
SQLite Data Store Tuning
^^^^^^^^^^^^^^^^^^^^^^^^
When leveraging the SQLite backend for persistence, SQLite's default journaling
and consistency settings are used. Concretely, ``journal_mode`` is set to
``DELETE`` and ``synchronous`` to ``FULL``. This in turn is not optimal for
`high INSERT or UPDATE rates <https://www.sqlite.org/faq.html#q19>`_
due to SQLite waiting for the required IO to complete until data is safely
on disk. This can also have a non-negligible system effect when the
SQLite database is located on the same device as other IO critical processes.
Starting with Zeek 5.2, it is possible to tune and relax these settings by
providing an appropriate :zeek:see:`Broker::BackendOptions` and
:zeek:see:`Broker::SQLiteOptions` instance to
:zeek:see:`Broker::create_master`. The following example changes the
data store to use `Write-Ahead Logging <https://www.sqlite.org/wal.html>`_
which should perform significantly faster than the default.
.. literalinclude:: broker/store-sqlite-tuning.zeek
:caption: store-sqlite-tuning.zeek
:language: zeek
:linenos:
:tab-width: 4
If your use-case turns out to require more and lower-level tuning around
SQLite options, please get in contact or open a feature request on GitHub.
Cluster Framework Examples
==========================
This section contains a few brief examples of how various communication
patterns one might use when developing Zeek scripts that are to operate in
the context of a cluster.
.. _event-namespacing-pitfall:
A Reminder About Events and Module Namespaces
---------------------------------------------
For simplicity, the following examples do not use any modules/namespaces.
If you choose to use them within your own code, it's important to
remember that the ``event`` and ``schedule`` dispatching statements
should always use the fully-qualified event name.
For example, this will likely not work as expected:
.. code-block:: zeek
module MyModule;
export {
global my_event: event();
}
event my_event()
{
print "got my event";
}
event zeek_init()
{
event my_event();
schedule 10sec { my_event() };
}
This code runs without errors; however, the local ``my_event`` handler
will never be called, nor will any remote handlers. Instead, at a
minimum you would need to change the ``zeek_init()`` handler:
.. code-block:: zeek
event zeek_init()
{
event MyModule::my_event();
schedule 10sec { MyModule::my_event() };
}
Though, an easy rule of thumb to remember would be to always use the
explicit module namespace scoping and you can't go wrong:
.. code-block:: zeek
module MyModule;
export {
global MyModule::my_event: event();
}
event MyModule::my_event()
{
print "got my event";
}
event zeek_init()
{
event MyModule::my_event();
schedule 10sec { MyModule::my_event() };
}
Event types that reside in the default namespace (such as :zeek:id:`zeek_init` or
:zeek:id:`connection_established`) require no qualification, even when scheduled from
inside a module. Don't force qualification of such events by prefixing with
``GLOBAL::``.
Note that other identifiers in Zeek do not have this inconsistency
related to module namespacing, it's just events that require
explicitness.
Manager Sending Events To Workers
---------------------------------
This is fairly straightforward: we just need a topic name to which we know
all workers are subscribed, combined with the event we want to send them.
.. code-block:: zeek
event manager_to_workers(s: string)
{
print "got event from manager", s;
}
event some_event_handled_on_manager()
{
Broker::publish(Cluster::worker_topic, manager_to_workers,
"hello v0");
# If you know this event is only handled on the manager, you don't
# need any of the following conditions, they're just here as an
# example of how you can further discriminate based on node identity.
# Can check based on the name of the node.
if ( Cluster::node == "manager" )
Broker::publish(Cluster::worker_topic, manager_to_workers,
"hello v1");
# Can check based on the type of the node.
if ( Cluster::local_node_type() == Cluster::MANAGER )
Broker::publish(Cluster::worker_topic, manager_to_workers,
"hello v2");
# The run-time overhead of the above conditions can even be
# eliminated by using the following conditional directives.
# It's evaluated once per node at parse-time and, if false,
# any code within is just ignored / treated as not existing at all.
@if ( Cluster::local_node_type() == Cluster::MANAGER )
Broker::publish(Cluster::worker_topic, manager_to_workers,
"hello v3");
@endif
}
Worker Sending Events To Manager
--------------------------------
This should look almost identical to the previous case of sending an event
from the manager to workers, except it simply changes the topic name to
one to which the manager is subscribed.
.. code-block:: zeek
event worker_to_manager(worker_name: string)
{
print "got event from worker", worker_name;
}
event some_event_handled_on_worker()
{
Broker::publish(Cluster::manager_topic, worker_to_manager,
Cluster::node);
}
Worker Sending Events To All Workers
------------------------------------
Since workers are not directly connected to each other in the cluster
topology, this type of communication is a bit different than what we
did before since we have to manually relay the event via some node that *is*
connected to all workers. The manager or a proxy satisfies that requirement:
.. code-block:: zeek
event worker_to_workers(worker_name: string)
{
@if ( Cluster::local_node_type() == Cluster::MANAGER ||
Cluster::local_node_type() == Cluster::PROXY )
Broker::publish(Cluster::worker_topic, worker_to_workers,
worker_name);
@else
print "got event from worker", worker_name;
@endif
}
event some_event_handled_on_worker()
{
# We know the manager is connected to all workers, so we could
# choose to relay the event across it.
Broker::publish(Cluster::manager_topic, worker_to_workers,
Cluster::node + " (via manager)");
# We also know that any given proxy is connected to all workers,
# though now we have a choice of which proxy to use. If we
# want to distribute the work associated with relaying uniformly,
# we can use a round-robin strategy. The key used here is simply
# used by the cluster framework internally to keep track of
# which node is up next in the round-robin.
local pt = Cluster::rr_topic(Cluster::proxy_pool, "example_key");
Broker::publish(pt, worker_to_workers,
Cluster::node + " (via a proxy)");
}
Worker Distributing Events Uniformly Across Proxies
---------------------------------------------------
If you want to offload some data/work from a worker to your proxies,
we can make use of a `Highest Random Weight (HRW) hashing
<https://en.wikipedia.org/wiki/Rendezvous_hashing>`_ distribution strategy
to uniformly map an arbitrary key space across all available proxies.
.. code-block:: zeek
event worker_to_proxies(worker_name: string)
{
print "got event from worker", worker_name;
}
global my_counter = 0;
event some_event_handled_on_worker()
{
# The key here is used to choose which proxy shall receive
# the event. Different keys may map to different nodes, but
# any given key always maps to the same node provided the
# pool of nodes remains consistent. If a proxy goes offline,
# that key maps to a different node until the original comes
# back up.
Cluster::publish_hrw(Cluster::proxy_pool,
cat("example_key", ++my_counter),
worker_to_proxies, Cluster::node);
}
Broker-backed Zeek Tables for Data Synchronization and Persistence
==================================================================
Starting with Zeek 3.2, it is possible to "bind" a Zeek table to a backing
Broker store. Changes to the Zeek table are sent to the Broker store. Similarly,
changes of the Broker store are applied to the Zeek table.
This feature allows easy distribution of table contents across a cluster.
It also offers persistence for tables (when using a persistent Broker store
backend like SQLite).
To give a short example, to distribute a table over a cluster you can use
the :zeek:attr:`&backend` attribute.
.. code-block:: zeek
global t: table[string] of count &backend=Broker::MEMORY;
The :zeek:attr:`&backend` attribute creates a master data store on the
manager and a clone data store on all other nodes in the cluster. This
in essence means that the table exists twice in each Zeek process. One
copy of the table is contained in a Broker data store (either a master
or a clone depending on the node), which data store distributes the
data across the cluster---and, depending on the backend, might also
make the data persistent. Since Broker data stores are only accessible
via asynchronous operations, and accessing them might not always be
immediate, a second copy of the table, which is immediately
accessible, is held inside the Zeek core. This is the copy that you
see and interact with on the Zeek side.

Binary file not shown.

After

Width:  |  Height:  |  Size: 55 KiB

View file

@ -0,0 +1,2 @@
<?xml version="1.0" encoding="UTF-8"?>
<mxfile userAgent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.84 Safari/537.36" version="9.0.3-1" editor="www.draw.io" type="device"><diagram name="Page-1" id="42789a77-a242-8287-6e28-9cd8cfd52e62">7VxLc6M4EP41Po4LSUjAcZJJZg+7VVOVrd3ZowwKZgYjFyaxvb9+hZEwEhA/eITs2JdYrZYE0ve1ultyZuh+tfua0vXyDx6weAatYDdDX2ZQfDxL/Mkl+0Li2G4hCNMoKETgKHiK/mVSKNuFL1HANppixnmcRWtd6PMkYX6myWia8q2u9sxjfdQ1DVlN8OTTuC79OwqypZQCyzpW/MaicCmHdrGsWFD/Z5jyl0SON4Po+fApqldU9SX1N0sa8G1FhB5m6D7lPCu+rXb3LM7nVk1b0e6xpbZ87pQl2VkNGEXAcTwrgD71XPQJoKKLVxq/MPUOhyfN9mp2WCAmSxYTnog/d4dXZnmnQJSW2SqWX2O6YPFdOSv3PObpsdkmo2n2OV8wQ/YYxXkPlipLiGBRZkmgWvgx3Wwi/89llBQVshkoSpVGP1iW7WWZvmRciHiaLXnIExr/zvlattpkKf/J1FOK1fOIgz6TskahIdd95kn2SFdRnIP8L5YGNKFSLEcCUJYrHVqHj5An/KEyj4dH3kXZ9/yt51iW/pFzcHxWqVpfZ7n0G/6S+qxtcSWVaBqyrEXHK3TyNa50LNHzlfEVy9K9UEhZTLPoVScMlbwLSz3ZVKwY3VcU1jxKsk2l52+5QCgoE6KYJg0IcqCOY1MfQTFp1Ra2bVdbiC/FM6hS5WWOogM7zmWKfWPKL80UNAmmAHIZUwB0RmdKjSgrAYCQpTNIYjG7d4v8W5iVq1VlUByLTT9H+3YZZexpTQ9rthV+h06fI68OVFD7al7l81Xky4pTLGsANW0BtWBQRZOBADOnO9hpHIVJzliBVzFDbwD4laUZ270JPVWL9BVHyrpuj86NctmWFbdGQasJrBW4XIYG2GA2Cxhs1jRRQFinfLf/BJsQUkjEuFX9qQEncLyFZZ0DnOdnRnx/msCBUAcOwHXgOLgOHDwEcJo80zpwYh4Ky5LbnI+BE7Ig+CwDI4IIOFWcePoeZJN3hIlXg4mERIN7dlv7HjYXD7/b4t+CVQ1QXxzvrm3T6Qac0uGuuNvFg4sV+14tKEe87rT35JrDM1xzMIhrXvOlbYLmnmMjDCxgI9dBjsYED84RJB7GmCAHQ7334h1lh29Fth6YE9GLhRwxAlbOerkj2/Y8790Rr01sYBFjmGKaasO0Rxka9S5z/JsC4jbPDtw8u3e12kbUOKZjh29Ge1yjrTImNaNtDWW07enYaAKN/Agi1xlirLwN1RExOhrT1JIbh0biUMmVObIcjS9zizRSpjk52UimQ2ffWBqJpc8t+2Eqe2PYMKn8GjGQbTAM4OsYZqO+GdaSArWMiAVoh2SdE6DOjZwjRyU1plVoS1yDtfC8je56bl4VsgxzmlAnK3J1jlnOdWRFjt4RgEZHPZEV4mHJqixphZx63FBhYnu0ezU57lxs42ZyNMXcBdL7ctO8Oi7tcWBonu/a5lFDCwwvNvVQD5e9nsFDLrPsJp5OOeij4GxANE30dgFCRvRrGbdkTuirMHgo/d5tnNsNpsfNFI9q+Grb+phQhSNZQqLvo90tYYewEoFuQGmwZxXM1C3avJNjORGbNo17IMjM6J2yaeRCG9VR37BpdR4YUSS2+7WB9WPBpuT0lqc/i6PCD5qdXrRzwsxOuz6bana6TBKPcYAMG5BxC9ff4xDRmtumfzFooH5OEu0WlzeP4wwblt/uoU/nlGhOiJn5nmYObaSEN1Fucpnwdq/jKnb1jgA09rO+c
mjuwDk0XOPmFTHCIKHk4EedgJwBzJFiSdfMqoGB8KTOUAfKqjmXmfoWhNXzpu8UabaA8PBI/WFsJOMHjPN0bOYrumLsVPx6Kv48s/5UPFp7z57jURWQdgX5W0dfo2TrhjSkw5xGDJM6s/pZT2M1p2WyejVYI0VW4NRRU0+b4qVnChem0zqp9x6dNd0/as2mfdy7nv+PbNqYv8ZoSrP+wmH7G1Z4oGTamCcfI13hhMZ9rauvcNrmTTUT8n1dMDN+EmPsBd19x/ovam8sGzU5dpELNUc5UrT8WemZX5ccO8e9Gomc5W1P5Wp4BqfOJWf5EwTl4pgd9UVO0is5RfH471oK9eP/xEEP/wE=</diagram></mxfile>

View file

@ -0,0 +1,12 @@
redef exit_only_after_terminate = T;
event zeek_init()
{
Broker::peer("127.0.0.1");
}
event Broker::peer_added(endpoint: Broker::EndpointInfo, msg: string)
{
print "peer added", endpoint;
terminate();
}

View file

@ -0,0 +1,17 @@
redef exit_only_after_terminate = T;
event zeek_init()
{
Broker::listen("127.0.0.1");
}
event Broker::peer_added(endpoint: Broker::EndpointInfo, msg: string)
{
print "peer added", endpoint;
}
event Broker::peer_lost(endpoint: Broker::EndpointInfo, msg: string)
{
print "peer lost", endpoint;
terminate();
}

View file

@ -0,0 +1,26 @@
redef exit_only_after_terminate = T;
global my_event: event(msg: string, c: count);
event zeek_init()
{
Broker::peer("127.0.0.1");
}
event Broker::peer_added(endpoint: Broker::EndpointInfo, msg: string)
{
print "peer added", endpoint;
Broker::publish("zeek/event/my_event", my_event, "hi", 0);
Broker::publish("zeek/event/my_event", my_event, "...", 1);
local e = Broker::make_event(my_event, "bye", 2);
Broker::publish("zeek/event/my_event", e);
}
event Broker::peer_lost(endpoint: Broker::EndpointInfo, msg: string)
{
terminate();
}
event my_event(msg: string, c: count)
{
print "got my_event", msg, c;
}

View file

@ -0,0 +1,24 @@
redef exit_only_after_terminate = T;
global msg_count = 0;
global my_event: event(msg: string, c: count);
global my_auto_event: event(msg: string, c: count);
event zeek_init()
{
Broker::subscribe("zeek/event/");
Broker::listen("127.0.0.1");
}
event Broker::peer_added(endpoint: Broker::EndpointInfo, msg: string)
{
print "peer added", endpoint;
}
event my_event(msg: string, c: count)
{
++msg_count;
print "got my_event", msg, c;
if ( msg_count == 5 )
terminate();
}

View file

@ -0,0 +1,36 @@
@load ./testlog
redef exit_only_after_terminate = T;
global n = 0;
event zeek_init()
{
Broker::peer("127.0.0.1");
}
event do_write()
{
if ( n == 6 )
return;
Log::write(Test::LOG, [$msg = "ping", $num = n]);
++n;
event do_write();
}
event Broker::peer_added(endpoint: Broker::EndpointInfo, msg: string)
{
print "peer added", endpoint;
event do_write();
}
event Broker::peer_lost(endpoint: Broker::EndpointInfo, msg: string)
{
terminate();
}
event Test::log_test(rec: Test::Info)
{
print "wrote log", rec;
Broker::publish("zeek/logs/forward/test", Test::log_test, rec);
}

View file

@ -0,0 +1,22 @@
@load ./testlog
redef exit_only_after_terminate = T;
event zeek_init()
{
Broker::subscribe("zeek/logs");
Broker::listen("127.0.0.1");
}
event Broker::peer_added(endpoint: Broker::EndpointInfo, msg: string)
{
print "peer added", endpoint;
}
event Test::log_test(rec: Test::Info)
{
print "got log event", rec;
if ( rec$num == 5 )
terminate();
}

View file

@ -0,0 +1,19 @@
global h: opaque of Broker::Store;
event zeek_init()
{
# Use WAL mode.
local sqlite_options=Broker::SQLiteOptions(
$synchronous=Broker::SQLITE_SYNCHRONOUS_NORMAL,
$journal_mode=Broker::SQLITE_JOURNAL_MODE_WAL,
);
local options = Broker::BackendOptions($sqlite=sqlite_options);
h = Broker::create_master("persistent-store", Broker::SQLITE, options);
local c = 1000;
while (c > 0)
{
Broker::put(h, cat(c), rand(10000));
--c;
}
}

View file

@ -0,0 +1,29 @@
redef exit_only_after_terminate = T;
global h: opaque of Broker::Store;
global ready: event();
event Broker::peer_lost(endpoint: Broker::EndpointInfo, msg: string)
{
terminate();
}
event zeek_init()
{
h = Broker::create_master("mystore");
local myset: set[string] = {"a", "b", "c"};
local myvec: vector of string = {"alpha", "beta", "gamma"};
Broker::put(h, "one", 110);
Broker::put(h, "two", 223);
Broker::put(h, "myset", myset);
Broker::put(h, "myvec", myvec);
Broker::increment(h, "one");
Broker::decrement(h, "two");
Broker::insert_into_set(h, "myset", "d");
Broker::remove_from(h, "myset", "b");
Broker::push(h, "myvec", "delta");
Broker::peer("127.0.0.1");
}

View file

@ -0,0 +1,79 @@
redef exit_only_after_terminate = T;
global h: opaque of Broker::Store;
global expected_key_count = 4;
global key_count = 0;
# Lookup a value in the store based on an arbitrary key string.
function do_lookup(key: string)
{
when ( local res = Broker::get(h, key) )
{
++key_count;
print "lookup", key, res;
# End after we iterated over looking up each key in the store twice.
if ( key_count == expected_key_count * 2 )
terminate();
}
# All data store queries must specify a timeout
timeout 3sec
{ print "timeout", key; }
}
event check_keys()
{
# Here we just query for the list of keys in the store, and show how to
# look up each one's value.
when ( local res = Broker::keys(h) )
{
print "clone keys", res;
if ( res?$result )
{
# Since we know that the keys we are storing are all strings,
# we can conveniently cast the result of Broker::keys to
# a native Bro type, namely 'set[string]'.
for ( k in res$result as string_set )
do_lookup(k);
# Alternatively, we can use a generic iterator to iterate
# over the results (which we know is of the 'set' type because
# that's what Broker::keys() always returns). If the keys
# we stored were not all of the same type, then you would
# likely want to use this method of inspecting the store's keys.
local i = Broker::set_iterator(res$result);
while ( ! Broker::set_iterator_last(i) )
{
do_lookup(Broker::set_iterator_value(i) as string);
Broker::set_iterator_next(i);
}
}
}
# All data store queries must specify a timeout.
# You also might see timeouts on connecting/initializing a clone since
# it hasn't had time to get fully set up yet.
timeout 1sec
{
print "timeout";
schedule 1sec { check_keys() };
}
}
event Broker::peer_added(endpoint: Broker::EndpointInfo, msg: string)
{
print "peer added";
# We could create a clone early, like in zeek_init and it will periodically
# try to synchronize with its master once it connects, however, we just
# create it now since we know the peer w/ the master store has just
# connected.
h = Broker::create_clone("mystore");
event check_keys();
}
event zeek_init()
{
Broker::listen("127.0.0.1");
}

View file

@ -0,0 +1,17 @@
module Test;
export {
redef enum Log::ID += { LOG };
type Info: record {
msg: string &log;
num: count &log;
};
global log_test: event(rec: Test::Info);
}
event zeek_init() &priority=5
{
Log::create_stream(Test::LOG, [$columns=Test::Info, $ev=log_test, $path="test"]);
}

630
doc/frameworks/cluster.rst Normal file
View file

@ -0,0 +1,630 @@
.. _cluster-framework:
=================
Cluster Framework
=================
The basic premise of Zeek clusterization is to break down network traffic into
smaller pieces, while preserving the affinity of individual network sessions to
a single analysis process. Cluster architecture thus allows Zeek to distribute
that analysis across many dozens or hundreds of worker processes, allowing the
monitoring system to scale up to line speeds of 100G or more.
.. figure:: /images/cluster-diagram.png
Figure 1: Block diagram of cluster setup showing multiple network feeds to a
traffic aggregator. This device sends traffic to workers after symmetric
hashing/load-balancing. Traffic is then fed to the Zeek cluster using
load-balancing network cards.
Zeek's Cluster Components
=========================
By distributing network traffic across hosts and processes, overall traffic
finally reaches a volume that can be effectively analyzed by a single worker
process. Zeek then acts as a distributed network security monitor to perform
analysis across many dozens or hundreds of workers, all acting on a small
fraction of the overall traffic volume. The analysis of the worker process is
further facilitated by nodes such as manager and proxies, ultimately logging
the alerts and or relevant network logs. A Zeek cluster therefore consists of
four main components: a manager, workers, proxies, and a logger.
Manager
-------
The manager is a Zeek process that has two primary jobs. It normally receives
log messages and notices from the rest of the nodes in the cluster using the
Zeek communications protocol. It combines the individual logs that each worker
produces, so that the result is a set of joint logs instead of many discrete
logs that you would have to combine in some manner with post-processing. (Note
that if you use a separate logger node, then the logger receives all logs
instead of the manager.) The manager also supports other functionality and
analysis which requires a centralized, global view of events or data.
Worker
------
The worker is the Zeek process that sniffs network traffic and does protocol
analysis on the reassembled traffic streams. Most of the work of an active
cluster takes place on the workers. Workers typically represent the bulk of the
Zeek processes that are running in a cluster. The fastest memory and CPU core
speed you can afford is recommended since all of the protocol parsing and most
analysis will take place here. There are no particular requirements for the
disks in workers since almost all logging is done remotely to the manager (or
dedicated logger). Normally, very little is written to disk.
Proxy
-----
A proxy is a Zeek process that may be used to offload data storage or any
arbitrary workload. A cluster may contain multiple proxy nodes.
Zeek's default scripts make only minimal use of proxies.
Custom scripts or third-party packages may exercise proxies more heavily
to partition data or workloads, providing greater cluster scalability potential.
The number of required proxy nodes in a cluster depends on the deployed scripts,
cluster size and traffic characteristics. For small clusters with four or less workers,
a single proxy node is usually sufficient. For larger clusters, you may want to
closely monitor :ref:`CPU and memory usage <framework-telemetry>` of proxy
nodes and increase their number as needed.
Zeek processes acting as proxies don't tend to be extremely hard on CPU or
memory, and users frequently run proxy processes on the same physical host as
the manager.
Logger
------
A logger is an optional Zeek process that receives log messages from the rest
of the nodes in the cluster using the Zeek communications protocol. The purpose
of having a logger to receive logs instead of the manager is to reduce the load
on the manager. If no logger is needed, then the manager will receive logs
instead.
Running a Zeek Cluster
======================
Zeek Cluster Setup
------------------
This :ref:`link <cluster-configuration>` describes the cluster setup in great
detail.
General Usage and Deployment
----------------------------
The biggest advantage to using a Zeek cluster is that most of its inner
workings are transparent to the user. Clusterization is a clever trick to
divide-and-conquer ever increasing network traffic volume.
As a practitioner one must know how to set up a cluster by defining components
such as the manager, proxies, loggers and workers in the
:samp:`{<prefix>}/etc/node.cfg` file on the manager.
Edit the ZeekControl node configuration file, :samp:`{<prefix>}/etc/node.cfg`,
to define where the logger, manager, proxies, and workers will run. For a
cluster configuration, comment-out (or remove) the standalone node in that
file, and either uncomment or add node entries for each node in your cluster
(logger, manager, proxy, and workers).
For example, to run five Zeek nodes (two workers, one proxy, a logger, and a
manager) on a cluster consisting of three machines, the cluster configuration
would look like this::
[logger]
type=logger
host=10.0.0.10
[manager]
type=manager
host=10.0.0.10
[proxy-1]
type=proxy
host=10.0.0.10
[worker-1]
type=worker
host=10.0.0.11
interface=eth0
[worker-2]
type=worker
host=10.0.0.12
interface=eth0
To set up a cluster we need a network-aggregator/load balancing device which
can aggregate inputs from network sources, such as taps or span ports. This
device also performs the critical function of ensuring each TCP session is
distributed to a single link. This function is provided through symmetric
hashing.
Once the tap aggregator is set, output from each port is sent to a "Zeek node"
which is typically built on commodity hardware. Zeek clusters have evolved from
running the manager, workers and proxies on individual servers, to most often
now running a "cluster-in-a-box" setup, where a powerful multi-core box with
dedicated cores hosts the workers, proxies, logger and manager. We've seen
instances of 90 workers running on a single physical server.
At present the preferred way to run a cluster is to use a load-balancing
network card such as Myricom NICs or Intel cards with PF_RING or AF_PACKET
support. The NIC (and associated software) further divides the traffic to
multiple Zeek worker processes running on the Zeek node.
While the Zeek cluster allows us to monitor traffic at scale, an optional
add-on technology called "shunting" is helpful to reduce the volume that needs
to be processed. Shunting can detect specific large data flows based on
predetermined characteristics and communicate with the network tap via an API
to stop sending those flows to Zeek for analysis. This allows Zeek to maintain
awareness and logs of these shunted large flows while dramatically reducing the
analysis load necessary to process traffic.
The following links gives more specific information on how to set up
clusterization using one of the above approaches: :ref:`cluster-configuration`.
Developing Scripts/Heuristics
=============================
This section is for developers who are interested in writing
packages/scripts/heuristics and want to take advantage of clusterization.
In order to make your scripts/packages "clusterized," one must understand the
purpose of each of the cluster components (manager, workers, proxies and
logger) and how/where the data is generated and how to move data/information
across the different nodes in the cluster.
* **Workers**: Workers are a good first choice for doing the brunt of any work.
They should be spending a lot of time parsing or analyzing incoming data from
packets. You might choose them to do a "first pass" analysis and then decide
how the results should be shared with other nodes in the cluster.
* **Proxies**: Proxies serve as intermediaries for data storage and computation
offloading. Proxies help offload work or data in a scalable and distributed
way. Since any given worker is connected to all proxies and can agree on an
"arbitrary key -> proxy node" mapping (discussed later), you can partition
work or data amongst them in a uniform manner. You might choose to use
proxies as a method to share non-persistent state or as a "second pass"
analysis for any work that you don't want interfering with the workers'
capacity to keep up with capturing and parsing packets. The default scripts
that come with Zeek make minimal use of proxies. If you are migrating from a
previous ZeekControl deployment, you may want to implement a single proxy
node. If you have custom or community scripts that utilize proxies,
consider scaling up the number of proxies to meet demand.
* **Manager**: A manager will make decisions that require a global view, as it
is in a centralized location and connected to everything. However, that
connectivity also makes it easy to overload it. Try to use a manager
sparingly and only for tasks that must be done in a centralized or
authoritative location. Optionally, for some deployments, the manager can
also serve as the sole logger.
* **Loggers**: Loggers should simply write logs to disk. In the default cluster
configuration, log content gets distributed among available loggers in a
round-robin fashion, providing failover capability should any given logger
temporarily go offline.
The Need to Move Data and Events Across Different Nodes
-------------------------------------------------------
Imagine you have a list of IP addresses that you want to distribute across all
workers to keep in a watch list, such as the Intel framework. You may also want
to aggregate results across workers to see if that count crosses a threshold,
such as using scan detection. Finally, you might want to extract URLs from
emails and then redistribute the extracted URLs to all workers to be able to
find which of these extracted URLs got clicked on. All these examples tend to
introduce challenges in a Zeek cluster setup due to data centrality issues. In
other words, the very advantageous divide-and-conquer approach of
clusterization also introduces complexity in Zeek scripts. However, with the
introduction of the Broker communication framework and additional helper
functions, data centrality complexities can be addressed efficiently. One must
rely on clusterization techniques provided by Zeek scripting, the Broker API,
and clusterization components.
When clustering your scripts, the fundamental work to move data or events in
the context of a cluster falls primarily on a few high-level abstractions of
communication patterns:
1. Manager-to-worker
2. Worker-to-manager
3. Worker-to-proxy
4. Worker-to-manager-to-worker
5. Manager-to-worker-to-manager
All the communication between workers, proxies and manager is established by
Zeek via the Broker framework. The Broker framework provides basic facilities
for connecting Zeek instances to each other and exchanging messages, events or
data.
Cluster Topics
--------------
All Broker-based messaging involves two components: the information you want to
send, such as an event with its arguments, along with an associated topic name
string. The topic strings are used as a filtering mechanism: Broker uses a
publish-subscribe communication pattern where peers advertise interest in topic
prefixes and only receive messages which match one of their prefix
subscriptions. Broker itself supports arbitrary topic strings. However, Zeek
generally follows certain conventions in choosing these topics to help avoid
conflicts and generally make them easier to remember.
To communicate between workers, proxies and manager one needs to know the topic
name to which all workers, proxies and manager are subscribed to. These are:
1. :zeek:see:`Cluster::worker_topic` - to which all workers are subscribed
2. :zeek:see:`Cluster::proxy_topic` - to which all proxies are subscribed
3. :zeek:see:`Cluster::manager_topic` - to which manager is subscribed
The following table illustrates all the topics and communication events for
clusterization, along with potential use cases:
.. list-table::
:header-rows: 1
* - Event
- Topic
- Use cases
* - Manager to worker
- :zeek:see:`Cluster::worker_topic`
- * Read input file on manager
* Distribute data and events from manager to workers
* - Worker to manager
- :zeek:see:`Cluster::manager_topic`
- * Find characteristics of a "scan", e.g. SYN-only packets
* Send data to manager for aggregation
* - Worker or manager to proxy
- :zeek:see:`Cluster::proxy_topic`
- * Run operation on all proxies
* Disseminate notice suppression
* - Worker to manager to worker
- :zeek:see:`Cluster::manager_topic` + :zeek:see:`Cluster::worker_topic`
- * Find URLs in emails
* Send to manager
* Distribute to workers to check against HTTP GET requests
* - Manager to worker to manager
- :zeek:see:`Cluster::worker_topic` + :zeek:see:`Cluster::manager_topic`
- * Read input file on manager
* Distribute data to workers
* Workers to report counts of connections to manager
* Aggregate the counts on manager
Cluster Pools
-------------
In addition to topics, Zeek nodes can join a :zeek:see:`Cluster::Pool`.
Using :zeek:see:`Cluster::publish_hrw` and :zeek:see:`Cluster::publish_rr`,
pools allow publishing events to individual proxies without prior knowledge
of a cluster's shape and size.
A popular pool is the :zeek:see:`Cluster::proxy_pool`. It comprises all
the proxies of a cluster. Examples of its use are listed in the following table.
.. list-table::
:header-rows: 1
* - Event
- Pool
- Use cases
* - Workers to individual proxy processes
- :zeek:see:`Cluster::proxy_pool`
- * Aggregation based on Highest Random Weight (e.g. DNS query types, see the :ref:`section below <cluster-framework-proxies-uniform>` for details.)
* Aggregation of Software versions for a given host
* Offloading tasks in round-robin fashion across proxies
Publishing Events Across the Cluster
------------------------------------
Broker, as well as Zeek's higher-level cluster framework, provide a set of
functions to publish events, including:
.. list-table::
:header-rows: 1
* - Function
- Description
- Use
* - :zeek:see:`Cluster::publish`
- Publishes an event at a given topic
- Standard function to send an event to all nodes subscribed to a given
topic.
* - :zeek:see:`Cluster::publish_hrw`
- Publishes an event to a node within a pool according to
Highest Random Weight (HRW) hashing strategy; see details below
- Use this in cases of any aggregation needs - e.g. scan detection or
anything that needs a counter going.
* - :zeek:see:`Cluster::publish_rr`
- Publishes an event to a node within a pool according to Round-Robin
distribution strategy.
- Generally used inside Zeek for multiple logger nodes.
* - :zeek:see:`Broker::publish`
- Publishes an event at a given topic
- Standard function to send an event to all nodes subscribed to a given
topic.
Starting with Zeek 7.1, this function should only be used in
Broker-specific scripts. Use :zeek:see:`Cluster::publish` otherwise.
.. note::
The ``Cluster::publish`` function was added in Zeek 7.1. In contrast to
``Broker::publish``, it publishes events even when a non-Broker cluster
backend is in use. Going forward, ``Cluster::publish`` should be preferred
over ``Broker::publish``, unless the script is specific to the Broker backend,
e.g. when interacting with an external application using native Python
bindings for Broker.
An example sending an event from worker to manager:
.. code-block:: zeek
event worker_to_manager(worker_name: string)
{
print "got event from worker", worker_name;
}
event some_event_handled_on_worker()
{
Broker::publish(Cluster::manager_topic, worker_to_manager,
Cluster::node);
}
More details and code snippets and documentation on Broker communication
frameworks are available at :ref:`broker-framework`.
.. _cluster-framework-proxies-uniform:
Distributing Events Uniformly Across Proxies
--------------------------------------------
If you want to offload some data/work from a worker to your proxies, we can
make use of a `Highest Random Weight (HRW) hashing
<https://en.wikipedia.org/wiki/Rendezvous_hashing>`_ distribution strategy to
uniformly map an arbitrary key space across all available proxies through
:zeek:see:`Cluster::publish_hrw`. This function publishes an event to one node
within a pool according to a Highest Random Weight hashing strategy. By
assigning :zeek:see:`Cluster::proxy_pool` to this event, one can utilize
proxies to handle it. Note that :zeek:see:`Cluster::publish_hrw` requires a
unique key as an input to the hashing function to uniformly distribute keys
among available nodes. Often this key is a source or destination IP address. If
you are using :zeek:see:`Cluster::publish_hrw` for an aggregate function, such
as counts unique across the workers, make sure to appropriately select the
hashing key.
The following example illustrates this issue. Assume that we are counting the
number of scanner IPs from each ``/24`` subnet. If the key were the source IP,
then depending on the hashing, different IP addresses from the same ``/24``
might end up on different proxies for the aggregation function. In this case
one might instead want to use a more inclusive hashing key, such as the subnet
(``/24``) itself. To illustrate the issue, in the notice log below, you see
that 3 scanners each from ``52.100.165.0/24`` went to ``proxy-1`` and
``proxy-2``. Ideally we want a single count of 6 scanners instead.
::
1600212249.061779 Scan::Subnet 52.100.165.0/24 has 3 spf IPs originating from it 52.100.165.249 52.100.165.237 52.100.165.246 - 52.100.165.246 - - proxy-2 Notice::ACTION_LOG 3600.000000 F
1600212293.581745 Scan::Subnet 52.100.165.0/24 has 3 spf IPs originating from it 52.100.165.247 52.100.165.244 52.100.165.205 - 52.100.165.205 - - proxy-1 Notice::ACTION_LOG 3600.000000
Instead, we can ensure the hash key is ``52.100.165.0/24`` instead of the
original IP, as the hash for ``52.100.165.0/24`` will be the same for all
addresses belonging to this subnet. Then the data will reach only one proxy.
To that end, we can use the ``mask_address`` function to extract subnet
information for a given IP address to use as a key in the hash function:
.. code-block:: zeek
local spf = mask_address(orig);
@if ( Cluster::is_enabled())
Cluster::publish_hrw(Cluster::proxy_pool, spf, smtpsink::aggregate_stats, c) ;
@else
event smtpsink::aggregate_stats(c);
@endif
Carefully select the key for :zeek:see:`Cluster::publish_hrw`. If done right,
this feature will bring tremendous benefits in code scalability, especially
when working with aggregate and threshold functions.
.. note::
In scripting for clusterization, using the correct module names and
namespaces is crucial as both events and data are transmitted to different
systems. In order to make sure the contexts are correct, all functions,
events and datasets should be scoped within their respective namespaces and
modules. An easy rule of thumb is to always use the explicit module namespace
scoping. See :ref:`event-namespacing-pitfall` for further explanation and
examples.
Clusterization of Zeek scripts can be an intimidating task for beginners.
However, with reliance on the new Broker framework, clusterization has become
simpler and more straightforward. Consider the following:
1. Communication overhead: Be sure not to generate unnecessary communication
overhead. For example, scan detection is one of the worst cases for
distributed analysis. One needs to count connections from a given IP address
across all workers and then aggregate them on a proxy or manager. All the
connections have to reach an aggregate function before Zeek can determine if
a given source is a scanner or not. This happens because each worker only
has a limited picture of the activity generated by a given remote IP.
2. Communication optimizations: Once a given remote IP is identified as
desired, make sure a manager reports that to the worker, and workers stop
sending any further data for that IP to the manager. This is especially
useful in scan detection where it takes only a few connections to identify
scans, while a given scanner might send millions of probes eventually. If
done right, workers will only send the first N connections, and stop after
that, thus saving a lot of communication overhead. It therefore makes sense
to stop workers from sending any further connection information.
3. Clusterization also requires timely state synchronization across the
workers, to make sure that all workers have a common view of a particular
heuristic.
4. When writing scripts for clusterization make sure your detection runs in
both cluster and standalone setup.
A Cluster Script Walkthrough
----------------------------
Let's say we want to count how many connections a remote IP is making to a host
in our network on port 3389 UDP. Due to the distributed nature of Zeek
clusters, connections are distributed across the workers based on a 5-tuple
hash (source IP, source port, destination IP, destination port, and protocol).
To get a central view of a connection between a given IP pair, one must deploy
a clusterized scripting approach. The following example highlights how to go
about doing so.
In this use case, we intend to create an aggregation function.
:zeek:see:`Cluster::publish_hrw` appears to be the appropriate function, since
it allows offloading a lot of work to proxies, thus leaving workers and manager
to process traffic.
In order to make sure all the connections between two hosts go to a single
specific proxy, we need to make sure the key for the hashing function
accommodates this constraint. We will use ``orig_h+resp_h`` as the key. We
create a new data-type called ``pair`` as seen in code below. This allows us
to use the ``orig+resp`` as a unique key across the code, including in the
candidate table. Further, we create a new data type called ``stats`` to keep
track of additional data associated with a connection pair.
.. code-block:: zeek
module DoS;
export {
redef enum Notice::Type += {
Threshold,
Victim_3389UDP,
};
type pair: record {
orig: addr;
resp: addr;
};
type stats: record {
orig: addr;
resp: addr ;
orig_bytes: count &default=0;
resp_bytes: count &default=0;
conns: count &default=0;
};
global dos_candidates: table [pair] of stats &create_expire=1 day;
global DoS::aggregate_stats:event(s: stats);
}
We choose the :zeek:see:`connection_state_remove` event as the primary event to
tap into. :zeek:see:`connection_state_remove` is generated when a connection's
internal state is about to be removed from memory. It's appropriate for this
case, as all the information about the connection is now included in the
:zeek:see:`connection` record ``c``. One disadvantage of using
:zeek:see:`connection_state_remove` is that the event is fired at the very end
of the connection, after the expiration timeouts are over. Thus, there are
delays, and any operation which happens on the data is “after-the-fact”: the
connection is already over. While this could be a problem in approaches such as
proactive blocking and early detection heuristics, in this case of aggregation
it is not an issue.
The thing to pay attention to in the code snippet below is the
:zeek:see:`@if`-:zeek:see:`@else`-:zeek:see:`@endif` directives which
differentiate between clusterized and standalone operation of the script. With
the :zeek:see:`@if` construct, the specified expression must evaluate to type
bool. If the value is true, then the following script lines (up to the next
:zeek:see:`@else` or :zeek:see:`@endif`) are available to be executed. In this
case we check if :zeek:see:`Cluster::is_enabled`. If so, we call
:zeek:see:`Cluster::publish_hrw` along with the key (``hash_pair``) and the
aggregate function followed by parameters, which is the stats record in this
case. If the cluster isn't running, that aggregate function is called
directly.
.. code-block:: zeek
event connection_state_remove(c: connection)
{
local service = c$id$resp_p;
local resp = c$id$resp_h;
if ( service != 3389/udp )
return;
if ( resp !in Site::local_nets )
return;
local s: stats;
s$orig = c$id$orig_h;
s$resp = c$id$resp_h;
s$orig_bytes = c$conn$orig_ip_bytes;
s$resp_bytes = c$conn$resp_ip_bytes;
local hash_pair: pair;
hash_pair$orig = c$id$orig_h;
hash_pair$resp = resp;
@if ( Cluster::is_enabled() )
Cluster::publish_hrw(Cluster::proxy_pool, hash_pair, DoS::aggregate_stats, s);
@else
event DoS::aggregate_stats(s);
@endif
}
Since ``hash_pair`` makes the key unique, irrespective of what worker this
specific connection has gone to, it will end up on one specific proxy only.
.. code-block:: zeek
event DoS::aggregate_stats(s: stats)
{
local p: pair ;
p$orig = s$orig;
p$resp = s$resp ;
if ( p !in dos_candidates )
{
local tmp_s: stats;
tmp_s$orig = s$orig;
tmp_s$resp = s$resp;
tmp_s$orig_bytes = 0;
tmp_s$resp_bytes= 0;
tmp_s$conns = 0;
dos_candidates[p] = tmp_s;
}
dos_candidates[p]$conns += 1;
dos_candidates[p]$orig_bytes += s$orig_bytes;
dos_candidates[p]$resp_bytes += s$resp_bytes;
local n = dos_candidates[p]$conns;
local thresh = check_ip_threshold(dos_threshold, ip_pair_threshold_idx, p, n);
if ( thresh )
{
local msg = fmt("%s pair has reached %s threshold %s",
p, n, dos_candidates[p]);
NOTICE([$note=DoS::Threshold, $src=p$orig, $msg=msg]);
if ( dos_candidates[p]$resp_bytes > 0 )
NOTICE([$note=DoS::Victim_3389UDP, $src=p$orig, $msg=msg,
$identifier=cat(p$resp), $suppress_for=1 hrs]);
}
}

View file

@ -0,0 +1,356 @@
.. _framework-configuration:
=======================
Configuration Framework
=======================
Zeek includes a configuration framework that allows updating script options at
runtime. This functionality consists of an :zeek:see:`option` declaration in
the Zeek language, configuration files that enable changing the value of
options at runtime, option-change callbacks to process updates in your Zeek
scripts, a couple of script-level functions to manage config settings directly,
and a log file (:file:`config.log`) that contains information about every
option value change according to :zeek:see:`Config::Info`.
Introduction
============
The configuration framework provides an alternative to using Zeek script
constants to store various Zeek settings.
While traditional constants work well when a value is not expected to change at
runtime, they cannot be used for values that need to be modified occasionally.
While a :zeek:see:`redef` allows a re-definition of an already defined constant
in Zeek, these redefinitions can only be performed when Zeek first starts.
Afterwards, constants can no longer be modified.
However, it is clearly desirable to be able to change at runtime many of the
configuration options that Zeek offers. Restarting Zeek can be time-consuming
and causes it to lose all connection state and knowledge that it accumulated.
Zeek's configuration framework solves this problem.
Declaring Options
=================
The :zeek:see:`option` keyword allows variables to be declared as configuration
options:
.. code-block:: zeek
module Test;
export {
option my_networks: set[subnet] = {};
option enable_feature = F;
option hostname = "testsystem";
option timeout_after = 1min;
option my_ports: vector of port = {};
}
Options combine aspects of global variables and constants. Like global
variables, options cannot be declared inside a function, hook, or event
handler. Like constants, options must be initialized when declared (the type
can often be inferred from the initializer but may need to be specified when
ambiguous). The value of an option can change at runtime, but options cannot be
assigned a new value using normal assignments.
The initial value of an option can be redefined with a :zeek:see:`redef`
declaration just like for global variables and constants. However, there is no
need to specify the :zeek:see:`&redef` attribute in the declaration of an
option. For example, given the above option declarations, here are possible
redefs that work anyway:
.. code-block:: zeek
redef Test::enable_feature = T;
redef Test::my_networks += { 10.1.0.0/16, 10.2.0.0/16 };
Changing Options
================
The configuration framework facilitates reading in new option values from
external files at runtime. Configuration files contain a mapping between option
names and their values. Each line contains one option assignment, formatted as
follows::
[option name][tab/spaces][new value]
Lines starting with ``#`` are comments and ignored.
You register configuration files by adding them to
:zeek:see:`Config::config_files`, a set of filenames. Simply say something like
the following in :file:`local.zeek`:
.. code-block:: zeek
redef Config::config_files += { "/path/to/config.dat" };
Zeek will then monitor the specified file continuously for changes. For
example, adding a line containing::
Test::enable_feature T
to the config file while Zeek is running will cause it to automatically update
the option's value in the scripting layer. The next time your code accesses the
option, it will see the new value.
.. note::
The config framework is clusterized. In a cluster configuration, only the
manager node watches the specified configuration files, and relays option
updates across the cluster.
Config File Formatting
----------------------
The formatting of config option values in the config file is not the same as in
Zeek's scripting language. Keep an eye on the :file:`reporter.log` for warnings
from the config reader in case of incorrectly formatted values, which it'll
generally ignore when encountered. The following table summarizes supported
types and their value representations:
.. list-table::
:header-rows: 1
* - Data Type
- Sample Config File Entry
- Comments
* - :zeek:see:`addr`
- ``1.2.3.4``
- Plain IPv4 or IPv6 address, as in Zeek. No ``/32`` or similar netmasks.
* - :zeek:see:`bool`
- ``T``
- ``T`` or ``1`` for true, ``F`` or ``0`` for false
* - :zeek:see:`count`
- ``42``
- Plain, nonnegative integer.
* - :zeek:see:`double`
- ``-42.5``
- Plain double number.
* - :zeek:see:`enum`
- ``Enum::FOO_A``
- Plain enum string.
* - :zeek:see:`int`
- ``-1``
- Plain integer.
* - :zeek:see:`interval`
- ``3600.0``
- Always in epoch seconds, with optional fraction of seconds. Never
includes a time unit.
* - :zeek:see:`pattern`
- ``/(foo|bar)/``
- The regex pattern, within forward-slash characters.
* - :zeek:see:`port`
- ``42/tcp``
- Port number with protocol, as in Zeek. When the protocol part is missing,
Zeek interprets it as ``/unknown``.
* - :zeek:see:`set`
- ``80/tcp,53/udp``
- The set members, formatted as per their own type, separated by commas.
For an empty set, use an empty string: just follow the option name with
whitespace.
Sets with multiple index types (e.g. ``set[addr,string]``) are currently
not supported in config files.
* - :zeek:see:`string`
- ``Don't bite, Zeek``
- Plain string, no quotation marks. Given quotation marks become part of
the string. Everything after the whitespace separator delineating the
option name becomes the string. Spaces and special characters are fine.
Backslash characters (e.g. ``\n``) have no special meaning.
* - :zeek:see:`subnet`
- ``1.2.3.4/16``
- Plain subnet, as in Zeek.
* - :zeek:see:`time`
- ``1608164505.5``
- Always in epoch seconds, with optional fraction of seconds. Never
includes a time unit.
* - :zeek:see:`vector`
- ``1,2,3,4``
- The set members, formatted as per their own type, separated by commas.
For an empty vector, use an empty string: just follow the option name
with whitespace.
This leaves a few data types unsupported, notably tables and records. If you
require these, build up an instance of the corresponding type manually (perhaps
from a separate input framework file) and then call
:zeek:see:`Config::set_value` to update the option:
.. code-block:: zeek
module Test;
export {
option host_port: table[addr] of port = {};
}
event zeek_init() {
local t: table[addr] of port = { [10.0.0.2] = 123/tcp };
Config::set_value("Test::host_port", t);
}
Regardless of whether an option change is triggered by a config file or via
explicit :zeek:see:`Config::set_value` calls, Zeek always logs the change to
:file:`config.log`. A sample entry::
#fields ts id old_value new_value location
#types time string string string string
1608167352.498872 Test::a_count 42 3 config.txt
Mentioning options repeatedly in the config files leads to multiple update
events; the last entry “wins”. Mentioning options that do not correspond to
existing options in the script layer is safe, but triggers warnings in
:file:`reporter.log`::
warning: config.txt/Input::READER_CONFIG: Option 'an_unknown' does not exist. Ignoring line.
Internally, the framework uses the Zeek input framework to learn about config
changes. If you inspect the configuration framework scripts, you will notice
that the scripts simply catch input framework events and call
:zeek:see:`Config::set_value` to set the relevant option to the new value. If
you want to change an option in your scripts at runtime, you can likewise call
:zeek:see:`Config::set_value` directly from a script (in a cluster
configuration, this only needs to happen on the manager, as the change will be
automatically sent to all other nodes in the cluster).
.. note::
The input framework is usually very strict about the syntax of input files, but
that is not the case for configuration files. These require no header lines,
and both tabs and spaces are accepted as separators. A custom input reader,
specifically for reading config files, facilitates this.
.. tip::
The gory details of option-parsing reside in ``Ascii::ParseValue()`` in
:file:`src/threading/formatters/Ascii.cc` and ``Value::ValueToVal`` in
:file:`src/threading/SerialTypes.cc` in the Zeek core.
Change Handlers
===============
A change handler is a user-defined function that Zeek calls each time an option
value changes. This allows you to react programmatically to option changes. The
following example shows how to register a change handler for an option that has
a data type of :zeek:see:`addr` (for other data types, the return type and
second parameter data type must be adjusted accordingly):
.. code-block:: zeek
module Test;
export {
option testaddr = 127.0.0.1;
}
# Note: the data type of 2nd parameter and return type must match
function change_addr(id: string, new_value: addr): addr
{
print fmt("Value of %s changed from %s to %s", id, testaddr, new_value);
return new_value;
}
event zeek_init()
{
Option::set_change_handler("Test::testaddr", change_addr);
}
Immediately before Zeek changes the specified option value, it invokes any
registered change handlers. The value returned by the change handler is the
value Zeek assigns to the option. This allows, for example, checking of values
to reject invalid input (the original value can be returned to override the
change).
.. note::
:zeek:see:`Option::set_change_handler` expects the name of the option to
invoke the change handler for, not the option itself. Also, that name
includes the module name, even when registering from within the module.
It is possible to define multiple change handlers for a single option. In this
case, the change handlers are chained together: the value returned by the first
change handler is the “new value” seen by the next change handler, and so on.
The built-in function :zeek:see:`Option::set_change_handler` takes an optional
third argument that can specify a priority for the handlers.
A change handler function can optionally have a third argument of type string.
When a config file triggers a change, then the third argument is the pathname
of the config file. When the :zeek:see:`Config::set_value` function triggers a
change, then the third argument of the change handler is the value passed to
the optional third argument of the :zeek:see:`Config::set_value` function.
.. tip::
Change handlers are also used internally by the configuration framework. If
you look at the script-level source code of the config framework, you can see
that change handlers log the option changes to :file:`config.log`.
When Change Handlers Trigger
----------------------------
Change handlers often implement logic that manages additional internal state.
For example, depending on a performance toggle option, you might initialize or
clean up a caching structure. In such scenarios you need to know exactly when
and whether a handler gets invoked. The following hold:
* When no config files get registered in :zeek:see:`Config::config_files`,
change handlers do not run.
* When none of any registered config files exist on disk, change handlers do
not run.
That is, change handlers are tied to config files, and don't automatically run
with the options' default values.
* When a config file exists on disk at Zeek startup, change handlers run with
the files config values.
* When the config file contains the same value the option already defaults to,
its change handlers are invoked anyway.
* :zeek:see:`zeek_init` handlers run before any change handlers — i.e., they
run with the options' default values.
* Since the config framework relies on the input framework, the input
framework's inherent asynchrony applies: you can't assume when exactly an
option change manifests in the code.
If your change handler needs to run consistently at startup and when options
change, you can call the handler manually from :zeek:see:`zeek_init` when you
register it. That way, initialization code always runs for the option's default
value, and also for any new values.
.. code-block:: zeek
module Test;
export {
option use_cache = T;
}
function use_cache_hdlr(id: string, new_value: bool): bool
{
if ( new_value ) {
# Ensure caching structures are set up properly
}
return new_value;
}
event zeek_init()
{
use_cache_hdlr("Test::use_cache", use_cache);
Option::set_change_handler("Test::use_cache", use_cache_hdlr);
}

View file

@ -0,0 +1,3 @@
{"ip": "192.168.17.1", "timestamp": 1333252748, "reason": "Malware host"}
{"ip": "192.168.27.2", "timestamp": 1330235733, "reason": "Botnet server"}
{"ip": "192.168.250.3", "timestamp": 1333145108, "reason": "Virus detected"}

Some files were not shown because too many files have changed in this diff Show more