mirror of
https://github.com/zeek/zeek.git
synced 2025-10-02 14:48:21 +00:00
Merge 7e638a41f2
into 0700427bac
This commit is contained in:
commit
372b70a8ca
1077 changed files with 169219 additions and 29 deletions
24
.github/workflows/generate-docs.yml
vendored
24
.github/workflows/generate-docs.yml
vendored
|
@ -34,15 +34,6 @@ jobs:
|
|||
with:
|
||||
submodules: "recursive"
|
||||
|
||||
# Only reset the submodule pointer for scheduled builds. The reason to do
|
||||
# this is to pick up any merge commits or anything that may have been
|
||||
# missed in a merge, but not have any actual content. We don't want to do
|
||||
# it otherwise because PRs should just use the submodule they're pointing
|
||||
# at.
|
||||
- name: Switch doc submodule to master
|
||||
if: github.event_name == 'schedule'
|
||||
run: cd doc && git checkout master
|
||||
|
||||
- name: Fetch Dependencies
|
||||
run: |
|
||||
sudo apt-get update
|
||||
|
@ -119,9 +110,6 @@ jobs:
|
|||
|
||||
cd doc
|
||||
|
||||
echo "*** Running pre-commit ***"
|
||||
pre-commit run -a --show-diff-on-failure --color=always
|
||||
|
||||
echo "*** Generating Sphinx Docs ***"
|
||||
make > make.out 2>&1
|
||||
make_status=$?
|
||||
|
@ -132,7 +120,7 @@ jobs:
|
|||
grep -q WARNING make.out && exit 1
|
||||
rm make.out
|
||||
|
||||
- name: Push zeek-docs Changes
|
||||
- name: Push docs Changes
|
||||
if: github.event_name == 'schedule'
|
||||
run: |
|
||||
cd doc
|
||||
|
@ -142,16 +130,6 @@ jobs:
|
|||
# with a check that detects whether there's anything staged.
|
||||
git diff-index --cached --quiet HEAD || { git commit -m "Generate docs" && git push; }
|
||||
|
||||
- name: Update zeek-docs Submodule
|
||||
if: github.event_name == 'schedule'
|
||||
run: |
|
||||
git config --global user.name zeek-bot
|
||||
git config --global user.email info@zeek.org
|
||||
git add doc
|
||||
git status
|
||||
# Similar logic here: proceed only if there's a change in the submodule.
|
||||
git diff-index --cached --quiet HEAD || { git commit -m 'Update doc submodule [nomail] [skip ci]' && git push; }
|
||||
|
||||
- name: Send email
|
||||
# Only send notifications for scheduled runs. Runs from pull requests
|
||||
# show failures in the GitHub UI.
|
||||
|
|
3
.gitignore
vendored
3
.gitignore
vendored
|
@ -3,6 +3,9 @@
|
|||
build*
|
||||
!ci/windows/build.cmd
|
||||
|
||||
# Don't ignore things in the docs directory
|
||||
!doc/**
|
||||
|
||||
tmp
|
||||
*.gcov
|
||||
|
||||
|
|
3
.gitmodules
vendored
3
.gitmodules
vendored
|
@ -16,9 +16,6 @@
|
|||
[submodule "auxil/netcontrol-connectors"]
|
||||
path = auxil/netcontrol-connectors
|
||||
url = https://github.com/zeek/zeek-netcontrol
|
||||
[submodule "doc"]
|
||||
path = doc
|
||||
url = https://github.com/zeek/zeek-docs
|
||||
[submodule "auxil/paraglob"]
|
||||
path = auxil/paraglob
|
||||
url = https://github.com/zeek/paraglob
|
||||
|
|
|
@ -10,7 +10,7 @@ repos:
|
|||
language: python
|
||||
files: '\.(h|c|cpp|cc|spicy|evt)$'
|
||||
types: [file]
|
||||
exclude: '^(testing/btest/(Baseline|plugins|spicy|scripts)/.*|testing/builtin-plugins/.*|src/3rdparty/.*)$'
|
||||
exclude: '^(testing/btest/(Baseline|plugins|spicy|scripts)/.*|testing/builtin-plugins/.*|src/3rdparty/.*|doc/.*)$'
|
||||
|
||||
- id: btest-command-commented
|
||||
name: Check that all BTest command lines are commented out
|
||||
|
@ -56,4 +56,4 @@ repos:
|
|||
rev: v0.26.0
|
||||
hooks:
|
||||
- id: spicy-format
|
||||
exclude: '^testing/.*'
|
||||
exclude: '^(testing/.*|doc/devel/spicy/autogen/.*)'
|
||||
|
|
16
.readthedocs.yml
Normal file
16
.readthedocs.yml
Normal file
|
@ -0,0 +1,16 @@
|
|||
version: 2
|
||||
|
||||
formats:
|
||||
- htmlzip
|
||||
|
||||
build:
|
||||
os: ubuntu-24.04
|
||||
tools:
|
||||
python: "3.13"
|
||||
|
||||
python:
|
||||
install:
|
||||
- requirements: doc/requirements.txt
|
||||
|
||||
sphinx:
|
||||
configuration: doc/conf.py
|
1
doc
1
doc
|
@ -1 +0,0 @@
|
|||
Subproject commit 2731def9159247e6da8a3191783c89683363689c
|
2
doc/.gitignore
vendored
Normal file
2
doc/.gitignore
vendored
Normal file
|
@ -0,0 +1,2 @@
|
|||
build
|
||||
*.pyc
|
66
doc/.typos.toml
Normal file
66
doc/.typos.toml
Normal file
|
@ -0,0 +1,66 @@
|
|||
[default]
|
||||
extend-ignore-re = [
|
||||
# seh too close to she
|
||||
"registered SEH to support IDL",
|
||||
# ALLO is a valid FTP command
|
||||
"\"ALLO\".*[0-9]{3}",
|
||||
"des-ede3-cbc-Env-OID",
|
||||
# On purpose
|
||||
"\"THE NETBIOS NAM\"",
|
||||
# NFS stuff.
|
||||
"commited :zeek:type:`NFS3::stable_how_t`",
|
||||
"\\/fo\\(o",
|
||||
" nd\\.<br",
|
||||
"\"BaR\"",
|
||||
"Not-ECT",
|
||||
"Ninteenth: Ninteenth",
|
||||
|
||||
# Connecton and file UIDs
|
||||
"[CF][a-zA-Z0-9]{17}",
|
||||
|
||||
# Smoot
|
||||
"Smoot",
|
||||
|
||||
"SIEM",
|
||||
]
|
||||
|
||||
extend-ignore-identifiers-re = [
|
||||
"TLS_.*_EDE.*_.*",
|
||||
"SSL.*_EDE.*_.*",
|
||||
"_3DES_EDE_CBC_SHA",
|
||||
"GOST_R_.*",
|
||||
"icmp6_nd_.*",
|
||||
"pn", # Use for `PoolNode` variables
|
||||
"complte_flag", # Existing use in exported record in base.
|
||||
"VidP(n|N)", # In SMB.
|
||||
"iin", # In DNP3.
|
||||
"(ScValidatePnPService|ScSendPnPMessage)", # In DCE-RPC.
|
||||
"snet", # Used as shorthand for subnet in base scripts.
|
||||
"typ",
|
||||
"tpe",
|
||||
]
|
||||
|
||||
[default.extend-identifiers]
|
||||
MCA_OCCURED = "MCA_OCCURED"
|
||||
MNT3ERR_ACCES = "MNT3ERR_ACCES"
|
||||
ND_QUEUE_OVERFLOW = "ND_QUEUE_OVERFLOW"
|
||||
ND_REDIRECT = "ND_REDIRECT"
|
||||
NFS3ERR_ACCES = "NFS3ERR_ACCES"
|
||||
NO_SEH = "NO_SEH"
|
||||
RPC_NT_CALL_FAILED_DNE = "RPC_NT_CALL_FAILED_DNE"
|
||||
RpcAddPrintProvidor = "RpcAddPrintProvidor"
|
||||
RpcDeletePrintProvidor = "RpcDeletePrintProvidor"
|
||||
THA = "THA"
|
||||
tha = "tha"
|
||||
uses_seh = "uses_seh"
|
||||
exat = "exat"
|
||||
EXAT = "EXAT"
|
||||
tpe = "tpe"
|
||||
|
||||
[default.extend-words]
|
||||
caf = "caf"
|
||||
helo = "helo"
|
||||
# Seems we use this in the management framework
|
||||
requestor = "requestor"
|
||||
# `inout` is used as a keyword in Spicy, but looks like a typo of `input`.
|
||||
inout = "inout"
|
5
doc/LICENSE
Normal file
5
doc/LICENSE
Normal file
|
@ -0,0 +1,5 @@
|
|||
This work is licensed under the Creative Commons
|
||||
Attribution 4.0 International License. To view a copy of this
|
||||
license, visit https://creativecommons.org/licenses/by/4.0/ or send
|
||||
a letter to Creative Commons, 444 Castro Street, Suite 900, Mountain
|
||||
View, California, 94041, USA.
|
37
doc/Makefile
Normal file
37
doc/Makefile
Normal file
|
@ -0,0 +1,37 @@
|
|||
SPHINXOPTS =
|
||||
|
||||
NUMJOBS ?= auto
|
||||
|
||||
all: html
|
||||
|
||||
doc: html
|
||||
|
||||
builddir:
|
||||
mkdir -p build/html
|
||||
|
||||
clean:
|
||||
rm -rf build/html
|
||||
|
||||
html: builddir
|
||||
sphinx-build -j $(NUMJOBS) -b html $(SPHINXOPTS) . ./build/html
|
||||
|
||||
livehtml: builddir
|
||||
sphinx-autobuild --ignore "*.git/*" --ignore "*.lock" --ignore "*.pyc" --ignore "*.swp" --ignore "*.swpx" --ignore "*.swx" -b html $(SPHINXOPTS) . ./build/html
|
||||
|
||||
commit:
|
||||
git add * && git commit -m 'Update generated docs'
|
||||
|
||||
spicy-%:
|
||||
git clone https://github.com/zeek/$@
|
||||
|
||||
check-spicy-docs: spicy-tftp
|
||||
@echo Refreshing checkouts
|
||||
@for REPO in $^; do (cd $$REPO && git pull && git reset HEAD --hard)>/dev/null; done
|
||||
@
|
||||
@echo Checking whether docs for Spicy integration are up-to-date
|
||||
@./devel/spicy/autogen-spicy-docs spicy-tftp
|
||||
@
|
||||
@git diff --quiet devel/spicy/autogen/ \
|
||||
|| (echo "Spicy docs are not up-to-date, rerun './devel/spicy/autogen-spicy-docs'." && exit 1)
|
||||
|
||||
.PHONY : all doc builddir clean html livehtml
|
132
doc/README
Normal file
132
doc/README
Normal file
|
@ -0,0 +1,132 @@
|
|||
.. _zeek-docs: https://github.com/zeek/zeek-docs
|
||||
.. _Read the Docs: https://docs.readthedocs.io/en/stable/index.html
|
||||
.. _Zeek repo: https://github.com/zeek/zeek
|
||||
.. _Sphinx: https://www.sphinx-doc.org/en/master
|
||||
.. _pip: https://pypi.org/project/pip
|
||||
|
||||
Zeek Documentation
|
||||
==================
|
||||
|
||||
The documentation repo at zeek-docs_
|
||||
contains version-specific Zeek documentation source files that are ultimately
|
||||
used as the basis for content hosted at https://docs.zeek.org.
|
||||
|
||||
Markup Format, Style, and Conventions
|
||||
-------------------------------------
|
||||
|
||||
For general guidance on the basics of how the documentation is written,
|
||||
consult this Zeek wiki:
|
||||
|
||||
https://github.com/zeek/zeek/wiki/Documentation-Style-and-Conventions
|
||||
|
||||
Source-Tree Organization
|
||||
------------------------
|
||||
|
||||
The zeek-docs_ repo containing this README file is the root of a Sphinx_ source
|
||||
tree and can be modified to add more documentation, style sheets, JavaScript,
|
||||
etc. The Sphinx config file is ``conf.py``. The typical way new documents get
|
||||
integrated is from them being referenced directly in ``index.rst`` or
|
||||
indirectly from something in the ``toctree`` (Table of Contents Tree) specified
|
||||
in that main index.
|
||||
|
||||
There is also a custom Sphinx domain implemented in ``ext/zeek.py`` which adds
|
||||
some reStructureText (reST) directives and roles that aid in generating useful
|
||||
index entries and cross-references. This primarily supports integration with
|
||||
the script-reference sections, some of which are auto-generated by Zeek's
|
||||
Doxygen-like feature, named "Zeekygen". The bulk of auto-generated content
|
||||
lives under the ``scripts/`` directory or has a file name starting with
|
||||
"autogenerated", so if you find yourself wanting to change those, you should
|
||||
actually look at at doing those changes within the `Zeek repo`_ itself rather
|
||||
than here, so see the next section for how Zeekygen docs can be (re)generated.
|
||||
|
||||
Generating Zeekygen Reference Docs
|
||||
----------------------------------
|
||||
|
||||
All Zeekygen-generated docs get committed into Git, so if you don't have to
|
||||
perform any changes on it and just want to preview what's already existing,
|
||||
you can skip down to the next :ref:`Local Previewing <local-doc-preview>` section.
|
||||
|
||||
The Zeekygen documentation-generation feature is a part of Zeek itself, so
|
||||
you'll want to obtain the `Zeek repo`_ from Git, read the :doc:`INSTALL
|
||||
</install>` file directions to install required dependencies, and build Zeek::
|
||||
|
||||
git clone --recursive https://github.com/zeek/zeek
|
||||
cd zeek
|
||||
# Read INSTALL file and get dependencies here
|
||||
./configure && make -j $(nproc)
|
||||
# Make desired edits to scripts/, src/, etc.
|
||||
./ci/update-zeekygen-docs.sh
|
||||
|
||||
The last command runs a script to generate documentation, which will end up in
|
||||
the ``doc/`` subdirectory. Note that ``doc/`` is just a Git submodule of this
|
||||
this zeek-docs_ repository, so you can run ``git status`` there to find exactly
|
||||
what changed.
|
||||
|
||||
Also note that the documentation-generation script is run automatically
|
||||
on a daily basis to incorporate up any documentation changes that people make
|
||||
in Zeek itself without them having to necessarily be aware of the full
|
||||
documentation process. The GitHub Action that does that daily task is
|
||||
located in the Zeek repo's ``.github/workflows/generate-docs.yml`` file.
|
||||
|
||||
.. _local-doc-preview:
|
||||
|
||||
Local Previewing (How To Build)
|
||||
-------------------------------
|
||||
|
||||
First make sure you have the required dependencies used for building docs:
|
||||
|
||||
* Python interpreter >= 3.9
|
||||
* Sphinx: https://www.sphinx-doc.org/en/master/
|
||||
* Read the Docs Sphinx Theme: https://github.com/rtfd/sphinx_rtd_theme
|
||||
* GitPython: https://github.com/gitpython-developers/GitPython
|
||||
|
||||
If you have pip_, you may just use the command ``pip3 install -r
|
||||
requirements.txt`` to install all the dependencies using the
|
||||
``requirements.txt`` from zeek-docs_.
|
||||
|
||||
Now run ``make`` within the zeek-docs_ repository's top-level to locally render
|
||||
its reST files into HTML. After the build completes, HTML documentation is
|
||||
symlinked in ``build/html`` and you can open the ``index.html`` found there in
|
||||
your web browser.
|
||||
|
||||
There's also a ``make livehtml`` (requires ``pip3 install sphinx-autobuild``)
|
||||
target in the top-level Makefile that is useful for editing the reST files and
|
||||
seeing changes rendered out live to a separate browser.
|
||||
|
||||
Hosting
|
||||
-------
|
||||
|
||||
Documentation is hosted by `Read the Docs`_ (RTD), so you can generally read
|
||||
about how it works there. The web-interface is accessible via
|
||||
https://readthedocs.org/projects/zeek-docs.
|
||||
|
||||
How zeek-docs_ is configured to use RTD is a combination of some custom
|
||||
settings in its ``.readthedocs.yml`` file and others only accessible through
|
||||
RTD's web-interface (e.g. domain and subproject settings). Most config
|
||||
settings are likely understandable just by browsing the web-interface and
|
||||
RTD's guides, but a few particular points to mention:
|
||||
|
||||
* There is an associated, always-failing project at
|
||||
https://readthedocs.org/projects/zeek. It's always-failing because
|
||||
RTD redirects only activate when pages 404 and this project exists so that
|
||||
all attempts to use https://zeek.rtfd.io or https://zeek.readthedocs.io
|
||||
get redirected to https://docs.zeek.org. Those would have been the project
|
||||
URLs if ownership of the RTD 'zeek' project was had from the start, but
|
||||
it was only obtained later, after documentation already started development
|
||||
in the 'zeek-docs' RTD project slug.
|
||||
|
||||
* Over time, page redirects have accrued into ``redirects.yml`` as a way to
|
||||
help document what they are and why they happened and also as a potential
|
||||
way to automate addition/reinstantiation of a large number of redirects,
|
||||
but typically redirects can be manually added via the RTD web interface
|
||||
first and then noted in ``redirects.yml``
|
||||
|
||||
* There are RTD subprojects for things like Broker, Package Manager,
|
||||
and Spicy. The use of subprojects simply allows access to their RTD
|
||||
docs via the custom domain of https://docs.zeek.org
|
||||
|
||||
* RTD will auto-build any newly-pushed commits to zeek-docs_ (i.e. a webhook is
|
||||
configured), but if a tag is changed to point somewhere different, you'll
|
||||
typically have to go into the RTD web interface, "Edit" the associated
|
||||
version under "Versions", "wipe" the existing docs, and then manually trigger
|
||||
a rebuild of that version tag under "Builds".
|
132
doc/README.rst
Normal file
132
doc/README.rst
Normal file
|
@ -0,0 +1,132 @@
|
|||
.. _zeek-docs: https://github.com/zeek/zeek-docs
|
||||
.. _Read the Docs: https://docs.readthedocs.io/en/stable/index.html
|
||||
.. _Zeek repo: https://github.com/zeek/zeek
|
||||
.. _Sphinx: https://www.sphinx-doc.org/en/master
|
||||
.. _pip: https://pypi.org/project/pip
|
||||
|
||||
Zeek Documentation
|
||||
==================
|
||||
|
||||
The documentation repo at zeek-docs_
|
||||
contains version-specific Zeek documentation source files that are ultimately
|
||||
used as the basis for content hosted at https://docs.zeek.org.
|
||||
|
||||
Markup Format, Style, and Conventions
|
||||
-------------------------------------
|
||||
|
||||
For general guidance on the basics of how the documentation is written,
|
||||
consult this Zeek wiki:
|
||||
|
||||
https://github.com/zeek/zeek/wiki/Documentation-Style-and-Conventions
|
||||
|
||||
Source-Tree Organization
|
||||
------------------------
|
||||
|
||||
The zeek-docs_ repo containing this README file is the root of a Sphinx_ source
|
||||
tree and can be modified to add more documentation, style sheets, JavaScript,
|
||||
etc. The Sphinx config file is ``conf.py``. The typical way new documents get
|
||||
integrated is from them being referenced directly in ``index.rst`` or
|
||||
indirectly from something in the ``toctree`` (Table of Contents Tree) specified
|
||||
in that main index.
|
||||
|
||||
There is also a custom Sphinx domain implemented in ``ext/zeek.py`` which adds
|
||||
some reStructureText (reST) directives and roles that aid in generating useful
|
||||
index entries and cross-references. This primarily supports integration with
|
||||
the script-reference sections, some of which are auto-generated by Zeek's
|
||||
Doxygen-like feature, named "Zeekygen". The bulk of auto-generated content
|
||||
lives under the ``scripts/`` directory or has a file name starting with
|
||||
"autogenerated", so if you find yourself wanting to change those, you should
|
||||
actually look at at doing those changes within the `Zeek repo`_ itself rather
|
||||
than here, so see the next section for how Zeekygen docs can be (re)generated.
|
||||
|
||||
Generating Zeekygen Reference Docs
|
||||
----------------------------------
|
||||
|
||||
All Zeekygen-generated docs get committed into Git, so if you don't have to
|
||||
perform any changes on it and just want to preview what's already existing,
|
||||
you can skip down to the next :ref:`Local Previewing <local-doc-preview>` section.
|
||||
|
||||
The Zeekygen documentation-generation feature is a part of Zeek itself, so
|
||||
you'll want to obtain the `Zeek repo`_ from Git, read the :doc:`INSTALL
|
||||
</install>` file directions to install required dependencies, and build Zeek::
|
||||
|
||||
git clone --recursive https://github.com/zeek/zeek
|
||||
cd zeek
|
||||
# Read INSTALL file and get dependencies here
|
||||
./configure && make -j $(nproc)
|
||||
# Make desired edits to scripts/, src/, etc.
|
||||
./ci/update-zeekygen-docs.sh
|
||||
|
||||
The last command runs a script to generate documentation, which will end up in
|
||||
the ``doc/`` subdirectory. Note that ``doc/`` is just a Git submodule of this
|
||||
this zeek-docs_ repository, so you can run ``git status`` there to find exactly
|
||||
what changed.
|
||||
|
||||
Also note that the documentation-generation script is run automatically
|
||||
on a daily basis to incorporate up any documentation changes that people make
|
||||
in Zeek itself without them having to necessarily be aware of the full
|
||||
documentation process. The GitHub Action that does that daily task is
|
||||
located in the Zeek repo's ``.github/workflows/generate-docs.yml`` file.
|
||||
|
||||
.. _local-doc-preview:
|
||||
|
||||
Local Previewing (How To Build)
|
||||
-------------------------------
|
||||
|
||||
First make sure you have the required dependencies used for building docs:
|
||||
|
||||
* Python interpreter >= 3.9
|
||||
* Sphinx: https://www.sphinx-doc.org/en/master/
|
||||
* Read the Docs Sphinx Theme: https://github.com/rtfd/sphinx_rtd_theme
|
||||
* GitPython: https://github.com/gitpython-developers/GitPython
|
||||
|
||||
If you have pip_, you may just use the command ``pip3 install -r
|
||||
requirements.txt`` to install all the dependencies using the
|
||||
``requirements.txt`` from zeek-docs_.
|
||||
|
||||
Now run ``make`` within the zeek-docs_ repository's top-level to locally render
|
||||
its reST files into HTML. After the build completes, HTML documentation is
|
||||
symlinked in ``build/html`` and you can open the ``index.html`` found there in
|
||||
your web browser.
|
||||
|
||||
There's also a ``make livehtml`` (requires ``pip3 install sphinx-autobuild``)
|
||||
target in the top-level Makefile that is useful for editing the reST files and
|
||||
seeing changes rendered out live to a separate browser.
|
||||
|
||||
Hosting
|
||||
-------
|
||||
|
||||
Documentation is hosted by `Read the Docs`_ (RTD), so you can generally read
|
||||
about how it works there. The web-interface is accessible via
|
||||
https://readthedocs.org/projects/zeek-docs.
|
||||
|
||||
How zeek-docs_ is configured to use RTD is a combination of some custom
|
||||
settings in its ``.readthedocs.yml`` file and others only accessible through
|
||||
RTD's web-interface (e.g. domain and subproject settings). Most config
|
||||
settings are likely understandable just by browsing the web-interface and
|
||||
RTD's guides, but a few particular points to mention:
|
||||
|
||||
* There is an associated, always-failing project at
|
||||
https://readthedocs.org/projects/zeek. It's always-failing because
|
||||
RTD redirects only activate when pages 404 and this project exists so that
|
||||
all attempts to use https://zeek.rtfd.io or https://zeek.readthedocs.io
|
||||
get redirected to https://docs.zeek.org. Those would have been the project
|
||||
URLs if ownership of the RTD 'zeek' project was had from the start, but
|
||||
it was only obtained later, after documentation already started development
|
||||
in the 'zeek-docs' RTD project slug.
|
||||
|
||||
* Over time, page redirects have accrued into ``redirects.yml`` as a way to
|
||||
help document what they are and why they happened and also as a potential
|
||||
way to automate addition/reinstantiation of a large number of redirects,
|
||||
but typically redirects can be manually added via the RTD web interface
|
||||
first and then noted in ``redirects.yml``
|
||||
|
||||
* There are RTD subprojects for things like Broker, Package Manager,
|
||||
and Spicy. The use of subprojects simply allows access to their RTD
|
||||
docs via the custom domain of https://docs.zeek.org
|
||||
|
||||
* RTD will auto-build any newly-pushed commits to zeek-docs_ (i.e. a webhook is
|
||||
configured), but if a tag is changed to point somewhere different, you'll
|
||||
typically have to go into the RTD web interface, "Edit" the associated
|
||||
version under "Versions", "wipe" the existing docs, and then manually trigger
|
||||
a rebuild of that version tag under "Builds".
|
32
doc/_static/theme_overrides.css
vendored
Normal file
32
doc/_static/theme_overrides.css
vendored
Normal file
|
@ -0,0 +1,32 @@
|
|||
/* override table width restrictions */
|
||||
@media screen and (min-width: 767px) {
|
||||
|
||||
.wy-table-responsive table td {
|
||||
/* !important prevents the common CSS stylesheets from overriding
|
||||
this as on RTD they are loaded after this stylesheet */
|
||||
white-space: normal !important;
|
||||
}
|
||||
|
||||
.wy-table-responsive {
|
||||
overflow: visible !important;
|
||||
}
|
||||
}
|
||||
|
||||
h1, h2, h3, h4, h5, h6 {
|
||||
color: #294488;
|
||||
font-family: 'Open Sans',Helvetica,Arial,Lucida,sans-serif!important;
|
||||
}
|
||||
|
||||
a {
|
||||
color: #2ea3f2;
|
||||
}
|
||||
|
||||
body {
|
||||
font-family: "Open Sans",Arial,sans-serif;
|
||||
color: #666;
|
||||
}
|
||||
|
||||
div.highlight pre strong {
|
||||
font-weight: 800;
|
||||
background-color: #ffffcc;
|
||||
}
|
15
doc/_templates/breadcrumbs.html
vendored
Normal file
15
doc/_templates/breadcrumbs.html
vendored
Normal file
|
@ -0,0 +1,15 @@
|
|||
{% extends "!breadcrumbs.html" %}
|
||||
|
||||
{% block breadcrumbs_aside %}
|
||||
<li class="wy-breadcrumbs-aside">
|
||||
{% if pagename != "search" %}
|
||||
{% if display_github %}
|
||||
{% if github_version == "master" %}
|
||||
<a href="https://{{ github_host|default("github.com") }}/{{ github_user }}/{{ github_repo }}/edit/{{ github_version }}{{ conf_py_path }}{{ pagename }}{{ page_source_suffix }}" class="fa fa-github"> {{ _('Edit on GitHub') }}</a>
|
||||
{% endif %}
|
||||
{% elif show_source and has_source and sourcename %}
|
||||
<a href="{{ pathto('_sources/' + sourcename, true)|e }}" rel="nofollow"> {{ _('View page source') }}</a>
|
||||
{% endif %}
|
||||
{% endif %}
|
||||
</li>
|
||||
{% endblock %}
|
14
doc/_templates/layout.html
vendored
Normal file
14
doc/_templates/layout.html
vendored
Normal file
|
@ -0,0 +1,14 @@
|
|||
{% extends "!layout.html" %}
|
||||
|
||||
{% if READTHEDOCS and current_version %}
|
||||
{% if current_version == "latest" or current_version == "stable"
|
||||
or current_version == "master" or current_version == "current"
|
||||
or current_version == "lts" or current_version == "LTS" %}
|
||||
{% set current_version = current_version ~ " (" ~ version ~ ")" %}
|
||||
{% endif %}
|
||||
{% endif %}
|
||||
|
||||
{% block menu %}
|
||||
{{ super() }}
|
||||
<a href="{{pathto('genindex.html', 1)}}">Index</a>
|
||||
{% endblock %}
|
256
doc/about.rst
Normal file
256
doc/about.rst
Normal file
|
@ -0,0 +1,256 @@
|
|||
==========
|
||||
About Zeek
|
||||
==========
|
||||
|
||||
What Is Zeek?
|
||||
=============
|
||||
|
||||
Zeek is a passive, open-source network traffic analyzer. Many operators use
|
||||
Zeek as a network security monitor (NSM) to support investigations of
|
||||
suspicious or malicious activity. Zeek also supports a wide range of traffic
|
||||
analysis tasks beyond the security domain, including performance measurement
|
||||
and troubleshooting.
|
||||
|
||||
The first benefit a new user derives from Zeek is the extensive set of logs
|
||||
describing network activity. These logs include not only a comprehensive record
|
||||
of every connection seen on the wire, but also application-layer transcripts.
|
||||
These include all HTTP sessions with their requested URIs, key headers, MIME
|
||||
types, and server responses; DNS requests with replies; SSL certificates; key
|
||||
content of SMTP sessions; and much more. By default, Zeek writes all this
|
||||
information into well-structured tab-separated or JSON log files suitable for
|
||||
post-processing with external software. Users can also choose to have external
|
||||
databases or SIEM products consume, store, process, and present the data for
|
||||
querying.
|
||||
|
||||
In addition to the logs, Zeek comes with built-in functionality for a range of
|
||||
analysis and detection tasks, including extracting files from HTTP sessions,
|
||||
detecting malware by interfacing to external registries, reporting vulnerable
|
||||
versions of software seen on the network, identifying popular web applications,
|
||||
detecting SSH brute-forcing, validating SSL certificate chains, and much more.
|
||||
|
||||
In addition to shipping such powerful functionality “out of the box,” Zeek is a
|
||||
fully customizable and extensible platform for traffic analysis. Zeek provides
|
||||
users a domain-specific, Turing-complete scripting language for expressing
|
||||
arbitrary analysis tasks. Think of the Zeek language as a “domain-specific
|
||||
Python” (or Perl): just like Python, the system comes with a large set of
|
||||
pre-built functionality (the “standard library”), yet users can also put Zeek
|
||||
to use in novel ways by writing custom code. Indeed, all of Zeek’s default
|
||||
analyses, including logging, are done via scripts; no specific analysis is
|
||||
hard-coded into the core of the system.
|
||||
|
||||
Zeek runs on commodity hardware and hence provides a low-cost alternative to
|
||||
expensive proprietary solutions. In many ways Zeek exceeds the capabilities of
|
||||
other network monitoring tools, which typically remain limited to a small set
|
||||
of hard-coded analysis tasks. Zeek is not a classic signature-based intrusion
|
||||
detection system (IDS); while it supports such standard functionality as well,
|
||||
Zeek’s scripting language facilitates a much broader spectrum of very different
|
||||
approaches to finding malicious activity. These include semantic misuse
|
||||
detection, anomaly detection, and behavioral analysis.
|
||||
|
||||
A large variety of sites deploy Zeek to protect their infrastructure, including
|
||||
many universities, research labs, supercomputing centers, open-science
|
||||
communities, major corporations, and government agencies. Zeek specifically
|
||||
targets high-speed, high-volume network monitoring, and an increasing number of
|
||||
sites are now using the system to monitor their 10GE networks, with some
|
||||
already moving on to 100GE links.
|
||||
|
||||
Zeek accommodates high-performance settings by supporting scalable
|
||||
load-balancing. Large sites typically run “Zeek Clusters” in which a high-speed
|
||||
front end load balancer distributes the traffic across an appropriate number of
|
||||
back end PCs, all running dedicated Zeek instances on their individual traffic
|
||||
slices. A central manager system coordinates the process, synchronizing state
|
||||
across the back ends and providing the operators with a central management
|
||||
interface for configuration and access to aggregated logs. Zeek’s integrated
|
||||
management framework, ZeekControl, supports such cluster setups out-of-the-box.
|
||||
|
||||
Zeek’s cluster features support single-system and multi-system setups. That's
|
||||
part of Zeek’s scalability advantages. For example, administrators can scale
|
||||
Zeek within one system for as long as possible, and then transparently add more
|
||||
systems when necessary.
|
||||
|
||||
In brief, Zeek is optimized for interpreting network traffic and generating
|
||||
logs based on that traffic. It is not optimized for byte matching, and users
|
||||
seeking signature detection approaches would be better served by trying
|
||||
intrusion detection systems such as Suricata. Zeek is also not a protocol
|
||||
analyzer in the sense of Wireshark, seeking to depict every element of network
|
||||
traffic at the frame level, or a system for storing traffic in packet capture
|
||||
(PCAP) form. Rather, Zeek sits at the “happy medium” representing compact yet
|
||||
high fidelity network logs, generating better understanding of network traffic
|
||||
and usage.
|
||||
|
||||
Why Zeek?
|
||||
=========
|
||||
|
||||
Zeek offers many advantages for security and network teams who want to better
|
||||
understand how their infrastructure is being used.
|
||||
|
||||
Security teams generally depend upon four sorts of data sources when trying to
|
||||
detect and respond to suspicious and malicious activity. These include *third
|
||||
party* sources such as law enforcement, peers, and commercial or nonprofit
|
||||
threat intelligence organizations; *network data*; *infrastructure and
|
||||
application data*, including logs from cloud environments; and *endpoint data*.
|
||||
Zeek is primarily a platform for collecting and analyzing the second form of
|
||||
data -- network data. All four are important elements of any security team’s
|
||||
program, however.
|
||||
|
||||
When looking at data derived from the network, there are four types of data
|
||||
available to analysts. As defined by the `network security monitoring paradigm
|
||||
<https://corelight.blog/2019/04/30/do-you-know-your-nsm-data-types/>`_, these
|
||||
four data types are *full content*, *transaction data*, *extracted content*,
|
||||
and *alert data*. Using these data types, one can record traffic, summarize
|
||||
traffic, extract traffic (or perhaps more accurately, extract content
|
||||
in the form of files), and judge traffic, respectively.
|
||||
|
||||
It’s critical to collect and analyze the four types of network security
|
||||
monitoring data. The question becomes one of determining the best way to
|
||||
accomplish this goal. Thankfully, Zeek as a NSM platform enables collection of
|
||||
at least two, and in some ways three, of these data forms, namely transaction
|
||||
data, extracted content, and alert data.
|
||||
|
||||
Zeek is best known for its transaction data. By default, when run and told to
|
||||
watch a network interface, Zeek will generate a compact,
|
||||
high-fidelity, richly-annotated set of transaction logs. These logs describe
|
||||
the protocols and activity seen on the wire, in a judgement-free,
|
||||
policy-neutral manner. This documentation will spend a considerable amount of
|
||||
time describing the most common Zeek log files such that readers will become
|
||||
comfortable with the format and learn to apply them to their environment.
|
||||
|
||||
Zeek can also easily carve files from network traffic, thanks to its file
|
||||
extraction capabilities. Analysts can then send those files to execution
|
||||
sandboxes or other file examination tools for additional investigation. Zeek
|
||||
has some capability to perform classical byte-centric intrusion detection, but
|
||||
that job is best suited for packages like the open source Snort or Suricata
|
||||
engines. Zeek has other capabilities, however, that can provide
|
||||
judgements in the form of alerts, through its notice mechanism.
|
||||
|
||||
Zeek is not optimized for writing traffic to disk in the spirit of a full
|
||||
content data collection, and that task is best handled by software written to
|
||||
fulfill that requirement.
|
||||
|
||||
Beyond the forms of network data that Zeek can natively collect and generate,
|
||||
Zeek has advantages that appeared in the `What Is Zeek?`_ section. These
|
||||
include its built-in functionality for a range of analysis and detection
|
||||
tasks, and its status as a fully customizable and extensible platform for
|
||||
traffic analysis. Zeek is also attractive because of its ability to run on
|
||||
commodity hardware, giving users of all types the ability to at least try Zeek
|
||||
in a low-cost manner.
|
||||
|
||||
History
|
||||
=======
|
||||
|
||||
Zeek has a rich history stretching back to the 1990s. `Vern Paxson
|
||||
<http://www.icir.org/vern/>`_ designed and implemented the initial version in
|
||||
1995 as a researcher at the `Lawrence Berkeley National Laboratory (LBNL)
|
||||
<http://www.lbl.gov/>`_. The original software was called “Bro,” as an
|
||||
“Orwellian reminder that monitoring comes hand in hand with the potential
|
||||
for privacy violations”.
|
||||
|
||||
LBNL first deployed Zeek in 1996, and the USENIX Security Symposium published
|
||||
Vern’s original paper on Zeek in 1998, and awarded it the Best Paper Award that
|
||||
year. He published a refined version of the paper in 1999 as `Bro: A System for
|
||||
Detecting Network Intruders in Real-Time
|
||||
<http://www.icir.org/vern/papers/bro-CN99.pdf>`_.
|
||||
|
||||
In 2003, the `National Science Foundation (NSF) <http://www.nsf.gov/>`_ began
|
||||
supporting research and advanced development on Bro at the `International
|
||||
Computer Science Institute (ICSI) <http://www.icsi.berkeley.edu/>`_. (Vern
|
||||
still leads the ICSI `Networking and Security group <http://www.icir.org/>`_.)
|
||||
|
||||
Over the years, a growing team of ICSI researchers and students kept adding
|
||||
novel functions to Zeek, while LBNL continued its support with funding from the
|
||||
`Department of Energy (DOE) <http://www.doe.gov/>`_. Much of Zeek’s
|
||||
capabilities originate in academic research projects, with results often
|
||||
published at top-tier conferences. A key to Zeek’s success was the project’s
|
||||
ability to bridge the gap between academia and operations. This relationship
|
||||
helped ground research on Zeek in real-world challenges.
|
||||
|
||||
With a growing operational user community, the research-centric development
|
||||
model eventually became a bottleneck to the system’s evolution. Research
|
||||
grants did not support the more mundane parts of software development and
|
||||
maintenance. However, those elements were crucial for the end-user experience.
|
||||
As a result, deploying Zeek required overcoming a steep learning curve.
|
||||
|
||||
In 2010, NSF sought to address this challenge by awarding ICSI a grant from its
|
||||
Software Development for Cyberinfrastructure fund. The `National Center for
|
||||
Supercomputing Applications (NCSA) <http://www.ncsa.illinois.edu/>`_ joined the
|
||||
team as a core partner, and the Zeek project began to overhaul many of the
|
||||
user-visible parts of the system for the 2.0 release in 2012.
|
||||
|
||||
After Zeek 2.0, the project enjoyed tremendous growth in new deployments across
|
||||
a diverse range of settings, and the ongoing collaboration between ICSI (co-PI
|
||||
Robin Sommer) and NCSA (co-PI Adam Slagell) brought a number of important
|
||||
features. In 2012, Zeek added native IPv6 support, long before many enterprise
|
||||
network monitoring tools. In 2013, NSF renewed its support with a second
|
||||
grant that established the Bro Center of Expertise at ICSI and NCSA, promoting
|
||||
Zeek as a comprehensive, low-cost security capability for research and
|
||||
education communities. To facilitate both debugging and education,
|
||||
`try.zeek.org <https://try.zeek.org>`_ (formerly try.bro.org) was launched in
|
||||
2014. This provided an interactive way for users to test a script with their
|
||||
own packet captures against a variety of Zeek versions and easily share
|
||||
sample code with others. For Zeek clusters and external communication,
|
||||
the Broker communication framework was added. Last, but not least, the
|
||||
Zeek package manager was created in 2016, funded by an additional grant
|
||||
from the Mozilla Foundation.
|
||||
|
||||
In the fall of 2018, the project leadership team decided to change the name of
|
||||
the software from Bro to Zeek. The leadership team desired a name that better
|
||||
reflected the values of the community while avoiding the negative connotations
|
||||
of so-called “bro culture” outside the computing world. The project released
|
||||
version 3.0 in the fall of 2019, the first release bearing the name Zeek. The
|
||||
year 2020 saw a renewed focus on community and growing the Zeek community, with
|
||||
increased interaction via social media, webinars, Slack channels, and related
|
||||
outreach efforts.
|
||||
|
||||
For a history of the project from 1995 to 2015, see Vern Paxson’s talk from
|
||||
BroCon 2015, `Reflecting on Twenty Years of Bro
|
||||
<https://www.youtube.com/watch?v=pb9HlmV0s2A>`_.
|
||||
|
||||
For background on the decision to rename Bro to Zeek, see Vern Paxson’s talk
|
||||
from BroCon 2018, `Renaming Bro
|
||||
<https://www.youtube.com/watch?v=L88ZYfjPzyk>`_.
|
||||
|
||||
Architecture
|
||||
============
|
||||
|
||||
.. image:: /images/architecture.png
|
||||
:align: center
|
||||
:scale: 75%
|
||||
|
||||
At a very high level, Zeek is architecturally layered into two major
|
||||
components. Its *event engine* (or *core*) reduces the incoming packet stream
|
||||
into a series of higher-level *events*. These events reflect network activity
|
||||
in policy-neutral terms, i.e., they describe *what* has been seen, but not
|
||||
*why*, or whether it is significant.
|
||||
|
||||
For example, every HTTP request on the wire turns into a corresponding
|
||||
:zeek:see:`http_request` event that carries with it the involved IP addresses
|
||||
and ports, the URI being requested, and the HTTP version in use. The event
|
||||
however does not convey any further *interpretation*, such as whether that URI
|
||||
corresponds to a known malware site.
|
||||
|
||||
The event engine component comprises a number of subcomponents, including in
|
||||
particular the packet processing pipeline consisting of: input sources,
|
||||
packet analysis, session analysis, and file analysis. Input sources ingest
|
||||
incoming network traffic from network interfaces. Packet analysis processes
|
||||
lower-level protocols, starting all the way down at the link layer. Session
|
||||
analysis handles application-layer protocols, such as HTTP, FTP, etc. File
|
||||
analysis dissects the content of files transferred over sessions. The event
|
||||
engine provides a plugin architecture for adding any of these from outside
|
||||
of the core Zeek code base, making it possible to expand Zeek’s capabilities as
|
||||
needed.
|
||||
|
||||
Semantics related to the events are derived by Zeek’s second main component,
|
||||
the *script interpreter*, which executes a set of *event handlers* written in
|
||||
Zeek’s custom scripting language. These scripts can express a site’s
|
||||
security policy, such as what actions to take when the monitor detects
|
||||
different types of activity.
|
||||
|
||||
More generally scripts can derive any desired properties and statistics from
|
||||
the input traffic. In fact, all of Zeek’s default output comes from scripts
|
||||
included in the distribution. Zeek’s language comes with extensive
|
||||
domain-specific types and support functionality. Crucially, Zeek’s language
|
||||
allows scripts to maintain state over time, enabling them to track and
|
||||
correlate the evolution of what they observe across connection and host
|
||||
boundaries. Zeek scripts can generate real-time alerts and also execute
|
||||
arbitrary external programs on demand. One might use this functionality to
|
||||
trigger an active response to an attack.
|
22
doc/acknowledgements.rst
Normal file
22
doc/acknowledgements.rst
Normal file
|
@ -0,0 +1,22 @@
|
|||
================
|
||||
Acknowledgements
|
||||
================
|
||||
|
||||
Thanks to everyone who contributed to making Zeek's documentation
|
||||
(alphabetically):
|
||||
|
||||
* Johanna Amann
|
||||
* Richard Bejtlich
|
||||
* Michael Dopheide
|
||||
* Amber Graner
|
||||
* Jan Grashöfer
|
||||
* Christian Kreibich
|
||||
* Terry Leach
|
||||
* Aashish Sharma
|
||||
* Jon Siwek
|
||||
* Stephen Smoot
|
||||
* Robin Sommer
|
||||
* Aaron Soto
|
||||
* Nick Turley
|
||||
* Fatema Bannat Wala
|
||||
* Tim Wojtulewicz
|
392
doc/building-from-source.rst
Normal file
392
doc/building-from-source.rst
Normal file
|
@ -0,0 +1,392 @@
|
|||
|
||||
.. _CMake: https://www.cmake.org
|
||||
.. _SWIG: https://www.swig.org
|
||||
.. _Xcode: https://developer.apple.com/xcode/
|
||||
.. _MacPorts: https://www.macports.org
|
||||
.. _Fink: https://www.finkproject.org
|
||||
.. _Homebrew: https://brew.sh
|
||||
.. _downloads page: https://zeek.org/get-zeek
|
||||
.. _devtoolset: https://developers.redhat.com/products/developertoolset/hello-world
|
||||
.. _zkg package manager: https://docs.zeek.org/projects/package-manager/en/stable/
|
||||
.. _crosstool-NG: https://crosstool-ng.github.io/
|
||||
.. _CMake toolchain: https://cmake.org/cmake/help/latest/manual/cmake-toolchains.7.html
|
||||
.. _contribute: https://github.com/zeek/zeek/wiki/Contribution-Guide
|
||||
.. _Chocolatey: https://chocolatey.org
|
||||
.. _Npcap: https://npcap.com/
|
||||
|
||||
.. _building-from-source:
|
||||
|
||||
====================
|
||||
Building from Source
|
||||
====================
|
||||
|
||||
Building Zeek from source provides the most control over your build and is the
|
||||
preferred approach for advanced users. We support a wide range of operating
|
||||
systems and distributions. Our `support policy
|
||||
<https://github.com/zeek/zeek/wiki/Platform-Support-Policy>`_ is informed by
|
||||
what we can run in our CI pipelines with reasonable effort, with the current
|
||||
status captured in our `support matrix
|
||||
<https://github.com/zeek/zeek/wiki/Zeek-Operating-System-Support-Matrix>`_.
|
||||
|
||||
Required Dependencies
|
||||
---------------------
|
||||
|
||||
Building Zeek from source requires the following dependencies, including
|
||||
development headers for libraries:
|
||||
|
||||
* Bash (for ZeekControl and BTest)
|
||||
* BIND8 library or greater (if not covered by system's libresolv)
|
||||
* Bison 3.3 or greater (https://www.gnu.org/software/bison/)
|
||||
* C/C++ compiler with C++17 support (GCC 8+ or Clang 9+)
|
||||
* CMake 3.15 or greater (https://www.cmake.org)
|
||||
* Flex (lexical analyzer generator) 2.6 or greater (https://github.com/westes/flex)
|
||||
* Libpcap (https://www.tcpdump.org)
|
||||
* Make
|
||||
* OpenSSL (https://www.openssl.org)
|
||||
* Python 3.9 or greater (https://www.python.org/)
|
||||
* SWIG (https://www.swig.org)
|
||||
* ZeroMQ (https://zeromq.org)
|
||||
* Zlib (https://zlib.net/)
|
||||
|
||||
To install these, you can use:
|
||||
|
||||
* RPM/RedHat-based Linux:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
sudo dnf install bison cmake cppzmq-devel gcc gcc-c++ flex libpcap-devel make openssl-devel python3 python3-devel swig zlib-devel
|
||||
|
||||
On pre-``dnf`` systems, use ``yum`` instead. Additionally, on RHEL/CentOS 7,
|
||||
you can install and activate a devtoolset_ to get access to recent GCC
|
||||
versions. You will also have to install and activate CMake 3. For example:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
sudo yum install cmake3 devtoolset-7
|
||||
scl enable devtoolset-7 bash
|
||||
|
||||
* DEB/Debian-based Linux:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
sudo apt-get install bison cmake cppzmq-dev gcc g++ flex libfl-dev libpcap-dev libssl-dev make python3 python3-dev swig zlib1g-dev
|
||||
|
||||
If your platform doesn't offer ``cppzmq-dev``, try ``libzmq3-dev``
|
||||
instead. Zeek's build will fall back to an in-tree version of C++
|
||||
bindings to ZeroMQ in that case.
|
||||
|
||||
* FreeBSD:
|
||||
|
||||
Most required dependencies should come with a minimal FreeBSD install
|
||||
except for the following.
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
sudo pkg install -y base64 bash bison cmake cppzmq git python3 swig
|
||||
pyver=`python3 -c 'import sys; print(f"py{sys.version_info[0]}{sys.version_info[1]}")'`
|
||||
sudo pkg install -y $pyver-sqlite3
|
||||
|
||||
* macOS:
|
||||
|
||||
Compiling source code on Macs requires first installing either Xcode_
|
||||
or the "Command Line Tools" (which is a much smaller download). To check
|
||||
if either is installed, run the ``xcode-select -p`` command. If you see
|
||||
an error message, then neither is installed and you can then run
|
||||
``xcode-select --install`` which will prompt you to either get Xcode (by
|
||||
clicking "Get Xcode") or to install the command line tools (by
|
||||
clicking "Install").
|
||||
|
||||
macOS comes with all required dependencies except for CMake_, SWIG_,
|
||||
Bison, Flex, and OpenSSL (OpenSSL headers were removed in macOS 10.11,
|
||||
therefore OpenSSL must be installed manually for macOS versions 10.11
|
||||
or newer).
|
||||
|
||||
Distributions of these dependencies can likely be obtained from your
|
||||
preferred macOS package management system (e.g. Homebrew_,
|
||||
MacPorts_, or Fink_). Specifically for Homebrew, the ``bison``, ``cmake``,
|
||||
``cppzmq``, ``flex``, ``swig``, and ``openssl`` packages
|
||||
provide the required dependencies. For MacPorts, use the ``bison``, ``cmake``,
|
||||
``cppzmq``, ``flex``, ``swig``, ``swig-python``, and ``openssl`` packages.
|
||||
|
||||
* Windows
|
||||
|
||||
Windows support is experimental. These instructions are meant as a starting
|
||||
point for development on that platform, and might have issues or be missing
|
||||
steps. Notify the Zeek team if any such problems arise.
|
||||
|
||||
Compiling on Windows requires the installation of a development environment.
|
||||
Zeek currently builds on Visual Studio 2019, and you can either install the
|
||||
full version including the UI tools or you can install the command-line tools
|
||||
and build from a shell. The instructions below describe how to install the
|
||||
command-line tools, but are not necessary if you install the full VS2019
|
||||
package. You will need to install Chocolatey_ in order to install the
|
||||
dependencies as instructed below. It's possible to install them from other
|
||||
sources (msys2, cygwin, etc), which we leave to the reader.
|
||||
|
||||
Cloning the repository will also require Developer Mode to be enabled in
|
||||
Windows. This is due to the existence of a number of symbolic links in the
|
||||
repository. Without Developer Mode, ``git`` on Windows will ignore these
|
||||
links and builds will fail. There are a couple of different ways to enable
|
||||
it, and the settings may differ depending on the version of Windows.
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
choco install -y --no-progress visualstudio2019buildtools --version=16.11.11.0
|
||||
choco install -y --no-progress visualstudio2019-workload-vctools --version=1.0.0 --package-parameters '--add Microsoft.VisualStudio.Component.VC.ATLMFC'
|
||||
choco install -y --no-progress sed
|
||||
choco install -y --no-progress winflexbison3
|
||||
choco install -y --no-progress msysgit
|
||||
choco install -y --no-progress python
|
||||
choco install -y --no-progress openssl --version=3.1.1
|
||||
|
||||
Once the dependencies are installed, you will need to add the Git installation
|
||||
to your PATH (``C:\Program Files\Git\bin`` by default). This is needed for the
|
||||
``sh`` command to be available during the build. Once all of the dependencies
|
||||
are in place, you will need to open a shell (PowerShell or cmd) and add the
|
||||
development environment to it. The following command is for running on an
|
||||
x86_64 host.
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\VC\Auxiliary\Build\vcvarsall.bat x86_amd64
|
||||
|
||||
Now you can build via cmake:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
mkdir build
|
||||
cd build
|
||||
cmake.exe .. -DCMAKE_BUILD_TYPE=release -DENABLE_ZEEK_UNIT_TESTS=yes -DENABLE_CLUSTER_BACKEND_ZEROMQ=no -DVCPKG_TARGET_TRIPLET="x64-windows-static" -G Ninja
|
||||
cmake.exe --build .
|
||||
|
||||
All of this is duplicated in the CI configuration for Windows which lives in
|
||||
the ``ci/windows`` directory, and can be used as a reference for running the
|
||||
commands by hand.
|
||||
|
||||
Note: By default, Windows links against the standard libpcap library from
|
||||
vcpkg. This version of libpcap does not support packet capture on Windows,
|
||||
unlike other platforms. In order to capture packets from live interfaces on
|
||||
Windows, you will need to link against the Npcap_ library. This library is free
|
||||
for personal use, but requires a paid license for commercial use or
|
||||
redistribution. To link against Npcap, download the SDK from their website,
|
||||
unzip it, and then pass ``-DPCAP_ROOT_DIR="<path to npcap sdk>"`` to the
|
||||
initial CMake invocation for Zeek.
|
||||
|
||||
Note also that the ZeroMQ cluster backend is not yet supported on Windows.
|
||||
|
||||
Optional Dependencies
|
||||
---------------------
|
||||
|
||||
Zeek can make use of some optional libraries and tools if they are found at
|
||||
build time:
|
||||
|
||||
* libmaxminddb (for geolocating IP addresses)
|
||||
* sendmail (enables Zeek and ZeekControl to send mail)
|
||||
* curl (used by a Zeek script that implements active HTTP)
|
||||
* gperftools (tcmalloc is used to improve memory and CPU usage)
|
||||
* jemalloc (https://github.com/jemalloc/jemalloc)
|
||||
* PF_RING (Linux only, see :ref:`pf-ring-config`)
|
||||
* krb5 libraries and headers
|
||||
* ipsumdump (for trace-summary; https://github.com/kohler/ipsumdump)
|
||||
* hiredis (for the Redis storage backend)
|
||||
|
||||
Geolocation is probably the most interesting and can be installed on most
|
||||
platforms by following the instructions for :ref:`address geolocation and AS
|
||||
lookups <geolocation>`.
|
||||
|
||||
The `zkg package manager`_, included in the Zeek installation, requires
|
||||
two external Python modules:
|
||||
|
||||
* GitPython: https://pypi.org/project/GitPython/
|
||||
* semantic-version: https://pypi.org/project/semantic-version/
|
||||
|
||||
These install easily via pip (``pip3 install GitPython
|
||||
semantic-version``) and also ship with some distributions:
|
||||
|
||||
* RPM/RedHat-based Linux:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
sudo yum install python3-GitPython python3-semantic_version
|
||||
|
||||
* DEB/Debian-based Linux:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
sudo apt-get install python3-git python3-semantic-version
|
||||
|
||||
``zkg`` also requires a ``git`` installation, which the above system packages
|
||||
pull in as a dependency. If you install via pip, remember that you also need
|
||||
``git`` itself.
|
||||
|
||||
Retrieving the Sources
|
||||
----------------------
|
||||
|
||||
Zeek releases are bundled into source packages for convenience and are
|
||||
available on the `downloads page`_. The source code can be manually downloaded
|
||||
from the link in the ``.tar.gz`` format to the target system for installation.
|
||||
|
||||
If you plan to `contribute`_ to Zeek or just want to try out the latest
|
||||
features under development, you should obtain Zeek's source code through its
|
||||
Git repositories hosted at https://github.com/zeek:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
git clone --recurse-submodules https://github.com/zeek/zeek
|
||||
|
||||
.. note:: If you choose to clone the ``zeek`` repository
|
||||
non-recursively for a "minimal Zeek experience", be aware that
|
||||
compiling it depends on several of the other submodules as well, so
|
||||
you'll likely have to build/install those independently first.
|
||||
|
||||
Configuring and Building
|
||||
------------------------
|
||||
|
||||
The typical way to build and install from source is as follows:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
./configure
|
||||
make
|
||||
make install
|
||||
|
||||
If the ``configure`` script fails, then it is most likely because it either
|
||||
couldn't find a required dependency or it couldn't find a sufficiently new
|
||||
version of a dependency. Assuming that you already installed all required
|
||||
dependencies, then you may need to use one of the ``--with-*`` options
|
||||
that can be given to the ``configure`` script to help it locate a dependency.
|
||||
To find out what all different options ``./configure`` supports, run
|
||||
``./configure --help``.
|
||||
|
||||
The default installation path is ``/usr/local/zeek``, which would typically
|
||||
require root privileges when doing the ``make install``. A different
|
||||
installation path can be chosen by specifying the ``configure`` script
|
||||
``--prefix`` option. Note that ``/usr``, ``/opt/bro/``, and ``/opt/zeek`` are
|
||||
the standard prefixes for binary Zeek packages to be installed, so those are
|
||||
typically not good choices unless you are creating such a package.
|
||||
|
||||
OpenBSD users, please see our `FAQ <https://zeek.org/faq/>`_ if you are having
|
||||
problems installing Zeek.
|
||||
|
||||
Depending on the Zeek package you downloaded, there may be auxiliary
|
||||
tools and libraries available in the ``auxil/`` directory. Some of them
|
||||
will be automatically built and installed along with Zeek. There are
|
||||
``--disable-*`` options that can be given to the configure script to
|
||||
turn off unwanted auxiliary projects that would otherwise be installed
|
||||
automatically. Finally, use ``make install-aux`` to install some of
|
||||
the other programs that are in the ``auxil/zeek-aux`` directory.
|
||||
|
||||
Finally, if you want to build the Zeek documentation (not required, because
|
||||
all of the documentation for the latest Zeek release is available at
|
||||
https://docs.zeek.org), there are instructions in ``doc/README`` in the source
|
||||
distribution.
|
||||
|
||||
Cross Compiling
|
||||
---------------
|
||||
|
||||
Prerequisites
|
||||
~~~~~~~~~~~~~
|
||||
|
||||
You need three things on the host system:
|
||||
|
||||
1. The Zeek source tree.
|
||||
2. A cross-compilation toolchain, such as one built via crosstool-NG_.
|
||||
3. Pre-built Zeek dependencies from the target system. This usually
|
||||
includes libpcap, zlib, OpenSSL, and Python development headers
|
||||
and libraries.
|
||||
|
||||
Configuration and Compiling
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
You first need to compile a few build tools native to the host system
|
||||
for use during the later cross-compile build. In the root of your
|
||||
Zeek source tree:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
./configure --builddir=../zeek-buildtools
|
||||
( cd ../zeek-buildtools && make binpac bifcl )
|
||||
|
||||
Next configure Zeek to use your cross-compilation toolchain (this example
|
||||
uses a Raspberry Pi as the target system):
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
./configure --toolchain=/home/jon/x-tools/RaspberryPi-toolchain.cmake --with-binpac=$(pwd)/../zeek-buildtools/auxil/binpac/src/binpac --with-bifcl=$(pwd)/../zeek-buildtools/src/bifcl
|
||||
|
||||
Here, the :file:`RaspberryPi-toolchain.cmake` file specifies a `CMake
|
||||
toolchain`_. In the toolchain file, you need to point the toolchain and
|
||||
compiler at the cross-compilation toolchain. It might look something like the
|
||||
following:
|
||||
|
||||
.. code-block:: cmake
|
||||
|
||||
# Operating system that CMake is targeting.
|
||||
set(CMAKE_SYSTEM_NAME Linux)
|
||||
|
||||
# The CMAKE_STAGING_PREFIX option may not work.
|
||||
# Given that Zeek is configured:
|
||||
#
|
||||
# ``./configure --prefix=<dir>``
|
||||
#
|
||||
# The options are:
|
||||
#
|
||||
# (1) ``make install`` and then copy over the --prefix dir from host to
|
||||
# target system.
|
||||
#
|
||||
# (2) ``DESTDIR=<staging_dir> make install`` and then copy over the
|
||||
# contents of that staging directory.
|
||||
|
||||
set(toolchain /home/jon/x-tools/arm-rpi-linux-gnueabihf)
|
||||
set(CMAKE_C_COMPILER ${toolchain}/bin/arm-rpi-linux-gnueabihf-gcc)
|
||||
set(CMAKE_CXX_COMPILER ${toolchain}/bin/arm-rpi-linux-gnueabihf-g++)
|
||||
|
||||
# The cross-compiler/linker will use these paths to locate dependencies.
|
||||
set(CMAKE_FIND_ROOT_PATH
|
||||
/home/jon/x-tools/zeek-rpi-deps
|
||||
${toolchain}/arm-rpi-linux-gnueabihf/sysroot
|
||||
)
|
||||
|
||||
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
|
||||
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
|
||||
set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
|
||||
|
||||
If that configuration succeeds you are ready to build:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
make
|
||||
|
||||
And if that works, install on your host system:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
make install
|
||||
|
||||
Once installed, you can copy/move the files from the installation prefix on the
|
||||
host system to the target system and start running Zeek as usual.
|
||||
|
||||
Configuring the Run-Time Environment
|
||||
====================================
|
||||
|
||||
You may want to adjust your :envvar:`PATH` environment variable
|
||||
according to the platform/shell/package you're using since
|
||||
neither :file:`/usr/local/zeek/bin/` nor :file:`/opt/zeek/bin/`
|
||||
will reside in the default :envvar:`PATH`. For example:
|
||||
|
||||
Bourne-Shell Syntax:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
export PATH=/usr/local/zeek/bin:$PATH
|
||||
|
||||
C-Shell Syntax:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
setenv PATH /usr/local/zeek/bin:$PATH
|
||||
|
||||
Or substitute ``/opt/zeek/bin`` instead if you installed from a binary package.
|
||||
|
||||
Zeek supports several environment variables to adjust its behavior. Take a look
|
||||
at the ``zeek --help`` output for details.
|
507
doc/cluster-setup.rst
Normal file
507
doc/cluster-setup.rst
Normal file
|
@ -0,0 +1,507 @@
|
|||
|
||||
.. _ZeekControl documentation: https://github.com/zeek/zeekctl
|
||||
|
||||
==================
|
||||
Zeek Cluster Setup
|
||||
==================
|
||||
|
||||
.. TODO: integrate BoZ revisions
|
||||
|
||||
A *Zeek Cluster* is a set of systems jointly analyzing the traffic of
|
||||
a network link in a coordinated fashion. You can operate such a setup from
|
||||
a central manager system easily using ZeekControl because it
|
||||
hides much of the complexity of the multi-machine installation.
|
||||
|
||||
Cluster Architecture
|
||||
====================
|
||||
|
||||
Zeek is not multithreaded, so once the limitations of a single processor core
|
||||
are reached the only option currently is to spread the workload across many
|
||||
cores, or even many physical computers. The cluster deployment scenario for
|
||||
Zeek is the current solution to build these larger systems. The tools and
|
||||
scripts that accompany Zeek provide the structure to easily manage many Zeek
|
||||
processes examining packets and doing correlation activities but acting as
|
||||
a singular, cohesive entity. This section describes the Zeek cluster
|
||||
architecture. For information on how to configure a Zeek cluster,
|
||||
see the documentation for `ZeekControl <https://github.com/zeek/zeekctl>`_.
|
||||
|
||||
Architecture
|
||||
------------
|
||||
|
||||
The figure below illustrates the main components of a Zeek cluster.
|
||||
|
||||
.. image:: /images/deployment.png
|
||||
|
||||
For more specific information on the way Zeek processes are connected,
|
||||
how they function, and how they communicate with each other, see the
|
||||
:ref:`Broker Framework Documentation <broker-framework>`.
|
||||
|
||||
Tap
|
||||
***
|
||||
The tap is a mechanism that splits the packet stream in order to make a copy
|
||||
available for inspection. Examples include the monitoring port on a switch
|
||||
and an optical splitter on fiber networks.
|
||||
|
||||
Frontend
|
||||
********
|
||||
The frontend is a discrete hardware device or on-host technique that splits
|
||||
traffic into many streams or flows. The Zeek binary does not do this job.
|
||||
There are numerous ways to accomplish this task, some of which are described
|
||||
below in `Frontend Options`_.
|
||||
|
||||
Manager
|
||||
*******
|
||||
The manager is a Zeek process that has two primary jobs. It receives log
|
||||
messages and notices from the rest of the nodes in the cluster using the Zeek
|
||||
communications protocol (note that if you use a separate logger node, then the
|
||||
logger receives all logs instead of the manager). The result
|
||||
is a single log instead of many discrete logs that you have to
|
||||
combine in some manner with post-processing.
|
||||
The manager also supports other functionality and analysis which
|
||||
requires a centralized, global view of events or data.
|
||||
|
||||
Logger
|
||||
******
|
||||
A logger is an optional Zeek process that receives log messages from the
|
||||
rest of the nodes in the cluster using the Zeek communications protocol.
|
||||
The purpose of having a logger receive logs instead of the manager is
|
||||
to reduce the load on the manager. If no logger is needed, then the
|
||||
manager will receive logs instead.
|
||||
|
||||
Proxy
|
||||
*****
|
||||
A proxy is a Zeek process that may be used to offload data storage or
|
||||
any arbitrary workload. A cluster may contain multiple proxy nodes.
|
||||
The default scripts that come with Zeek make minimal use of proxies, so
|
||||
a single one may be sufficient, but customized use of them to partition
|
||||
data or workloads provides greater cluster scalability potential than
|
||||
just doing similar tasks on a single, centralized Manager node.
|
||||
|
||||
Zeek processes acting as proxies don't tend to be extremely hard on CPU
|
||||
or memory and users frequently run proxy processes on the same physical
|
||||
host as the manager.
|
||||
|
||||
Worker
|
||||
******
|
||||
The worker is the Zeek process that sniffs network traffic and does protocol
|
||||
analysis on the reassembled traffic streams. Most of the work of an active
|
||||
cluster takes place on the workers and as such, the workers typically
|
||||
represent the bulk of the Zeek processes that are running in a cluster.
|
||||
The fastest memory and CPU core speed you can afford is recommended
|
||||
since all of the protocol parsing and most analysis will take place here.
|
||||
There are no particular requirements for the disks in workers since almost all
|
||||
logging is done remotely to the manager, and normally very little is written
|
||||
to disk.
|
||||
|
||||
Frontend Options
|
||||
----------------
|
||||
|
||||
There are many options for setting up a frontend flow distributor. In many
|
||||
cases it is beneficial to do multiple stages of flow distribution
|
||||
on the network and on the host.
|
||||
|
||||
Discrete hardware flow balancers
|
||||
********************************
|
||||
|
||||
cPacket
|
||||
^^^^^^^
|
||||
|
||||
If you are monitoring one or more 10G physical interfaces, the recommended
|
||||
solution is to use either a cFlow or cVu device from cPacket because they
|
||||
are used successfully at a number of sites. These devices will perform
|
||||
layer-2 load balancing by rewriting the destination Ethernet MAC address
|
||||
to cause each packet associated with a particular flow to have the same
|
||||
destination MAC. The packets can then be passed directly to a monitoring
|
||||
host where each worker has a BPF filter to limit its visibility to only that
|
||||
stream of flows, or onward to a commodity switch to split the traffic out to
|
||||
multiple 1G interfaces for the workers. This greatly reduces
|
||||
costs since workers can use relatively inexpensive 1G interfaces.
|
||||
|
||||
On host flow balancing
|
||||
**********************
|
||||
|
||||
PF_RING
|
||||
^^^^^^^
|
||||
|
||||
The PF_RING software for Linux has a "clustering" feature which will do
|
||||
flow-based load balancing across a number of processes that are sniffing the
|
||||
same interface. This allows you to easily take advantage of multiple
|
||||
cores in a single physical host because Zeek's main event loop is single
|
||||
threaded and can't natively utilize all of the cores. If you want to use
|
||||
PF_RING, see the documentation on :ref:`how to configure Zeek with PF_RING
|
||||
<pf-ring-config>`.
|
||||
|
||||
|
||||
AF_PACKET
|
||||
^^^^^^^^^
|
||||
|
||||
On Linux, Zeek supports `AF_PACKET sockets <https://docs.kernel.org/networking/packet_mmap.html>`_ natively.
|
||||
Currently, this is provided by including the `external Zeek::AF_Packet plugin <https://github.com/zeek/zeek-af_packet-plugin>`_
|
||||
in default builds of Zeek for Linux. Additional information can be found in
|
||||
the project's README file.
|
||||
|
||||
To check the availability of the ``af_packet`` packet source, print its information using ``zeek -N``::
|
||||
|
||||
zeek -N Zeek::AF_Packet
|
||||
Zeek::AF_Packet - Packet acquisition via AF_Packet (dynamic, version 3.2.0)
|
||||
|
||||
On FreeBSD, MacOSX, or if Zeek was built with ``--disable-af-packet``, the
|
||||
plugin won't be available.
|
||||
|
||||
Single worker mode
|
||||
""""""""""""""""""
|
||||
|
||||
For the most basic usage, prefix the interface with ``af_packet::`` when invoking Zeek::
|
||||
|
||||
zeek -i af_packet::eth0
|
||||
|
||||
Generally, running Zeek this way requires a privileged user with CAP_NET_RAW
|
||||
and CAP_NET_ADMIN capabilities. Linux supports file-based capabilities: A
|
||||
process executing an executable with capabilities will receive these.
|
||||
Using this mechanism makes it possible to run Zeek as an unprivileged user once the file
|
||||
capabilities have been added::
|
||||
|
||||
sudo setcap cap_net_raw,cap_net_admin=+eip /path/to/zeek
|
||||
|
||||
Offloading and ethtool tuning
|
||||
"""""""""""""""""""""""""""""
|
||||
|
||||
While not specific to AF_PACKET, it is recommended to disable any offloading
|
||||
features provided by the network card or Linux networking stack when running
|
||||
Zeek. This allows Zeek to observe network packets as they arrive on the wire.
|
||||
See this `blog post <https://blog.securityonion.net/2011/10/when-is-full-packet-capture-not-full.html>`_
|
||||
for more background.
|
||||
|
||||
Toggling these features can be done with the ``ethtool -K`` command, for example::
|
||||
|
||||
IFACE=eth0
|
||||
for offload in rx tx sg tso ufo gso gro lro; do
|
||||
ethtool -K $IFACE $offload off
|
||||
done
|
||||
|
||||
Detailed statistics about the interface can be gathered via ``ethtool -S``.
|
||||
|
||||
For more details around the involved offloads consult the
|
||||
`ethtool manpage <https://man7.org/linux/man-pages/man8/ethtool.8.html>`_.
|
||||
|
||||
Load balancing
|
||||
""""""""""""""
|
||||
|
||||
The more interesting use-case is to use AF_PACKET to run multiple Zeek workers
|
||||
and have their packet sockets join what is called a fanout group.
|
||||
In such a setup, the network traffic is load-balanced across Zeek workers.
|
||||
By default load balancing is based on symmetric flow hashes [#]_.
|
||||
|
||||
For example, running two Zeek workers listening on the same network interface,
|
||||
each worker analyzing approximately half of the network traffic, can be done
|
||||
as follows::
|
||||
|
||||
zeek -i af_packet::eth0 &
|
||||
zeek -i af_packet::eth0 &
|
||||
|
||||
The fanout group is identified by an id and configurable using the
|
||||
``AF_Packet::fanout_id`` constant which defaults to 23. In the example
|
||||
above, both Zeek workers join the same fanout group.
|
||||
|
||||
|
||||
.. note::
|
||||
|
||||
As a caveat, within the same Linux network namespace, two Zeek processes can
|
||||
not use the same fanout group id for listening on different network interfaces.
|
||||
If this is a setup you're planning on running, configure the fanout group
|
||||
ids explicitly.
|
||||
For illustration purposes, the following starts two Zeek workers each using
|
||||
a different network interface and fanout group id::
|
||||
|
||||
zeek -i af_packet::eth0 AF_Packet::fanout_id=23 &
|
||||
zeek -i af_packet::eth1 AF_Packet::fanout_id=24 &
|
||||
|
||||
.. warning::
|
||||
|
||||
Zeek workers crashing or restarting due to running out of memory can,
|
||||
for a short period of time, disturb load balancing due to their packet
|
||||
sockets being removed and later rejoining the fanout group.
|
||||
This may be visible in Zeek logs as gaps and/or duplicated connection
|
||||
entries produced by different Zeek workers.
|
||||
|
||||
See :ref:`cluster-configuration` for instructions how to configure AF_PACKET
|
||||
with ZeekControl.
|
||||
|
||||
|
||||
Netmap
|
||||
^^^^^^
|
||||
|
||||
`Netmap <https://github.com/luigirizzo/netmap>`_ is a framework for fast
|
||||
packet I/O that is natively supported on FreeBSD since version 10.
|
||||
On Linux it can be installed as an out-of-tree kernel module.
|
||||
|
||||
FreeBSD
|
||||
"""""""
|
||||
FreeBSD's libpcap library supports netmap natively. This makes it possible to prefix
|
||||
interface names with ``netmap:`` to instruct libpcap to open the interface
|
||||
in netmap mode. For example, a single Zeek worker can leverage netmap
|
||||
transparently using Zeek's default packet source as follows::
|
||||
|
||||
zeek -i netmap:em0
|
||||
|
||||
.. warning::
|
||||
|
||||
Above command will put the em0 interface into kernel-bypass mode. Network
|
||||
packets will pass directly to Zeek without being interpreted by the kernel.
|
||||
If em0 is your primary network interface, this effectively disables
|
||||
networking, including SSH connectivity.
|
||||
|
||||
If your network card supports multiple rings, individual Zeek workers can be
|
||||
attached to these as well (this assumes the NIC does proper flow hashing in hardware)::
|
||||
|
||||
zeek -i netmap:em0-0
|
||||
zeek -i netmap:em0-1
|
||||
|
||||
For software load balancing support, the FreeBSD source tree includes the
|
||||
``lb`` tool to distribute packets into netmap pipes doing flow hashing
|
||||
in user-space.
|
||||
|
||||
To compile and install ``lb``, ensure ``/usr/src`` is available on your
|
||||
FreeBSD system, then run the following commands::
|
||||
|
||||
cd /usr/src/tools/tools/netmap/
|
||||
make
|
||||
# Installs lb into /usr/local/bin
|
||||
cp /usr/obj/usr/src/`uname -m`.`uname -m`/tools/tools/netmap/lb /usr/local/bin/
|
||||
|
||||
|
||||
To load-balance packets arriving on em0 into 4 different netmap pipes named
|
||||
``zeek}0`` through ``zeek}3``, run ``lb`` as follows::
|
||||
|
||||
lb -i em0 -p zeek:4
|
||||
410.154166 main [634] interface is em0
|
||||
411.377220 main [741] successfully opened netmap:em0
|
||||
411.377243 main [812] opening pipe named netmap:zeek{0/xT@1
|
||||
411.379200 main [829] successfully opened pipe #1 netmap:zeek{0/xT@1 (tx slots: 1024)
|
||||
411.379242 main [838] zerocopy enabled
|
||||
...
|
||||
|
||||
Now, Zeek workers can attach to these four netmap pipes. When starting Zeek
|
||||
workers manually, the respective invocations would be as follows. The ``/x``
|
||||
suffix specifies exclusive mode to prevent two Zeek processes consuming packets
|
||||
from the same netmap pipe::
|
||||
|
||||
zeek -i netmap:zeek}0/x
|
||||
zeek -i netmap:zeek}1/x
|
||||
zeek -i netmap:zeek}2/x
|
||||
zeek -i netmap:zeek}3/x
|
||||
|
||||
For packet-level debugging, you can attach ``tcpdump`` to any of the netmap
|
||||
pipes in read monitor mode even while Zeek workers are consuming from them::
|
||||
|
||||
tcpdump -i netmap:zeek}1/r
|
||||
|
||||
In case libpcap's netmap support is insufficient, the external
|
||||
`Zeek netmap plugin <https://github.com/zeek/zeek-netmap>`_ can be installed.
|
||||
|
||||
.. warning::
|
||||
|
||||
When using the zeek-netmap plugin on FreeBSD, the interface specification given to Zeek
|
||||
needs to change from ``netmap:zeek}0/x`` to ``netmap::zeek}0/x`` - a single colon more.
|
||||
In the first case, Zeek uses the default libpcap packet source and passes ``netmap:zeek}0``
|
||||
as interface name. In the second case, ``netmap::`` is interpreted by Zeek and
|
||||
the netmap packet source is instantiated. The ``zeek}0/x`` part is used as
|
||||
interface name.
|
||||
|
||||
Linux
|
||||
"""""
|
||||
|
||||
While netmap isn't included in the Linux kernel, it can be installed as
|
||||
an out-of-tree kernel module.
|
||||
See the project's `GitHub repository <https://github.com/luigirizzo/netmap>`_
|
||||
for detailed instructions. This includes the ``lb`` tool for load balancing.
|
||||
|
||||
On Linux, the external `zeek-netmap <https://github.com/zeek/zeek-netmap>`_
|
||||
packet source plugin is required, or the system's libpcap library as used by
|
||||
Zeek needs to be recompiled with native netmap support. With the netmap kernel
|
||||
module loaded and the Zeek plugin installed, running a Zeek worker as follows
|
||||
will leverage netmap on Linux::
|
||||
|
||||
zeek -i netmap::eth1
|
||||
|
||||
For using ``lb`` or libpcap with netmap support, refer to the commands shown
|
||||
in the FreeBSD section - these are essentially the same.
|
||||
|
||||
|
||||
.. _cluster-configuration:
|
||||
|
||||
Cluster Configuration
|
||||
=====================
|
||||
|
||||
A *Zeek Cluster* is a set of systems jointly analyzing the traffic of
|
||||
a network link in a coordinated fashion. You can operate such a setup from
|
||||
a central manager system easily using ZeekControl because it
|
||||
hides much of the complexity of the multi-machine installation.
|
||||
|
||||
This section gives examples of how to setup common cluster configurations
|
||||
using ZeekControl. For a full reference on ZeekControl, see the
|
||||
`ZeekControl documentation`_.
|
||||
|
||||
Preparing to Setup a Cluster
|
||||
----------------------------
|
||||
|
||||
We refer to the user account used to set up the cluster
|
||||
as the "Zeek user". When setting up a cluster the Zeek user must be set up
|
||||
on all hosts, and this user must have ssh access from the manager to all
|
||||
machines in the cluster, and it must work without being prompted for a
|
||||
password/passphrase (for example, using ssh public key authentication).
|
||||
Also, on the worker nodes this user must have access to the target
|
||||
network interface in promiscuous mode.
|
||||
|
||||
Additional storage must be available on all hosts under the same path,
|
||||
which we will call the cluster's prefix path. We refer to this directory
|
||||
as ``<prefix>``. If you build Zeek from source, then ``<prefix>`` is
|
||||
the directory specified with the ``--prefix`` configure option,
|
||||
or ``/usr/local/zeek`` by default. The Zeek user must be able to either
|
||||
create this directory or, where it already exists, must have write
|
||||
permission inside this directory on all hosts.
|
||||
|
||||
When trying to decide how to configure the Zeek nodes, keep in mind that
|
||||
there can be multiple Zeek instances running on the same host. For example,
|
||||
it's possible to run a proxy and the manager on the same host. However, it is
|
||||
recommended to run workers on a different machine than the manager because
|
||||
workers can consume a lot of CPU resources. The maximum recommended
|
||||
number of workers to run on a machine should be one or two less than
|
||||
the number of CPU cores available on that machine. Using a load-balancing
|
||||
method (such as PF_RING) along with CPU pinning can decrease the load on
|
||||
the worker machines. Also, in order to reduce the load on the manager
|
||||
process, it is recommended to have a logger in your configuration. If a
|
||||
logger is defined in your cluster configuration, then it will receive logs
|
||||
instead of the manager process.
|
||||
|
||||
Basic Cluster Configuration
|
||||
---------------------------
|
||||
|
||||
With all prerequisites in place, perform the following steps to setup
|
||||
a Zeek cluster (do this as the Zeek user on the manager host only):
|
||||
|
||||
- Edit the ZeekControl configuration file, ``<prefix>/etc/zeekctl.cfg``,
|
||||
and change the value of any options to be more suitable for
|
||||
your environment. You will most likely want to change the value of
|
||||
the ``MailTo`` and ``LogRotationInterval`` options. A complete
|
||||
reference of all ZeekControl options can be found in the
|
||||
`ZeekControl documentation`_.
|
||||
|
||||
- Edit the ZeekControl node configuration file, ``<prefix>/etc/node.cfg``
|
||||
to define where logger, manager, proxies, and workers are to run. For a
|
||||
cluster configuration, you must comment-out (or remove) the standalone node
|
||||
in that file, and either uncomment or add node entries for each node
|
||||
in your cluster (logger, manager, proxy, and workers). For example, if you
|
||||
wanted to run five Zeek nodes (two workers, one proxy, a logger, and a
|
||||
manager) on a cluster consisting of three machines, your cluster
|
||||
configuration would look like this::
|
||||
|
||||
[logger]
|
||||
type=logger
|
||||
host=10.0.0.10
|
||||
|
||||
[manager]
|
||||
type=manager
|
||||
host=10.0.0.10
|
||||
|
||||
[proxy-1]
|
||||
type=proxy
|
||||
host=10.0.0.10
|
||||
|
||||
[worker-1]
|
||||
type=worker
|
||||
host=10.0.0.11
|
||||
interface=eth0
|
||||
|
||||
[worker-2]
|
||||
type=worker
|
||||
host=10.0.0.12
|
||||
interface=eth0
|
||||
|
||||
For a complete reference of all options that are allowed in the ``node.cfg``
|
||||
file, see the `ZeekControl documentation`_.
|
||||
|
||||
- Edit the network configuration file ``<prefix>/etc/networks.cfg``. This
|
||||
file lists all of the networks which the cluster should consider as local
|
||||
to the monitored environment.
|
||||
|
||||
- Install Zeek on all machines in the cluster using ZeekControl::
|
||||
|
||||
> zeekctl install
|
||||
|
||||
- See the `ZeekControl documentation`_
|
||||
for information on setting up a cron job on the manager host that can
|
||||
monitor the cluster.
|
||||
|
||||
AF_PACKET Cluster Configuration
|
||||
-------------------------------
|
||||
|
||||
Since version 5.2, Zeek includes AF_PACKET as a native packet source. This
|
||||
provides an easy and efficient capture mechanism for Linux users.
|
||||
|
||||
Adapt the worker section in ZeekControl's ``node.cfg`` file with the
|
||||
following entries, assuming four worker processes listening on ``eth0``::
|
||||
|
||||
[worker-1]
|
||||
type=worker
|
||||
host=10.0.0.11
|
||||
interface=eth0
|
||||
lb_method=af_packet
|
||||
lb_procs=4
|
||||
|
||||
The specific options are ``lb_method=af_packet`` and ``lb_procs=4``.
|
||||
If listening on two or more interfaces on the same host is a requirement,
|
||||
remember to set a unique ``fanout_id`` using the node option ``af_packet_fanout_id``::
|
||||
|
||||
[worker-1-eth0]
|
||||
type=worker
|
||||
host=10.0.0.11
|
||||
interface=eth0
|
||||
lb_method=af_packet
|
||||
lb_procs=4
|
||||
af_packet_fanout_id=20
|
||||
|
||||
[worker-1-eth1]
|
||||
type=worker
|
||||
host=10.0.0.11
|
||||
interface=eth1
|
||||
lb_method=af_packet
|
||||
lb_procs=4
|
||||
af_packet_fanout_id=21
|
||||
|
||||
Pinning the worker processes to individual CPU cores can improve performance.
|
||||
Use the node's option ``pin_cpus=4,5,6,7``, listing as many CPU numbers as
|
||||
processes at appropriate offsets.
|
||||
|
||||
.. _pf-ring-config:
|
||||
|
||||
PF_RING Cluster Configuration
|
||||
-----------------------------
|
||||
|
||||
`PF_RING <http://www.ntop.org/products/pf_ring/>`_ allows speeding up the
|
||||
packet capture process by installing a new type of socket in Linux systems.
|
||||
It supports 10Gbit hardware packet filtering using standard network adapters,
|
||||
and user-space DNA (Direct NIC Access) for fast packet capture/transmission.
|
||||
|
||||
.. note::
|
||||
|
||||
Unless you have evaluated to specifically require PF_RING, consider using
|
||||
AF_PACKET first and test if it fulfills your requirements. AF_PACKET has
|
||||
been integrated into Zeek since version 5.2. It's a bit easier to get
|
||||
started with as it does not require an out of tree Linux kernel module.
|
||||
|
||||
Head over to :ref:`cluster-pf-ring` for more details.
|
||||
|
||||
.. toctree::
|
||||
:hidden:
|
||||
|
||||
cluster/pf_ring
|
||||
|
||||
|
||||
.. [#] Some Linux kernel versions between 3.10 and 4.7 might exhibit
|
||||
a bug that prevents the required symmetric hashing. The script available
|
||||
in the GitHub project `can-i-use-afpacket-fanout <https://github.com/JustinAzoff/can-i-use-afpacket-fanout>`_
|
||||
can be used to verify whether ``PACKET_FANOUT`` works as expected.
|
||||
|
||||
This issue has been fixed in all stable kernels for at least 5 years.
|
||||
You're unlikely to be affected.
|
141
doc/cluster/pf_ring.rst
Normal file
141
doc/cluster/pf_ring.rst
Normal file
|
@ -0,0 +1,141 @@
|
|||
.. _cluster-pf-ring:
|
||||
|
||||
===================
|
||||
PF_RING Setup Guide
|
||||
===================
|
||||
|
||||
Installing PF_RING
|
||||
******************
|
||||
|
||||
1. Download and install PF_RING for your system following the instructions
|
||||
`here <http://www.ntop.org/get-started/download/#PF_RING>`_. The following
|
||||
commands will install the PF_RING libraries and kernel module (replace
|
||||
the version number 5.6.2 in this example with the version that you
|
||||
downloaded)::
|
||||
|
||||
cd /usr/src
|
||||
tar xvzf PF_RING-5.6.2.tar.gz
|
||||
cd PF_RING-5.6.2/userland/lib
|
||||
./configure --prefix=/opt/pfring
|
||||
make install
|
||||
|
||||
cd ../libpcap
|
||||
./configure --prefix=/opt/pfring
|
||||
make install
|
||||
|
||||
cd ../tcpdump-4.1.1
|
||||
./configure --prefix=/opt/pfring
|
||||
make install
|
||||
|
||||
cd ../../kernel
|
||||
make
|
||||
make install
|
||||
|
||||
modprobe pf_ring enable_tx_capture=0 min_num_slots=32768
|
||||
|
||||
Refer to the documentation for your Linux distribution on how to load the
|
||||
pf_ring module at boot time. You will need to install the PF_RING
|
||||
library files and kernel module on all of the workers in your cluster.
|
||||
|
||||
2. Download the Zeek source code.
|
||||
|
||||
3. Configure and install Zeek using the following commands::
|
||||
|
||||
./configure --with-pcap=/opt/pfring
|
||||
make
|
||||
make install
|
||||
|
||||
4. Make sure Zeek is correctly linked to the PF_RING libpcap libraries::
|
||||
|
||||
ldd /usr/local/zeek/bin/zeek | grep pcap
|
||||
libpcap.so.1 => /opt/pfring/lib/libpcap.so.1 (0x00007fa6d7d24000)
|
||||
|
||||
5. Configure ZeekControl to use PF_RING (explained below).
|
||||
|
||||
6. Run "zeekctl install" on the manager. This command will install Zeek and
|
||||
required scripts to all machines in your cluster.
|
||||
|
||||
Using PF_RING
|
||||
*************
|
||||
|
||||
In order to use PF_RING, you need to specify the correct configuration
|
||||
options for your worker nodes in ZeekControl's node configuration file.
|
||||
Edit the ``node.cfg`` file and specify ``lb_method=pf_ring`` for each of
|
||||
your worker nodes. Next, use the ``lb_procs`` node option to specify how
|
||||
many Zeek processes you'd like that worker node to run, and optionally pin
|
||||
those processes to certain CPU cores with the ``pin_cpus`` option (CPU
|
||||
numbering starts at zero). The correct ``pin_cpus`` setting to use is
|
||||
dependent on your CPU architecture (Intel and AMD systems enumerate
|
||||
processors in different ways). Using the wrong ``pin_cpus`` setting
|
||||
can cause poor performance. Here is what a worker node entry should
|
||||
look like when using PF_RING and CPU pinning::
|
||||
|
||||
[worker-1]
|
||||
type=worker
|
||||
host=10.0.0.50
|
||||
interface=eth0
|
||||
lb_method=pf_ring
|
||||
lb_procs=10
|
||||
pin_cpus=2,3,4,5,6,7,8,9,10,11
|
||||
|
||||
|
||||
Using PF_RING+DNA with symmetric RSS
|
||||
************************************
|
||||
|
||||
You must have a PF_RING+DNA license in order to do this. You can sniff
|
||||
each packet only once.
|
||||
|
||||
1. Load the DNA NIC driver (i.e. ixgbe) on each worker host.
|
||||
|
||||
2. Run "ethtool -L dna0 combined 10" (this will establish 10 RSS queues
|
||||
on your NIC) on each worker host. You must make sure that you set the
|
||||
number of RSS queues to the same as the number you specify for the
|
||||
lb_procs option in the node.cfg file.
|
||||
|
||||
3. On the manager, configure your worker(s) in node.cfg::
|
||||
|
||||
[worker-1]
|
||||
type=worker
|
||||
host=10.0.0.50
|
||||
interface=dna0
|
||||
lb_method=pf_ring
|
||||
lb_procs=10
|
||||
|
||||
|
||||
Using PF_RING+DNA with pfdnacluster_master
|
||||
******************************************
|
||||
|
||||
You must have a PF_RING+DNA license and a libzero license in order to do
|
||||
this. You can load balance between multiple applications and sniff the
|
||||
same packets multiple times with different tools.
|
||||
|
||||
1. Load the DNA NIC driver (i.e. ixgbe) on each worker host.
|
||||
|
||||
2. Run "ethtool -L dna0 1" (this will establish 1 RSS queue on your NIC)
|
||||
on each worker host.
|
||||
|
||||
3. Run the pfdnacluster_master command on each worker host. For example::
|
||||
|
||||
pfdnacluster_master -c 21 -i dna0 -n 10
|
||||
|
||||
Make sure that your cluster ID (21 in this example) matches the interface
|
||||
name you specify in the node.cfg file. Also make sure that the number
|
||||
of processes you're balancing across (10 in this example) matches
|
||||
the lb_procs option in the node.cfg file.
|
||||
|
||||
4. If you are load balancing to other processes, you can use the
|
||||
pfringfirstappinstance variable in zeekctl.cfg to set the first
|
||||
application instance that Zeek should use. For example, if you are running
|
||||
pfdnacluster_master with "-n 10,4" you would set
|
||||
pfringfirstappinstance=4. Unfortunately that's still a global setting
|
||||
in zeekctl.cfg at the moment but we may change that to something you can
|
||||
set in node.cfg eventually.
|
||||
|
||||
5. On the manager, configure your worker(s) in node.cfg::
|
||||
|
||||
[worker-1]
|
||||
type=worker
|
||||
host=10.0.0.50
|
||||
interface=dnacluster:21
|
||||
lb_method=pf_ring
|
||||
lb_procs=10
|
33
doc/components/index.rst
Normal file
33
doc/components/index.rst
Normal file
|
@ -0,0 +1,33 @@
|
|||
|
||||
=============
|
||||
Subcomponents
|
||||
=============
|
||||
|
||||
To find documentation for the various subcomponents of Zeek, see their
|
||||
respective GitHub repositories or documentation:
|
||||
|
||||
* `Spicy <https://docs.zeek.org/projects/spicy>`__
|
||||
- C++ parser generator for dissecting protocols & files.
|
||||
* `BinPAC <https://github.com/zeek/binpac>`__
|
||||
- A protocol parser generator
|
||||
* `ZeekControl <https://github.com/zeek/zeekctl>`__
|
||||
- Interactive Zeek management shell
|
||||
* `Zeek-Aux <https://github.com/zeek/zeek-aux>`__
|
||||
- Small auxiliary tools for Zeek
|
||||
* `BTest <https://github.com/zeek/btest>`__
|
||||
- A system testing framework
|
||||
* `Capstats <https://github.com/zeek/capstats>`__
|
||||
- Command-line packet statistic tool
|
||||
* `PySubnetTree <https://github.com/zeek/pysubnettree>`__
|
||||
- Python module for CIDR lookups
|
||||
* `trace-summary <https://github.com/zeek/trace-summary>`__
|
||||
- Script for generating break-downs of network traffic
|
||||
* `Broker <https://github.com/zeek/broker>`__
|
||||
- Zeek's Messaging Library
|
||||
- `(Docs) <https://docs.zeek.org/projects/broker>`__
|
||||
* `Package Manager <https://github.com/zeek/package-manager>`__
|
||||
- A package manager for Zeek
|
||||
- `(Docs) <https://docs.zeek.org/projects/package-manager>`__
|
||||
* `Paraglob <https://github.com/zeek/paraglob>`__
|
||||
- A pattern matching data structure for Zeek.
|
||||
- `(Docs) <https://github.com/zeek/paraglob/blob/master/README.md>`__
|
305
doc/conf.py
Normal file
305
doc/conf.py
Normal file
|
@ -0,0 +1,305 @@
|
|||
#
|
||||
# Zeek documentation build configuration file, created by sphinx-quickstart
|
||||
#
|
||||
# This file is execfile()d with the current directory set to its containing dir.
|
||||
#
|
||||
# Note that not all possible configuration values are present in this
|
||||
# autogenerated file.
|
||||
#
|
||||
# All configuration values have a default; values that are commented out
|
||||
# serve to show the default.
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
extensions = []
|
||||
|
||||
# If extensions (or modules to document with autodoc) are in another directory,
|
||||
# add these directories to sys.path here. If the directory is relative to the
|
||||
# documentation root, use os.path.abspath to make it absolute, like shown here.
|
||||
sys.path.insert(0, os.path.abspath("ext"))
|
||||
|
||||
# -- General configuration -----------------------------------------------------
|
||||
|
||||
# If your documentation needs a minimal Sphinx version, state it here.
|
||||
# needs_sphinx = '1.0'
|
||||
|
||||
# Add any Sphinx extension module names here, as strings. They can be extensions
|
||||
# coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
|
||||
extensions += [
|
||||
"zeek",
|
||||
"sphinx.ext.todo",
|
||||
"zeek_pygments",
|
||||
"spicy-pygments",
|
||||
"literal-emph",
|
||||
"sphinx.ext.extlinks",
|
||||
]
|
||||
|
||||
# Add any paths that contain templates here, relative to this directory.
|
||||
templates_path = ["_templates"]
|
||||
|
||||
# The suffix of source filenames.
|
||||
source_suffix = ".rst"
|
||||
|
||||
# The encoding of source files.
|
||||
# source_encoding = 'utf-8-sig'
|
||||
|
||||
# The master toctree document.
|
||||
master_doc = "index"
|
||||
|
||||
# General information about the project.
|
||||
project = "Zeek"
|
||||
copyright = "by the Zeek Project"
|
||||
|
||||
# The version info for the project you're documenting, acts as replacement for
|
||||
# |version| and |release|, also used in various other places throughout the
|
||||
# built documents.
|
||||
#
|
||||
# The short X.Y version.
|
||||
#
|
||||
|
||||
version = "source"
|
||||
|
||||
try:
|
||||
# Use the actual Zeek version if available
|
||||
with open("../VERSION") as f:
|
||||
version = f.readline().strip()
|
||||
except:
|
||||
try:
|
||||
import re
|
||||
|
||||
import git
|
||||
|
||||
repo = git.Repo(os.path.abspath("."))
|
||||
version = "git/master"
|
||||
|
||||
version_tag_re = r"v\d+\.\d+(\.\d+)?"
|
||||
version_tags = [
|
||||
t
|
||||
for t in repo.tags
|
||||
if t.commit == repo.head.commit and re.match(version_tag_re, str(t))
|
||||
]
|
||||
# Note: sorting by tag date doesn't necessarily give correct
|
||||
# order in terms of version numbers, but doubtful that will ever be
|
||||
# a problem (if we ever do re-tag an old version number on a given
|
||||
# commit such that it is incorrectly found as the most recent version,
|
||||
# we can just re-tag all the other version numbers on that same commit)
|
||||
version_tags = sorted(version_tags, key=lambda t: t.tag.tagged_date)
|
||||
|
||||
if version_tags:
|
||||
version = str(version_tags[-1])
|
||||
|
||||
except:
|
||||
pass
|
||||
|
||||
# The full version, including alpha/beta/rc tags.
|
||||
release = version
|
||||
|
||||
# In terms of the actual hyperlink URL, a more ideal/stable way to reference
|
||||
# source code on GitHub would be by commit hash, but that can be tricky to
|
||||
# update in a way that produces stable Sphinx/reST configuration: don't want
|
||||
# to update the commit-hash for every Zeek commit unless it actually produces
|
||||
# new content, and also don't want to accidentally make it easy for people to
|
||||
# insert unreachable commits when manually running
|
||||
# `zeek/ci/update-zeekygen-docs.sh`.
|
||||
#
|
||||
# We only have a few versions of docs that actually matter: `master` and
|
||||
# `release/.*`, and the tip of those branches will always be in sync with
|
||||
# auto-generated content by simply having `zeek/ci/update-zeekygen-docs.sh`
|
||||
# change this to `release/.*` when needed.
|
||||
zeek_code_version = "master"
|
||||
zeek_code_url = f"https://github.com/zeek/zeek/blob/{zeek_code_version}"
|
||||
|
||||
# The language for content autogenerated by Sphinx. Refer to documentation
|
||||
# for a list of supported languages.
|
||||
# language = None
|
||||
|
||||
# There are two options for replacing |today|: either, you set today to some
|
||||
# non-false value, then it is used:
|
||||
# today = ''
|
||||
# Else, today_fmt is used as the format for a strftime call.
|
||||
today_fmt = "%B %d, %Y"
|
||||
|
||||
# List of patterns, relative to source directory, that match files and
|
||||
# directories to ignore when looking for source files.
|
||||
exclude_patterns = [".#*", "script-reference/autogenerated-*"]
|
||||
|
||||
# The reST default role (used for this markup: `text`) to use for all documents.
|
||||
# default_role = None
|
||||
|
||||
# If true, '()' will be appended to :func: etc. cross-reference text.
|
||||
# add_function_parentheses = True
|
||||
|
||||
# If true, the current module name will be prepended to all description
|
||||
# unit titles (such as .. function::).
|
||||
# add_module_names = True
|
||||
|
||||
# If true, sectionauthor and moduleauthor directives will be shown in the
|
||||
# output. They are ignored by default.
|
||||
show_authors = True
|
||||
|
||||
# The name of the Pygments (syntax highlighting) style to use.
|
||||
pygments_style = "sphinx"
|
||||
|
||||
highlight_language = "none"
|
||||
|
||||
# A list of ignored prefixes for module index sorting.
|
||||
# modindex_common_prefix = []
|
||||
|
||||
|
||||
# -- Options for HTML output ---------------------------------------------------
|
||||
|
||||
html_theme = "sphinx_rtd_theme"
|
||||
|
||||
# Set canonical URL from the Read the Docs Domain
|
||||
html_baseurl = os.environ.get("READTHEDOCS_CANONICAL_URL", "")
|
||||
|
||||
# Tell Jinja2 templates the build is running on Read the Docs
|
||||
if os.environ.get("READTHEDOCS", "") == "True":
|
||||
if "html_context" not in globals():
|
||||
html_context = {}
|
||||
html_context["READTHEDOCS"] = True
|
||||
|
||||
html_last_updated_fmt = "%B %d, %Y"
|
||||
|
||||
# Theme options are theme-specific and customize the look and feel of a theme
|
||||
# further. For a list of options available for each theme, see the
|
||||
# documentation.
|
||||
html_theme_options = {
|
||||
"analytics_id": "UA-144186885-1",
|
||||
"collapse_navigation": False,
|
||||
"style_external_links": True,
|
||||
}
|
||||
|
||||
# Add any paths that contain custom themes here, relative to this directory.
|
||||
# html_theme_path = []
|
||||
|
||||
# The name for this set of Sphinx documents. If None, it defaults to
|
||||
# "<project> v<release> Documentation".
|
||||
html_title = f"Book of Zeek ({release})"
|
||||
|
||||
# A shorter title for the navigation bar. Default is the same as html_title.
|
||||
# html_short_title = None
|
||||
|
||||
# The name of an image file (relative to this directory) to place at the top
|
||||
# of the sidebar.
|
||||
html_logo = "images/zeek-logo-sidebar.png"
|
||||
|
||||
# The name of an image file (within the static path) to use as favicon of the
|
||||
# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
|
||||
# pixels large.
|
||||
html_favicon = "images/zeek-favicon.ico"
|
||||
|
||||
# Add any paths that contain custom static files (such as style sheets) here,
|
||||
# relative to this directory. They are copied after the builtin static files,
|
||||
# so a file named "default.css" will overwrite the builtin "default.css".
|
||||
html_static_path = ["_static"]
|
||||
|
||||
|
||||
def setup(app):
|
||||
app.add_css_file("theme_overrides.css")
|
||||
from sphinx.highlighting import lexers
|
||||
from zeek_pygments import ZeekLexer
|
||||
|
||||
lexers["zeek"] = ZeekLexer()
|
||||
app.add_config_value("zeek-code-url", zeek_code_url, "env")
|
||||
|
||||
|
||||
# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
|
||||
# using the given strftime format.
|
||||
# html_last_updated_fmt = '%b %d, %Y'
|
||||
|
||||
# If true, SmartyPants will be used to convert quotes and dashes to
|
||||
# typographically correct entities.
|
||||
# html_use_smartypants = True
|
||||
|
||||
# Custom sidebar templates, maps document names to template names.
|
||||
# html_sidebars = {
|
||||
#'**': ['localtoc.html', 'sourcelink.html', 'searchbox.html'],
|
||||
# }
|
||||
|
||||
# Additional templates that should be rendered to pages, maps page names to
|
||||
# template names.
|
||||
# html_additional_pages = {}
|
||||
|
||||
# If false, no module index is generated.
|
||||
# html_domain_indices = True
|
||||
|
||||
# If false, no index is generated.
|
||||
# html_use_index = True
|
||||
|
||||
# If true, the index is split into individual pages for each letter.
|
||||
# html_split_index = False
|
||||
|
||||
# If true, links to the reST sources are added to the pages.
|
||||
# html_show_sourcelink = True
|
||||
|
||||
# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
|
||||
# html_show_sphinx = True
|
||||
|
||||
# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
|
||||
# html_show_copyright = True
|
||||
|
||||
# If true, an OpenSearch description file will be output, and all pages will
|
||||
# contain a <link> tag referring to it. The value of this option must be the
|
||||
# base URL from which the finished HTML is served.
|
||||
# html_use_opensearch = ''
|
||||
|
||||
# This is the file name suffix for HTML files (e.g. ".xhtml").
|
||||
# html_file_suffix = None
|
||||
|
||||
# Output file base name for HTML help builder.
|
||||
htmlhelp_basename = "zeek-docs"
|
||||
|
||||
# -- Options for LaTeX output --------------------------------------------------
|
||||
|
||||
# The paper size ('letter' or 'a4').
|
||||
# latex_paper_size = 'letter'
|
||||
|
||||
# The font size ('10pt', '11pt' or '12pt').
|
||||
# latex_font_size = '10pt'
|
||||
|
||||
# Grouping the document tree into LaTeX files. List of tuples
|
||||
# (source start file, target name, title, author, documentclass [howto/manual]).
|
||||
latex_documents = [
|
||||
("index", "Zeek.tex", "Zeek Documentation", "The Zeek Project", "manual"),
|
||||
]
|
||||
|
||||
# The name of an image file (relative to this directory) to place at the top of
|
||||
# the title page.
|
||||
# latex_logo = None
|
||||
|
||||
# For "manual" documents, if this is true, then toplevel headings are parts,
|
||||
# not chapters.
|
||||
# latex_use_parts = False
|
||||
|
||||
# If true, show page references after internal links.
|
||||
# latex_show_pagerefs = False
|
||||
|
||||
# If true, show URL addresses after external links.
|
||||
# latex_show_urls = False
|
||||
|
||||
# Additional stuff for the LaTeX preamble.
|
||||
# latex_preamble = ''
|
||||
|
||||
# Documents to append as an appendix to all manuals.
|
||||
# latex_appendices = []
|
||||
|
||||
# If false, no module index is generated.
|
||||
# latex_domain_indices = True
|
||||
|
||||
# -- Options for manual page output --------------------------------------------
|
||||
|
||||
# One entry per manual page. List of tuples
|
||||
# (source start file, name, description, authors, manual section).
|
||||
man_pages = [("index", "zeek", "Zeek Documentation", ["The Zeek Project"], 1)]
|
||||
|
||||
# -- Options for todo plugin --------------------------------------------
|
||||
todo_include_todos = True
|
||||
|
||||
extlinks = {
|
||||
"slacklink": ("https://zeek.org/slack%s", None),
|
||||
"discourselink": ("https://community.zeek.org/%s", None),
|
||||
"spicylink": ("https://docs.zeek.org/projects/spicy/en/latest/%s", None),
|
||||
}
|
||||
extlinks_detect_hardcoded_links = True
|
318
doc/customizations.rst
Normal file
318
doc/customizations.rst
Normal file
|
@ -0,0 +1,318 @@
|
|||
.. _popular-customizations:
|
||||
|
||||
======================
|
||||
Popular Customizations
|
||||
======================
|
||||
|
||||
This page outlines customizations and additions that are popular
|
||||
among Zeek users.
|
||||
|
||||
.. note::
|
||||
|
||||
This page lists externally-maintained Zeek packages. The Zeek team does not
|
||||
provide support or maintenance for these packages. If you find bugs or have
|
||||
feature requests, please reach out to the respective package maintainers directly.
|
||||
|
||||
You may also post in the :slacklink:`Zeek Slack <>` #packages
|
||||
channel or :discourselink:`forum <>` to get help from the broader
|
||||
Zeek community.
|
||||
|
||||
|
||||
Log Enrichment
|
||||
==============
|
||||
|
||||
Community ID
|
||||
------------
|
||||
|
||||
.. versionadded:: 6.0
|
||||
|
||||
Zeek includes native `Community ID Flow Hashing`_ support. This functionality
|
||||
has previously been provided through the `zeek-community-id`_ package.
|
||||
|
||||
.. note::
|
||||
|
||||
At this point, the external `zeek-community-id`_ package is still
|
||||
available to support Zeek deployments running older versions. However,
|
||||
the scripts provided by the package cause conflicts with those provided in
|
||||
Zeek 6.0 - do not load both.
|
||||
|
||||
Loading the
|
||||
:doc:`/scripts/policy/protocols/conn/community-id-logging.zeek`
|
||||
and
|
||||
:doc:`/scripts/policy/frameworks/notice/community-id.zeek`
|
||||
scripts adds an additional ``community_id`` field to the
|
||||
:zeek:see:`Conn::Info` and :zeek:see:`Notice::Info` record.
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
$ zeek -r ./traces/get.trace protocols/conn/community-id-logging LogAscii::use_json=T
|
||||
$ jq < conn.log
|
||||
{
|
||||
"ts": 1362692526.869344,
|
||||
"uid": "CoqLmg1Ds5TE61szq1",
|
||||
"id.orig_h": "141.142.228.5",
|
||||
"id.orig_p": 59856,
|
||||
"id.resp_h": "192.150.187.43",
|
||||
"id.resp_p": 80,
|
||||
"proto": "tcp",
|
||||
...
|
||||
"community_id": "1:yvyB8h+3dnggTZW0UEITWCst97w="
|
||||
}
|
||||
|
||||
|
||||
The Community ID Flow Hash of a :zeek:see:`conn_id` instance can be computed
|
||||
with the :zeek:see:`community_id_v1` builtin function directly on the command-line
|
||||
or used in custom scripts.
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
$ zeek -e 'print community_id_v1([$orig_h=141.142.228.5, $orig_p=59856/tcp, $resp_h=192.150.187.43, $resp_p=80/tcp])'
|
||||
1:yvyB8h+3dnggTZW0UEITWCst97w=
|
||||
|
||||
.. _Community ID Flow Hashing: https://github.com/corelight/community-id-spec
|
||||
.. _zeek-community-id: https://github.com/corelight/zeek-community-id/
|
||||
|
||||
.. _geolocation:
|
||||
|
||||
Address geolocation and AS lookups
|
||||
----------------------------------
|
||||
|
||||
.. _libmaxminddb: https://github.com/maxmind/libmaxminddb
|
||||
|
||||
Zeek supports IP address geolocation as well as AS (autonomous system)
|
||||
lookups. This requires two things:
|
||||
|
||||
* Compilation of Zeek with the `libmaxminddb`_ library and development
|
||||
headers. If you're using our :ref:`Docker images <docker-images>` or
|
||||
:ref:`binary packages <binary-packages>`, there's nothing to do: they ship
|
||||
with GeoIP support.
|
||||
* Installation of corresponding MaxMind database files on your
|
||||
system.
|
||||
|
||||
To check whether your Zeek supports geolocation, run ``zeek-config --have-geoip``
|
||||
(available since Zeek 6.2) or simply try an address lookup. The following
|
||||
indicates that your Zeek lacks support:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
$ zeek -e 'lookup_location(1.2.3.4)'
|
||||
error in <command line>, line 1: Zeek was not configured for GeoIP support (lookup_location(1.2.3.4))
|
||||
|
||||
Read on for more details about building Zeek with GeoIP support, and how to
|
||||
configure access to the database files.
|
||||
|
||||
Building Zeek with libmaxminddb
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
If you build Zeek yourself, you need to install libmaxminddb prior to
|
||||
configuring your build.
|
||||
|
||||
* RPM/RedHat-based Linux:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
sudo yum install libmaxminddb-devel
|
||||
|
||||
* DEB/Debian-based Linux:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
sudo apt-get install libmaxminddb-dev
|
||||
|
||||
* FreeBSD:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
sudo pkg install libmaxminddb
|
||||
|
||||
* Mac OS X:
|
||||
|
||||
You need to install from your preferred package management system
|
||||
(e.g. Homebrew, MacPorts, or Fink). For Homebrew, the name of the package
|
||||
that you need is libmaxminddb.
|
||||
|
||||
The ``configure`` script's output indicates whether it successfully located
|
||||
libmaxminddb. If your system's MaxMind library resides in a non-standard path,
|
||||
you may need to specify it via ``./configure --with-geoip=<path>``.
|
||||
|
||||
Installing and configuring GeoIP databases
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
MaxMind's databases ship as individual files that you can `download
|
||||
<https://www.maxmind.com/en/accounts/current/geoip/downloads>`_ from their
|
||||
website after `signing up <https://www.maxmind.com/en/geolite2/signup>`_ for an
|
||||
account. Some Linux distributions also offer free databases in their package
|
||||
managers.
|
||||
|
||||
There are three types of databases: city-level geolocation, country-level
|
||||
geolocation, and mapping of IP addresses to autonomous systems (AS number and
|
||||
organization). Download these and decide on a place to put them on your
|
||||
file system. If you use automated tooling or system packages for the
|
||||
installation, that path may be chosen for you, such as ``/usr/share/GeoIP``.
|
||||
|
||||
Zeek provides three ways to configure access to the databases:
|
||||
|
||||
* Specifying the path and filenames via script variables. Use the
|
||||
:zeek:see:`mmdb_dir` variable, unset by default, to point to the directory
|
||||
containing the database(s). By default Zeek looks for databases called
|
||||
``GeoLite2-City.mmdb``, ``GeoLite2-Country.mmdb``, and
|
||||
``GeoLite2-ASN.mmdb``. Starting with Zeek 6.2 you can adjust these names by
|
||||
redefining the :zeek:see:`mmdb_city_db`, :zeek:see:`mmdb_country_db`, and
|
||||
:zeek:see:`mmdb_asn_db` variables.
|
||||
* Relying on Zeek's pre-configured search paths and filenames. The
|
||||
:zeek:see:`mmdb_dir_fallbacks` variable contains default
|
||||
search paths that Zeek will try in turn when :zeek:see:`mmdb_dir` is not
|
||||
set. Prior to Zeek 6.2 these paths were hardcoded; they're now redefinable.
|
||||
For geolocation, Zeek first attempts the city-level databases due to their
|
||||
greater precision, and falls back to the country-level one. You can adjust the
|
||||
database filenames via :zeek:see:`mmdb_city_db` and related variables, as
|
||||
covered above.
|
||||
* Opening databases explicitly via scripting. The
|
||||
:zeek:see:`mmdb_open_location_db` and :zeek:see:`mmdb_open_asn_db`
|
||||
functions take full paths to database files. Zeek only ever uses one
|
||||
geolocation and one ASN database, and these loads override any databases
|
||||
previously loaded. These loads can occur at any point.
|
||||
|
||||
Querying the databases
|
||||
^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Two built-in functions provide GeoIP functionality:
|
||||
|
||||
.. code-block:: zeek
|
||||
|
||||
function lookup_location(a:addr): geo_location
|
||||
function lookup_autonomous_system(a:addr): geo_autonomous_system
|
||||
|
||||
:zeek:see:`lookup_location` returns a :zeek:see:`geo_location` record with
|
||||
country/region/etc fields, while :zeek:see:`lookup_autonomous_system` returns a
|
||||
:zeek:see:`geo_autonomous_system` record indicating the AS number and
|
||||
organization. Depending on the queried IP address some fields may be
|
||||
uninitialized, so you should guard access with an ``a?$b`` :ref:`existence test
|
||||
<record-field-operators>`.
|
||||
|
||||
Zeek tests the database files for staleness. If it detects that a database has
|
||||
been updated, it will automatically reload it. Zeek does not automatically add
|
||||
GeoIP intelligence to its logs, but several add-on scripts and packages provide
|
||||
such functionality. These include:
|
||||
|
||||
* The :ref:`notice framework <notice-framework>` lets you configure notice types
|
||||
that you'd like to augment with location information. See
|
||||
:zeek:see:`Notice::lookup_location_types` and
|
||||
:zeek:see:`Notice::ACTION_ADD_GEODATA` for details.
|
||||
* The :doc:`/scripts/policy/protocols/smtp/detect-suspicious-orig.zeek` and
|
||||
:doc:`/scripts/policy/protocols/ssh/geo-data.zeek` policy scripts.
|
||||
* Several `Zeek packages <https://packages.zeek.org>`_.
|
||||
|
||||
Testing
|
||||
^^^^^^^
|
||||
|
||||
Before using the GeoIP functionality it is a good idea to verify that
|
||||
everything is setup correctly. You can quickly check if the GeoIP
|
||||
functionality works by running commands like these:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
zeek -e "print lookup_location(8.8.8.8);"
|
||||
|
||||
If you see an error message similar to "Failed to open GeoIP location database",
|
||||
then your database configuration is broken. You may need to rename or move your
|
||||
GeoIP database files.
|
||||
|
||||
Example
|
||||
^^^^^^^
|
||||
|
||||
The following shows every FTP connection from hosts in Ohio, US:
|
||||
|
||||
.. code-block:: zeek
|
||||
|
||||
event ftp_reply(c: connection, code: count, msg: string, cont_resp: bool)
|
||||
{
|
||||
local client = c$id$orig_h;
|
||||
local loc = lookup_location(client);
|
||||
|
||||
if (loc?$region && loc$region == "OH" && loc?$country_code && loc$country_code == "US")
|
||||
{
|
||||
local city = loc?$city ? loc$city : "<unknown>";
|
||||
|
||||
print fmt("FTP Connection from:%s (%s,%s,%s)", client, city,
|
||||
loc$region, loc$country_code);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Log Writers
|
||||
===========
|
||||
|
||||
Kafka
|
||||
-----
|
||||
|
||||
For exporting logs to `Apache Kafka`_ in a streaming fashion, the externally-maintained
|
||||
`zeek-kafka`_ package is a popular choice and easy to configure. It relies on `librdkafka`_.
|
||||
|
||||
.. code-block:: zeek
|
||||
|
||||
redef Log::default_writer = Log::WRITER_KAFKAWRITER;
|
||||
|
||||
redef Kafka::kafka_conf += {
|
||||
["metadata.broker.list"] = "192.168.0.1:9092"
|
||||
};
|
||||
|
||||
.. _Apache Kafka: https://kafka.apache.org/
|
||||
.. _zeek-kafka: https://github.com/SeisoLLC/zeek-kafka/
|
||||
.. _librdkafka: https://github.com/confluentinc/librdkafka
|
||||
|
||||
|
||||
Logging
|
||||
=======
|
||||
|
||||
JSON Streaming Logs
|
||||
-------------------
|
||||
|
||||
The externally-maintained `json-streaming-logs`_ package tailors Zeek
|
||||
for use with log shippers like `Filebeat`_ or `fluentd`_. It configures
|
||||
additional log files prefixed with ``json_streaming_``, adds ``_path``
|
||||
and ``_write_ts`` fields to log records and configures log rotation
|
||||
appropriately.
|
||||
|
||||
If you do not use a logging archive and want to stream all logs away
|
||||
from the system where Zeek is running without leveraging Kafka, this
|
||||
package helps you with that.
|
||||
|
||||
.. _json-streaming-logs: https://github.com/corelight/json-streaming-logs
|
||||
.. _Filebeat: https://www.elastic.co/beats/filebeat
|
||||
.. _fluentd: https://www.fluentd.org/
|
||||
|
||||
|
||||
Long Connections
|
||||
----------------
|
||||
|
||||
Zeek logs connection entries into the :file:`conn.log` only upon termination
|
||||
or due to expiration of inactivity timeouts. Depending on the protocol and
|
||||
chosen timeout values this can significantly delay the appearance of a log
|
||||
entry for a given connection. The delay may be up to an hour for lingering
|
||||
SSH connections or connections where the final FIN or RST packets were missed.
|
||||
|
||||
The `zeek-long-connections`_ package alleviates this by creating a :file:`conn_long.log`
|
||||
log with the same format as :file:`conn.log`, but containing entries for connections
|
||||
that have been existing for configurable intervals.
|
||||
By default, the first entry for a connection is logged after 10mins. Depending on
|
||||
the environment, this can be lowered as even a 10 minute delay may be significant
|
||||
for detection purposes in streaming setups.
|
||||
|
||||
.. _zeek-long-connections: https://github.com/corelight/zeek-long-connections
|
||||
|
||||
|
||||
Profiling and Debugging
|
||||
=======================
|
||||
|
||||
jemalloc profiling
|
||||
------------------
|
||||
|
||||
For investigation of memory leaks or state-growth issues within Zeek,
|
||||
jemalloc's profiling is invaluable. A package providing a bit of support
|
||||
for configuring jemalloc's profiling facilities is `zeek-jemalloc-profiling`_.
|
||||
|
||||
Some general information about memory profiling exists in the :ref:`Troubleshooting <troubleshooting>`
|
||||
section.
|
||||
|
||||
.. _zeek-jemalloc-profiling: https://github.com/JustinAzoff/zeek-jemalloc-profiling
|
120
doc/devel/cluster-backend-zeromq.rst
Normal file
120
doc/devel/cluster-backend-zeromq.rst
Normal file
|
@ -0,0 +1,120 @@
|
|||
.. _cluster_backend_zeromq:
|
||||
|
||||
======================
|
||||
ZeroMQ Cluster Backend
|
||||
======================
|
||||
|
||||
.. versionadded:: 7.1
|
||||
|
||||
*Experimental*
|
||||
|
||||
Quickstart
|
||||
==========
|
||||
|
||||
To switch a Zeek cluster with a static cluster layout over to use ZeroMQ
|
||||
as cluster backend, add the following snippet to ``local.zeek``:
|
||||
|
||||
.. code-block:: zeek
|
||||
|
||||
@load frameworks/cluster/backend/zeromq/connect
|
||||
|
||||
|
||||
Note that the function :zeek:see:`Broker::publish` will be non-functional
|
||||
and a warning emitted when used - use :zeek:see:`Cluster::publish` instead.
|
||||
|
||||
By default, a configuration based on hard-coded endpoints and cluster layout
|
||||
information is created. For more customization, refer to the module documentation
|
||||
at :doc:`cluster/backend/zeromq/main.zeek </scripts/policy/frameworks/cluster/backend/zeromq/main.zeek>`.
|
||||
|
||||
|
||||
Architecture
|
||||
============
|
||||
|
||||
Publish-Subscribe of Zeek Events
|
||||
--------------------------------
|
||||
|
||||
The `ZeroMQ <https://zeromq.org/>`_ based cluster backend uses a central
|
||||
XPUB/XSUB broker for publish-subscribe functionality. Zeek events published
|
||||
via :zeek:see:`Cluster::publish` are distributed by this central broker to
|
||||
interested nodes.
|
||||
|
||||
.. figure:: /images/cluster/zeromq-pubsub.png
|
||||
|
||||
|
||||
As depicted in the figure above, each cluster node connects to the central
|
||||
broker twice, once via its XPUB socket and once via its XSUB socket. This
|
||||
results in two TCP connections from every cluster node to the central broker.
|
||||
This setup allows every node in the cluster to see messages from all other
|
||||
nodes, avoiding the need for cluster topology awareness.
|
||||
|
||||
.. note::
|
||||
|
||||
Scalability of the central broker may be a concern in large production setups,
|
||||
but for small clusters on a single node it may well be fast enough.
|
||||
|
||||
On a cluster node, the XPUB socket provides notifications about subscriptions
|
||||
created by other nodes: For every subscription created by any node in
|
||||
the cluster, the :zeek:see:`Cluster::Backend::ZeroMQ::subscription` event is
|
||||
raised locally on every other node (unless another node had created the same
|
||||
subscription previously).
|
||||
|
||||
This mechanism is used to discover the existence of other cluster nodes by
|
||||
matching the topics with the prefix for node specific subscriptions as produced
|
||||
by :zeek:see:`Cluster::nodeid_topic`.
|
||||
|
||||
As of now, the implementation of the central broker calls ZeroMQ's
|
||||
``zmq::proxy()`` function to forward messages between the XPUB and
|
||||
XSUB socket.
|
||||
|
||||
While the diagram above indicates the central broker being deployed separately
|
||||
from Zeek cluster nodes, by default the manager node will start and run this
|
||||
broker using a separate thread. There's nothing that would prevent running
|
||||
a long running central broker independently from the Zeek cluster nodes, however.
|
||||
|
||||
The serialization of Zeek events is done by the selected
|
||||
:zeek:see:`Cluster::event_serializer` and is independent of ZeroMQ.
|
||||
The central broker needs no knowledge about the chosen format, it is
|
||||
only shuffling messages between nodes.
|
||||
|
||||
|
||||
Logging
|
||||
-------
|
||||
|
||||
While remote events always pass through the central broker, nodes connect and
|
||||
send log writes directly to logger nodes in a cluster. The ZeroMQ cluster backend
|
||||
leverages ZeroMQ's pipeline pattern for this functionality. That is, logger nodes
|
||||
(including the manager if configured using :zeek:see:`Cluster::manager_is_logger`)
|
||||
open a ZeroMQ PULL socket to receive log writes. All other nodes connect their
|
||||
PUSH socket to all available PULL sockets. These connections are separate from
|
||||
the publish-subscribe setup outlined above.
|
||||
|
||||
When sending log-writes over a PUSH socket, load balancing is done by ZeroMQ.
|
||||
Individual cluster nodes do not have control over the decision which logger
|
||||
node receives log writes at any given time.
|
||||
|
||||
.. figure:: /images/cluster/zeromq-logging.png
|
||||
|
||||
While the previous paragraph used "log writes", a single message to a logger
|
||||
node actually contains a batch of log writes. The options :zeek:see:`Log::flush_interval`
|
||||
and :zeek:see:`Log::write_buffer_size` control the frequency and maximum size
|
||||
of these batches.
|
||||
|
||||
The serialization format used to encode such batches is controlled by the
|
||||
selected :zeek:see:`Cluster::log_serializer` and is independent of ZeroMQ.
|
||||
|
||||
With the default serializer (:zeek:see:`Cluster::LOG_SERIALIZER_ZEEK_BIN_V1`),
|
||||
every log batch on the wire has a header prepended that describes it. This allows
|
||||
interpretation of log writes even by non-Zeek processes. This opens the possibility
|
||||
to implement non-Zeek logger processes as long as the chosen serializer format
|
||||
is understood by the receiving process. In the future, a JSON lines serialization
|
||||
may be provided, allowing easier interpretation than a proprietary binary format.
|
||||
|
||||
|
||||
Summary
|
||||
-------
|
||||
|
||||
Combining the diagrams above, the connections between the different socket
|
||||
types in a Zeek cluster looks something like the following.
|
||||
|
||||
.. figure:: /images/cluster/zeromq-cluster.png
|
||||
|
111
doc/devel/contributors.rst
Normal file
111
doc/devel/contributors.rst
Normal file
|
@ -0,0 +1,111 @@
|
|||
|
||||
===================
|
||||
Contributor's Guide
|
||||
===================
|
||||
|
||||
See below for selection of some of the more common contribution guidelines
|
||||
maintained directly in `Zeek wiki
|
||||
<https://github.com/zeek/zeek/wiki#contributors>`_.
|
||||
|
||||
General Contribution Process
|
||||
============================
|
||||
|
||||
See https://github.com/zeek/zeek/wiki/Contribution-Guide
|
||||
|
||||
Coding Style and Conventions
|
||||
============================
|
||||
|
||||
See https://github.com/zeek/zeek/wiki/Coding-Style-and-Conventions
|
||||
|
||||
General Documentation Structure/Process
|
||||
=======================================
|
||||
|
||||
See the :doc:`README </README>` file of https://github.com/zeek/zeek-docs
|
||||
|
||||
Documentation Style and Conventions
|
||||
===================================
|
||||
|
||||
See https://github.com/zeek/zeek/wiki/Documentation-Style-and-Conventions
|
||||
|
||||
Checking for Memory Errors and Leaks
|
||||
====================================
|
||||
|
||||
See https://github.com/zeek/zeek/wiki/Checking-for-Memory-Errors-and-Leaks
|
||||
|
||||
Maintaining long-lived forks of Zeek
|
||||
====================================
|
||||
|
||||
Consistent formatting of the Zeek codebase is enforced automatically by
|
||||
configurations tracked in the repository. Upstream updates to these
|
||||
configurations can lead to formatting changes which could cause merge conflicts
|
||||
for long-lived forks.
|
||||
|
||||
Currently the following configuration files in the root directory are used:
|
||||
|
||||
- ``.pre-commit-config.yaml``: Configuration for `pre-commit <https://pre-commit.com/>`_.
|
||||
We use pre-commit to manage and orchestrate formatters and linters.
|
||||
- ``.clang-format``: Configuration for `clang-format
|
||||
<https://clang.llvm.org/docs/ClangFormat.html>`_ for formatting C++ files.
|
||||
- ``.style.yapf``: Configuration for `YAPF <https://github.com/google/yapf>`_
|
||||
for formatting Python files.
|
||||
- ``.cmake-format.json``: Configuration for `cmake-format
|
||||
<https://github.com/cheshirekow/cmake_format>`_ for formatting CMake files.
|
||||
|
||||
With these configuration files present ``pre-commit run --all-files`` will
|
||||
install all needed formatters and reformat all files in the repository
|
||||
according to the current configuration.
|
||||
|
||||
.. rubric:: Workflow: Zeek ``master`` branch regularly merged into fork
|
||||
|
||||
If Zeek's master branch is regularly merged into the fork, merge conflicts can
|
||||
be resolved once and their resolution is tracked in the repository. Similarly,
|
||||
we can explicitly reformat the fork once and then merge the upstream branch.
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
## Get and stage latest versions of configuration files from master.
|
||||
git checkout master -- .pre-commit-config.yaml .clang-format .style.yapf .cmake-format.json
|
||||
|
||||
## Reformat fork according to new configuration.
|
||||
pre-commit run -a
|
||||
|
||||
## Record reformatted state of fork.
|
||||
git add -u && git commit -m 'Reformat'
|
||||
|
||||
# Merge in master, resolve merge conflicts as usual.
|
||||
git merge master
|
||||
|
||||
.. rubric:: Workflow: Fork regularly rebased onto Zeek ``master`` branch
|
||||
|
||||
If the target for a rebase has been reformatted individual diff hunks might not
|
||||
apply cleanly anymore. There are different approaches to work around that. The
|
||||
approach with the least conflicts is likely to first reformat the fork
|
||||
according to upstream style without pulling in changes, and only after that
|
||||
rebase on upstream and resolve potential semantic conflicts.
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
# Create a commit updating the configuration files.
|
||||
git checkout master -- .pre-commit-config.yaml .clang-format .style.yapf .cmake-format.json
|
||||
git commit -m 'Bump formatter configurations'
|
||||
|
||||
# With a fork branched from upstream at commit FORK_COMMIT, rebase the
|
||||
# config update commit 'Bump formatter configurations' to the start of the
|
||||
# fork, but do not yet rebase on master (interactively move the last patch
|
||||
# to the start of the list of patches).
|
||||
git rebase -i FORK_COMMIT
|
||||
|
||||
# Reformat all commits according to configs at the base. We use the '--exec'
|
||||
# flag of 'git rebase' to execute pre-commit after applying each patch. If
|
||||
# 'git rebase' detects uncommitted changes it stops automatic progress so
|
||||
# one can inspect and apply the changes.
|
||||
git rebase -i FORK_COMMIT --exec 'pre-commit run --all-files'
|
||||
# When this stops, inspect changes and stage them.
|
||||
git add -u
|
||||
# Continue rebasing. This prompts for a commit message and amends the last
|
||||
# patch.
|
||||
git rebase --continue
|
||||
|
||||
# The fork is now formatted according to upstream style. Rebase on master,
|
||||
# and drop the 'Bump formatter configurations' patch from the list of patches.
|
||||
git rebase -i master
|
21
doc/devel/index.rst
Normal file
21
doc/devel/index.rst
Normal file
|
@ -0,0 +1,21 @@
|
|||
|
||||
================
|
||||
Developer Guides
|
||||
================
|
||||
|
||||
In addition to documentation found or mentioned below, some developer-oriented
|
||||
content is maintained directly in the `Zeek wiki
|
||||
<https://github.com/zeek/zeek/wiki#development-guides>`_ due to the nature of
|
||||
the content (e.g. the author finds it to be more dynamic, informal, meta,
|
||||
transient, etc. compared to other documentation).
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
|
||||
plugins
|
||||
spicy/index
|
||||
websocket-api
|
||||
Documentation Guide </README.rst>
|
||||
contributors
|
||||
maintainers
|
||||
cluster-backend-zeromq
|
13
doc/devel/maintainers.rst
Normal file
13
doc/devel/maintainers.rst
Normal file
|
@ -0,0 +1,13 @@
|
|||
|
||||
==================
|
||||
Maintainer's Guide
|
||||
==================
|
||||
|
||||
Some notable guidelines for maintainers are linked below for convenience, but
|
||||
they are generally maintained directly in the `Zeek wiki
|
||||
<https://github.com/zeek/zeek/wiki#maintainers>`_.
|
||||
|
||||
Release Process
|
||||
===============
|
||||
|
||||
See https://github.com/zeek/zeek/wiki/Release-Process
|
505
doc/devel/plugins.rst
Normal file
505
doc/devel/plugins.rst
Normal file
|
@ -0,0 +1,505 @@
|
|||
.. _zkg package manager: https://docs.zeek.org/projects/package-manager/en/stable/
|
||||
|
||||
.. _writing-plugins:
|
||||
|
||||
===============
|
||||
Writing Plugins
|
||||
===============
|
||||
|
||||
Zeek provides a plugin API that enables extending
|
||||
the system dynamically, without modifying the core code base. That way,
|
||||
custom code remains self-contained and can be maintained, compiled,
|
||||
and installed independently. Currently, plugins can add the following
|
||||
functionality to Zeek:
|
||||
|
||||
- Zeek scripts.
|
||||
|
||||
- Builtin functions/events/types for the scripting language.
|
||||
|
||||
- Protocol analyzers.
|
||||
|
||||
- File analyzers.
|
||||
|
||||
- Packet sources and packet dumpers.
|
||||
|
||||
- Logging framework backends.
|
||||
|
||||
- Input framework readers.
|
||||
|
||||
A plugin's functionality is available to the user just as if Zeek had
|
||||
the corresponding code built-in. Indeed, internally many of Zeek's
|
||||
pieces are structured as plugins as well; they are just statically
|
||||
compiled into the binary rather than loaded dynamically at runtime.
|
||||
|
||||
.. note::
|
||||
|
||||
Plugins and Zeek packages are related but separate concepts. Both extend
|
||||
Zeek's functionality without modifying Zeek's source code. A plugin achieves
|
||||
this via compiled, native code that Zeek links into its core at runtime. A Zeek
|
||||
package, on the other hand, is a modular addition to Zeek, managed via the
|
||||
`zkg package manager`_, that may or may not include a plugin. More commonly,
|
||||
packages consist of script-layer additions to Zeek's functionality. Packages
|
||||
also feature more elaborate metadata, enabling dependencies on other packages,
|
||||
Zeek versions, etc.
|
||||
|
||||
Quick Start
|
||||
===========
|
||||
|
||||
Writing a basic plugin is quite straightforward as long as one
|
||||
follows a few conventions. In the following, we create a simple example
|
||||
plugin that adds a new Built-In Function (BIF) to Zeek: we'll add
|
||||
``rot13(s: string) : string``, a function that rotates every letter
|
||||
in a string by 13 places.
|
||||
|
||||
Generally, a plugin comes in the form of a directory following a
|
||||
certain structure. To get started, Zeek's distribution provides a
|
||||
helper script ``auxil/zeek-aux/plugin-support/init-plugin`` that creates
|
||||
a skeleton plugin that can then be customized. Let's use that::
|
||||
|
||||
# init-plugin ./rot13-plugin Demo Rot13
|
||||
|
||||
As you can see, the script takes three arguments. The first is a
|
||||
directory inside which the plugin skeleton will be created. The second
|
||||
is the namespace the plugin will live in, and the third is a descriptive
|
||||
name for the plugin itself relative to the namespace. Zeek uses the
|
||||
combination of namespace and name to identify a plugin. The namespace
|
||||
serves to avoid naming conflicts between plugins written by independent
|
||||
developers; pick, e.g., the name of your organisation. The namespaces
|
||||
``Bro`` (legacy) and ``Zeek`` are reserved for functionality distributed
|
||||
by the Zeek Project. In
|
||||
our example, the plugin will be called ``Demo::Rot13``.
|
||||
|
||||
The ``init-plugin`` script puts a number of files in place. The full
|
||||
layout is described later. For now, all we need is
|
||||
``src/rot13.bif``. It's initially empty, but we'll add our new BIF
|
||||
there as follows::
|
||||
|
||||
# cat src/rot13.bif
|
||||
%%{
|
||||
#include <cstring>
|
||||
#include <cctype>
|
||||
#include "zeek/util.h"
|
||||
#include "zeek/ZeekString.h"
|
||||
#include "zeek/Val.h"
|
||||
%%}
|
||||
|
||||
module Demo;
|
||||
|
||||
function rot13%(s: string%) : string
|
||||
%{
|
||||
char* rot13 = util::copy_string(s->CheckString());
|
||||
|
||||
for ( char* p = rot13; *p; p++ )
|
||||
{
|
||||
char b = islower(*p) ? 'a' : 'A';
|
||||
char d = *p - b + 13;
|
||||
|
||||
if ( d >= 13 && d <= 38 )
|
||||
*p = d % 26 + b;
|
||||
}
|
||||
|
||||
zeek::String* zs = new zeek::String(1, reinterpret_cast<byte_vec>(rot13),
|
||||
strlen(rot13));
|
||||
return make_intrusive<StringVal>(zs);
|
||||
%}
|
||||
|
||||
The syntax of this file is just like any other ``*.bif`` file; we
|
||||
won't go into it here.
|
||||
|
||||
Now we are ready to compile our plugin. The configure script will just
|
||||
need to be able to find the location of either a Zeek installation-tree or
|
||||
a Zeek source-tree.
|
||||
|
||||
When building a plugin against a Zeek installation-tree, simply have the
|
||||
installation's associated ``zeek-config`` in your :envvar:`PATH` and the
|
||||
configure script will detect it and use it to obtain all the information
|
||||
it needs::
|
||||
|
||||
# which zeek-config
|
||||
/usr/local/zeek/bin/zeek-config
|
||||
# cd rot13-plugin
|
||||
# ./configure && make
|
||||
[... cmake output ...]
|
||||
|
||||
When building a plugin against a Zeek source-tree (which itself needs
|
||||
to have first been built), the configure script has to explicitly be
|
||||
told its location::
|
||||
|
||||
# cd rot13-plugin
|
||||
# ./configure --zeek-dist=/path/to/zeek/dist && make
|
||||
[... cmake output ...]
|
||||
|
||||
This builds the plugin in a subdirectory ``build/``. In fact, that
|
||||
subdirectory *becomes* the plugin: when ``make`` finishes, ``build/``
|
||||
has everything it needs for Zeek to recognize it as a dynamic plugin.
|
||||
|
||||
Let's try that. Once we point Zeek to the ``build/`` directory, it will
|
||||
pull in our new plugin automatically, as we can check with the ``-N``
|
||||
option::
|
||||
|
||||
# export ZEEK_PLUGIN_PATH=/path/to/rot13-plugin/build
|
||||
# zeek -N
|
||||
[...]
|
||||
Demo::Rot13 - <Insert description> (dynamic, version 0.1.0)
|
||||
[...]
|
||||
|
||||
That looks quite good, except for the dummy description that we should
|
||||
replace with something nicer so that users will know what our plugin
|
||||
is about. We do this by editing the ``config.description`` line in
|
||||
``src/Plugin.cc``, like this::
|
||||
|
||||
[...]
|
||||
plugin::Configuration Plugin::Configure()
|
||||
{
|
||||
plugin::Configuration config;
|
||||
config.name = "Demo::Rot13";
|
||||
config.description = "Caesar cipher rotating a string's letters by 13 places.";
|
||||
config.version.major = 0;
|
||||
config.version.minor = 1;
|
||||
config.version.patch = 0;
|
||||
return config;
|
||||
}
|
||||
[...]
|
||||
|
||||
Now rebuild and verify that the description is visible::
|
||||
|
||||
# make
|
||||
[...]
|
||||
# zeek -N | grep Rot13
|
||||
Demo::Rot13 - Caesar cipher rotating a string's letters by 13 places. (dynamic, version 0.1.0)
|
||||
|
||||
Zeek can also show us what exactly the plugin provides with the
|
||||
more verbose option ``-NN``::
|
||||
|
||||
# zeek -NN
|
||||
[...]
|
||||
Demo::Rot13 - Caesar cipher rotating a string's letters by 13 places. (dynamic, version 0.1.0)
|
||||
[Function] Demo::rot13
|
||||
[...]
|
||||
|
||||
There's our function. Now let's use it::
|
||||
|
||||
# zeek -e 'print Demo::rot13("Hello")'
|
||||
Uryyb
|
||||
|
||||
It works. We next install the plugin along with Zeek itself, so that it
|
||||
will find it directly without needing the ``ZEEK_PLUGIN_PATH``
|
||||
environment variable. If we first unset the variable, the function
|
||||
will no longer be available::
|
||||
|
||||
# unset ZEEK_PLUGIN_PATH
|
||||
# zeek -e 'print Demo::rot13("Hello")'
|
||||
error in <command line>, line 1: unknown identifier Demo::rot13, at or near "Demo::rot13"
|
||||
|
||||
Once we install it, it works again::
|
||||
|
||||
# make install
|
||||
# zeek -e 'print Demo::rot13("Hello")'
|
||||
Uryyb
|
||||
|
||||
The installed version went into
|
||||
``<zeek-install-prefix>/lib/zeek/plugins/Demo_Rot13``.
|
||||
|
||||
One can distribute the plugin independently of Zeek for others to use.
|
||||
To distribute in source form, just remove the ``build/`` directory
|
||||
(``make distclean`` does that) and then tar up the whole ``rot13-plugin/``
|
||||
directory. Others then follow the same process as above after
|
||||
unpacking.
|
||||
|
||||
To distribute the plugin in binary form, the build process
|
||||
conveniently creates a corresponding tarball in ``build/dist/``. In
|
||||
this case, it's called ``Demo_Rot13-0.1.0.tar.gz``, with the version
|
||||
number coming out of the ``VERSION`` file that ``init-plugin`` put
|
||||
into place. The binary tarball has everything needed to run the
|
||||
plugin, but no further source files. Optionally, one can include
|
||||
further files by specifying them in the plugin's ``CMakeLists.txt``
|
||||
through the ``zeek_plugin_dist_files`` macro; the skeleton does that
|
||||
for ``README``, ``VERSION``, ``CHANGES``, and ``COPYING``. To use the
|
||||
plugin through the binary tarball, just unpack it into
|
||||
``<zeek-install-prefix>/lib/zeek/plugins/``. Alternatively, if you unpack
|
||||
it in another location, then you need to point ``ZEEK_PLUGIN_PATH`` there.
|
||||
|
||||
Before distributing your plugin, you should edit some of the meta
|
||||
files that ``init-plugin`` puts in place. Edit ``README`` and
|
||||
``VERSION``, and update ``CHANGES`` when you make changes. Also put a
|
||||
license file in place as ``COPYING``; if BSD is fine, you will find a
|
||||
template in ``COPYING.edit-me``.
|
||||
|
||||
Plugin Directory Layout
|
||||
=======================
|
||||
|
||||
A plugin's directory needs to follow a set of conventions so that Zeek
|
||||
(1) recognizes it as a plugin, and (2) knows what to load. While
|
||||
``init-plugin`` takes care of most of this, the following is the full
|
||||
story. We'll use ``<base>`` to represent a plugin's top-level
|
||||
directory. With the skeleton, ``<base>`` corresponds to ``build/``.
|
||||
|
||||
``<base>/__zeek_plugin__``
|
||||
A file that marks a directory as containing a Zeek plugin. The file
|
||||
must exist, and its content must consist of a single line with the
|
||||
qualified name of the plugin (e.g., "Demo::Rot13").
|
||||
|
||||
``<base>/lib/<plugin-name>.<os>-<arch>.so``
|
||||
The shared library containing the plugin's compiled code. Zeek will
|
||||
load this in dynamically at run-time if OS and architecture match
|
||||
the current platform.
|
||||
|
||||
``scripts/``
|
||||
A directory with the plugin's custom Zeek scripts. When the plugin
|
||||
gets activated, this directory will be automatically added to
|
||||
``ZEEKPATH``, so that any scripts/modules inside can be
|
||||
"@load"ed.
|
||||
|
||||
``scripts/__load__.zeek``
|
||||
A Zeek script that will be loaded when the plugin gets activated.
|
||||
When this script executes, any BIF elements that the plugin
|
||||
defines will already be available. See below for more information
|
||||
on activating plugins.
|
||||
|
||||
``scripts/__preload__.zeek``
|
||||
A Zeek script that will be loaded when the plugin gets activated,
|
||||
but before any BIF elements become available. See below for more
|
||||
information on activating plugins.
|
||||
|
||||
``lib/bif/``
|
||||
Directory with auto-generated Zeek scripts that declare the plugin's
|
||||
BIF elements. The files here are produced by ``bifcl``.
|
||||
|
||||
Any other files in ``<base>`` are ignored by Zeek.
|
||||
|
||||
By convention, a plugin should put its custom scripts into subfolders
|
||||
of ``scripts/``, i.e., ``scripts/<plugin-namespace>/<plugin-name>/<script>.zeek``
|
||||
to avoid conflicts. As usual, you can then put a ``__load__.zeek`` in
|
||||
there as well so that, e.g., ``@load Demo/Rot13`` could load a whole
|
||||
module in the form of multiple individual scripts.
|
||||
|
||||
Note that in addition to the paths above, the ``init-plugin`` helper
|
||||
puts some more files and directories in place that help with
|
||||
development and installation (e.g., ``CMakeLists.txt``, ``Makefile``,
|
||||
and source code in ``src/``). However, all these do not have a special
|
||||
meaning for Zeek at runtime and aren't necessary for a plugin to
|
||||
function.
|
||||
|
||||
``init-plugin``
|
||||
===============
|
||||
|
||||
``init-plugin`` puts a basic plugin structure in place that follows
|
||||
the above layout and augments it with a CMake build and installation
|
||||
system. Plugins with this structure can be used both directly out of
|
||||
their source directory (after ``make`` and setting Zeek's
|
||||
``ZEEK_PLUGIN_PATH``), and when installed alongside Zeek (after ``make
|
||||
install``).
|
||||
|
||||
Upon completion, ``init-plugin`` initializes a git repository and stages its
|
||||
produced files for committing, but does not yet commit the files. This allows
|
||||
you to tweak the new plugin as needed prior to the initial commit.
|
||||
|
||||
``make install`` copies over the ``lib`` and ``scripts`` directories,
|
||||
as well as the ``__zeek_plugin__`` magic file and any further
|
||||
distribution files specified in ``CMakeLists.txt`` (e.g., README,
|
||||
VERSION). You can find a full list of files installed in
|
||||
``build/MANIFEST``. Behind the scenes, ``make install`` really just
|
||||
unpacks the binary tarball from ``build/dist`` into the destination
|
||||
directory.
|
||||
|
||||
``init-plugin`` will never overwrite existing files. If its target
|
||||
directory already exists, it will by default decline to do anything.
|
||||
You can run it with ``-u`` instead to update an existing plugin,
|
||||
however it will never overwrite any existing files; it will only put
|
||||
in place files it doesn't find yet. To revert a file back to what
|
||||
``init-plugin`` created originally, delete it first and then rerun
|
||||
with ``-u``.
|
||||
|
||||
``init-plugin`` puts a ``configure`` script in place that wraps
|
||||
``cmake`` with a more familiar configure-style configuration. By
|
||||
default, the script provides two options for specifying paths to the
|
||||
Zeek source (``--zeek-dist``) and to the plugin's installation directory
|
||||
(``--install-root``). To extend ``configure`` with plugin-specific
|
||||
options (such as search paths for its dependencies) don't edit the
|
||||
script directly but instead extend ``configure.plugin``, which
|
||||
``configure`` includes. That way you will be able to more easily
|
||||
update ``configure`` in the future when the distribution version
|
||||
changes. In ``configure.plugin`` you can use the predefined shell
|
||||
function ``append_cache_entry`` to seed values into the CMake cache;
|
||||
see the installed skeleton version and existing plugins for examples.
|
||||
|
||||
.. note::
|
||||
|
||||
In the past ``init-plugin`` also generated a ``zkg.meta`` file, automatically
|
||||
creating a Zeek package containing a plugin. ``init-plugin`` now focuses
|
||||
purely on plugins, as its name suggests. To bootstrap new Zeek packages
|
||||
(possibly containing plugins), use the more featureful templating
|
||||
functionality provided by the ``zkg create`` command, explained `here
|
||||
<https://docs.zeek.org/projects/package-manager/en/stable/package.html>`_.
|
||||
|
||||
Activating a Plugin
|
||||
===================
|
||||
|
||||
A plugin needs to be *activated* to make it available to the user.
|
||||
Activating a plugin will:
|
||||
|
||||
1. Load the dynamic module
|
||||
2. Make any BIF items available
|
||||
3. Add the ``scripts/`` directory to ``ZEEKPATH``
|
||||
4. Load ``scripts/__preload__.zeek``
|
||||
5. Make BIF elements available to scripts.
|
||||
6. Load ``scripts/__load__.zeek``
|
||||
|
||||
By default, Zeek will automatically activate all dynamic plugins found
|
||||
in its search path ``ZEEK_PLUGIN_PATH``. However, in bare mode (``zeek
|
||||
-b``), no dynamic plugins will be activated by default; instead the
|
||||
user can selectively enable individual plugins in scriptland using the
|
||||
``@load-plugin <qualified-plugin-name>`` directive (e.g.,
|
||||
``@load-plugin Demo::Rot13``). Alternatively, one can activate a
|
||||
plugin from the command-line by specifying its full name
|
||||
(``Demo::Rot13``), or set the environment variable
|
||||
``ZEEK_PLUGIN_ACTIVATE`` to a list of comma-separated names of
|
||||
plugins to unconditionally activate, even in bare mode.
|
||||
|
||||
``zeek -N`` shows activated plugins separately from found but not yet
|
||||
activated plugins. Note that plugins compiled statically into Zeek are
|
||||
always activated, and hence show up as such even in bare mode.
|
||||
|
||||
Plugin Components
|
||||
=================
|
||||
|
||||
It's easy for a plugin to provide custom scripts: just put them into
|
||||
``scripts/``, as described above. The CMake infrastructure will automatically
|
||||
install them, as well include them into the source and binary plugin
|
||||
distributions.
|
||||
|
||||
Any number or combination of other components can be provided by a single
|
||||
plugin. For example a plugin can provide multiple different protocol
|
||||
analyzers, or both a log writer and input reader.
|
||||
|
||||
The best place to look for examples or templates for a specific type of plugin
|
||||
component are the source code of Zeek itself since every one of its components
|
||||
uses the same API as any external plugin.
|
||||
|
||||
Each component type also has a simple integration test, found
|
||||
in the Zeek source-tree's ``testing/btest/plugins/`` directory,
|
||||
that can serve useful for creating basic plugin skeletons.
|
||||
|
||||
Testing Plugins
|
||||
===============
|
||||
|
||||
A plugin should come with a test suite to exercise its functionality.
|
||||
The ``init-plugin`` script puts in place a basic
|
||||
`BTest <https://github.com/zeek/btest>`_ setup
|
||||
to start with. Initially, it comes with a single test that just checks
|
||||
that Zeek loads the plugin correctly::
|
||||
|
||||
# cd tests
|
||||
# btest -A
|
||||
[ 0%] rot13.show-plugin ... ok
|
||||
all 1 tests successful
|
||||
|
||||
You can also run this via the Makefile::
|
||||
|
||||
# cd ..
|
||||
# make test
|
||||
make -C tests
|
||||
make[1]: Entering directory `tests'
|
||||
all 1 tests successful
|
||||
make[1]: Leaving directory `tests'
|
||||
|
||||
Now let's add a custom test that ensures that our BIF works correctly::
|
||||
|
||||
# cd tests
|
||||
# cat >rot13/bif-rot13.zeek
|
||||
|
||||
# @TEST-EXEC: zeek %INPUT >output
|
||||
# @TEST-EXEC: btest-diff output
|
||||
|
||||
event zeek_init()
|
||||
{
|
||||
print Demo::rot13("Hello");
|
||||
}
|
||||
|
||||
Check the output::
|
||||
|
||||
# btest -d rot13/bif-rot13.zeek
|
||||
[ 0%] rot13.bif-rot13 ... failed
|
||||
% 'btest-diff output' failed unexpectedly (exit code 100)
|
||||
% cat .diag
|
||||
== File ===============================
|
||||
Uryyb
|
||||
== Error ===============================
|
||||
test-diff: no baseline found.
|
||||
=======================================
|
||||
|
||||
% cat .stderr
|
||||
|
||||
1 of 1 test failed
|
||||
|
||||
Install the baseline::
|
||||
|
||||
# btest -U rot13/bif-rot13.zeek
|
||||
all 1 tests successful
|
||||
|
||||
Run the test-suite::
|
||||
|
||||
# btest
|
||||
all 2 tests successful
|
||||
|
||||
Debugging Plugins
|
||||
=================
|
||||
|
||||
If your plugin isn't loading as expected, Zeek's debugging facilities
|
||||
can help illuminate what's going on. To enable, recompile Zeek
|
||||
with debugging support (``./configure --enable-debug``), and
|
||||
afterwards rebuild your plugin as well. If you then run Zeek with ``-B
|
||||
plugins``, it will produce a file :file:`debug.log` that records details
|
||||
about the process for searching, loading, and activating plugins.
|
||||
|
||||
To generate your own debugging output from inside your plugin, you can
|
||||
add a custom debug stream by using the ``PLUGIN_DBG_LOG(<plugin>,
|
||||
<args>)`` macro (defined in ``DebugLogger.h``), where ``<plugin>`` is
|
||||
the ``Plugin`` instance and ``<args>`` are printf-style arguments,
|
||||
just as with Zeek's standard debugging macros (grep for ``DBG_LOG`` in
|
||||
Zeek's ``src/`` to see examples). At runtime, you can then activate
|
||||
your plugin's debugging output with ``-B plugin-<name>``, where
|
||||
``<name>`` is the name of the plugin as returned by its
|
||||
``Configure()`` method, yet with the namespace-separator ``::``
|
||||
replaced with a simple dash. Example: If the plugin is called
|
||||
``Demo::Rot13``, use ``-B plugin-Demo-Rot13``. As usual, the debugging
|
||||
output will be recorded to :file:`debug.log` if Zeek's compiled in debug
|
||||
mode.
|
||||
|
||||
.. _building-plugins-statically:
|
||||
|
||||
Building Plugins Statically into Zeek
|
||||
=====================================
|
||||
|
||||
Plugins can be built statically into a Zeek binary using the
|
||||
``--include-plugins`` option passed to Zeek's ``configure``. This argument
|
||||
takes a semicolon-separated list of absolute paths to plugin sources. Each
|
||||
path needs to contain a ``CMakeLists.txt`` file, as is commonly the case at the
|
||||
toplevel of plugin source trees, and usually also in Zeek packages. Building
|
||||
plugins in this manner includes them directly into the Zeek binary
|
||||
and installation. They are loaded automatically by Zeek at startup
|
||||
without needing to install them separately.
|
||||
|
||||
Building plugins into Zeek is a handy way to build them consistently with
|
||||
sanitizers, as you can use Zeek's existing ``./configure --sanitizers=...``
|
||||
infrastructure to apply transparently to built-in plugins.
|
||||
|
||||
The configure run lists built-in plugins at the end, so you can verify
|
||||
successful inclusion of your plugin there. Your plugin should also
|
||||
show up in the resulting build's ``zeek -NN`` output.
|
||||
|
||||
Headers for built-in plugins are installed into a subdirectory of
|
||||
``<zeek-install-prefix>/include/zeek/builtin-plugins`` specific to
|
||||
each plugin. Scripts are installed into a subdirectory of
|
||||
``<zeek-install-prefix>/share/zeek/builtin-plugins`` specific to
|
||||
each plugin. The scripts directory is also automatically added to
|
||||
the default ``ZEEKPATH``.
|
||||
|
||||
Plugin Tutorials
|
||||
================
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
|
||||
plugins/connkey-plugin
|
||||
plugins/event-metadata-plugin
|
205
doc/devel/plugins/connkey-plugin.rst
Normal file
205
doc/devel/plugins/connkey-plugin.rst
Normal file
|
@ -0,0 +1,205 @@
|
|||
.. _connkey-plugin:
|
||||
|
||||
===============================
|
||||
Writing a Connection Key Plugin
|
||||
===============================
|
||||
|
||||
.. versionadded:: 8.0
|
||||
|
||||
By default, Zeek looks up internal connection state using the classic five-tuple
|
||||
of originator and responder IP addresses, ports, and the numeric protocol
|
||||
identifier (for TCP, UDP, etc). Zeek's data structure driving this is called a
|
||||
connection key, or ``ConnKey``.
|
||||
|
||||
In certain environments the classic five-tuple does not sufficiently distinguish
|
||||
connections. Consider traffic mirrored from multiple VLANs with overlapping IP
|
||||
address ranges. Concretely, a connection between 10.0.0.1 and 10.0.0.2 in one
|
||||
VLAN is distinct from a connection between the same IPs in another VLAN. Here,
|
||||
Zeek should include the VLAN identifier into the connection key, and you can
|
||||
instruct Zeek to do so by loading the
|
||||
:doc:`/scripts/policy/frameworks/conn_key/vlan_fivetuple.zeek` policy script.
|
||||
|
||||
Zeek's plugin API allows adding support for additional custom connection keys.
|
||||
This section provides a tutorial on how to do so, using the example of VXLAN-enabled
|
||||
flow tuples. If you're not familiar with plugin development, head over to the
|
||||
:ref:`Writing Plugins <writing-plugins>` section.
|
||||
|
||||
Our goal is to implement a custom connection key to scope connections
|
||||
transported within a `VXLAN <https://datatracker.ietf.org/doc/html/rfc7348>`_
|
||||
tunnel by the VXLAN Network Identifier (VNI).
|
||||
|
||||
As a test case, we have encapsulated the `HTTP GET trace <https://github.com/zeek/zeek/raw/refs/heads/master/testing/btest/Traces/http/get.trace>`_
|
||||
from the Zeek repository twice with VXLAN using VNIs 4711 and 4242, respectively,
|
||||
and merged the resulting two PCAP files with the original PCAP.
|
||||
The :download:`resulting PCAP <connkey-vxlan-fivetuple-plugin-src/Traces/vxlan-overlapping-http-get.pcap>`
|
||||
contains three HTTP connections, two of which are VXLAN-encapsulated.
|
||||
|
||||
By default, Zeek will create the same connection key for the original and
|
||||
encapsulated HTTP connections, since they have identical inner five-tuples.
|
||||
Therefore, Zeek creates only a single ``http.log`` entry, and two entries
|
||||
in ``conn.log``.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
$ zeek -C -r Traces/vxlan-overlapping-http-get.pcap
|
||||
$ zeek-cut -m uid method host uri < http.log
|
||||
uid method host uri
|
||||
CpWF5etn1l2rpaLu3 GET bro.org /download/CHANGES.bro-aux.txt
|
||||
|
||||
$ zeek-cut -m uid service history orig_pkts resp_pkts < conn.log
|
||||
uid service history orig_pkts resp_pkts
|
||||
Cq2CY245oGGbibJ8k9 http ShADTadtFf 21 21
|
||||
CMleDu4xANIMzePYd7 vxlan D 28 0
|
||||
|
||||
Note that just two of the HTTP connections are encapsulated.
|
||||
That is why the VXLAN connection shows only 28 packets.
|
||||
Each HTTP connection has 14 packets total, 7 in each direction. Zeek aggregates
|
||||
all packets into the single HTTP connection, but only 28 of them were
|
||||
transported within the VXLAN tunnel connection. Note also the ``t`` and ``T``
|
||||
flags in the :zeek:field:`Conn::Info$history` field. These stand for retransmissions,
|
||||
caused by Zeek not discriminating between the different HTTP connections.
|
||||
|
||||
The plugin we'll develop below adds the VXLAN VNI to the connection key.
|
||||
As a result, Zeek will correctly report three HTTP connections, tracked
|
||||
and logged separately. We'll add the VNI as
|
||||
:zeek:field:`vxlan_vni` to the :zeek:see:`conn_id_ctx` record, making it available
|
||||
in ``http.log`` and ``conn.log`` via the ``id.ctx.vxlan_vni`` column.
|
||||
|
||||
After activating the plugin Zeek tracks each HTTP connection individually and
|
||||
the logs will look as follows:
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
$ zeek-cut -m uid method host uri id.ctx.vxlan_vni < http.log
|
||||
uid method host uri id.ctx.vxlan_vni
|
||||
CBifsS2vqGEg8Fa5ac GET bro.org /download/CHANGES.bro-aux.txt 4711
|
||||
CEllEz13txeSrbGqBe GET bro.org /download/CHANGES.bro-aux.txt 4242
|
||||
CRfbJw1kBBvHDQQBta GET bro.org /download/CHANGES.bro-aux.txt -
|
||||
|
||||
$ zeek-cut -m uid service history orig_pkts resp_pkts id.ctx.vxlan_vni < conn.log
|
||||
uid service history orig_pkts resp_pkts id.ctx.vxlan_vni
|
||||
CRfbJw1kBBvHDQQBta http ShADadFf 7 7 -
|
||||
CEllEz13txeSrbGqBe http ShADadFf 7 7 4242
|
||||
CBifsS2vqGEg8Fa5ac http ShADadFf 7 7 4711
|
||||
CC6Ald2LejCS1qcDy4 vxlan D 28 0 -
|
||||
|
||||
|
||||
Implementation
|
||||
==============
|
||||
|
||||
Adding alternative connection keys involves implementing two classes.
|
||||
First, a factory class producing ``zeek::ConnKey`` instances. This
|
||||
is the class created through the added ``zeek::conn_key::Component``.
|
||||
Second, a custom connection key class derived from ``zeek::ConnKey``.
|
||||
Instances of this class are created by the factory. This is a typical
|
||||
abstract factory pattern.
|
||||
|
||||
Our plugin's ``Configure()`` method follows the standard pattern of setting up
|
||||
basic information about the plugin and registering our own ``ConnKey`` component.
|
||||
|
||||
.. literalinclude:: connkey-vxlan-fivetuple-plugin-src/src/Plugin.cc
|
||||
:caption: Plugin.cc
|
||||
:language: cpp
|
||||
:lines: 16-
|
||||
:linenos:
|
||||
:tab-width: 4
|
||||
|
||||
|
||||
Next, in the ``Factory.cc`` file, we're implementing a custom ``zeek::ConnKey`` class.
|
||||
This class is named ``VxlanVniConnKey`` and inherits from ``zeek::IPBasedConnKey``.
|
||||
While ``zeek::ConnKey`` is technically the base class, in this tutorial we'll
|
||||
derive from ``zeek::IPBasedConnKey``.
|
||||
Currently, Zeek only supports IP-based connection tracking via the
|
||||
``IPBasedAnalyzer`` analyzer. This analyzer requires ``zeek::IPBasedConnKey``
|
||||
instances.
|
||||
|
||||
.. literalinclude:: connkey-vxlan-fivetuple-plugin-src/src/Factory.cc
|
||||
:caption: VxlanVniConnKey class in Factory.cc
|
||||
:language: cpp
|
||||
:linenos:
|
||||
:lines: 18-78
|
||||
:tab-width: 4
|
||||
|
||||
The current pattern for custom connection keys is to embed the bytes used for
|
||||
the ``zeek::session::detail::Key`` as a packed struct within a ``ConnKey`` instance.
|
||||
We override ``DoPopulateConnIdVal()`` to set the :zeek:field:`vxlan_vni` field
|
||||
of the :zeek:see:`conn_id_ctx` record value to the extracted VXLAN VNI. A small trick
|
||||
employed is that we default the most significant byte of ``key.vxlan_vni`` to 0xFF.
|
||||
As a VNI has only 24 bits, this allows us to determine if a VNI was actually
|
||||
extracted, or whether it remained unset.
|
||||
|
||||
The ``DoInit()`` implementation is the actual place for connection key customization.
|
||||
This is where we extract the VXLAN VNI from packet data. To do so, we're using the relatively
|
||||
new ``GetAnalyzerData()`` API of the packet analysis manager.
|
||||
This API allows generic access to the raw data layers analyzed by a given packet analyzer.
|
||||
For our use-case, we take the most outer VXLAN layer, if any, and extract the VNI
|
||||
into ``key.vxlan_vni``.
|
||||
|
||||
There's no requirement to use the ``GetAnalyzerData()`` API. If the ``zeek::Packet``
|
||||
instance passed to ``DoInit()`` contains the needed information, e.g. VLAN identifiers
|
||||
or information from the packet's raw bytes, you can use them directly.
|
||||
Specifically, ``GetAnalyzerData()`` may introduce additional overhead into the
|
||||
packet path that you can avoid if the information is readily available
|
||||
elsewhere.
|
||||
Using other Zeek APIs to determine connection key information is of course
|
||||
also possible.
|
||||
|
||||
The next part shown concerns the ``Factory`` class itself. The
|
||||
``DoConnKeyFromVal()`` method contains logic to produce a ``VxlanVniConnKey``
|
||||
instance from an existing :zeek:see:`conn_id` record.
|
||||
This is needed in order for the :zeek:see:`lookup_connection` builtin function to work properly.
|
||||
The implementation re-uses the ``DoConnKeyFromVal()`` implementation of the
|
||||
default ``fivetuple::Factory`` that our factory inherits from to extract the
|
||||
classic five-tuple information.
|
||||
|
||||
.. literalinclude:: connkey-vxlan-fivetuple-plugin-src/src/Factory.cc
|
||||
:caption: Factory class in Factory.cc
|
||||
:language: cpp
|
||||
:linenos:
|
||||
:lines: 80-103
|
||||
:tab-width: 4
|
||||
|
||||
Calling the ``fivetuple::Factory::DoConnKeyFromVal()`` in turn calls our
|
||||
own factory's ``DoNewConnKey()`` method through virtual dispatch. Since our
|
||||
factory overrides this method to always return a ``VxlanVniConnKey`` instance,
|
||||
the static cast later is safe.
|
||||
|
||||
Last, the plugin's ``__load__.zeek`` file is shown. It includes the extension
|
||||
of the :zeek:see:`conn_id_ctx` identifier by the :zeek:field:`vxlan_vni` field.
|
||||
|
||||
.. literalinclude:: connkey-vxlan-fivetuple-plugin-src/scripts/__load__.zeek
|
||||
:caption: The conn_id redefinition in __load__.zeek
|
||||
:language: zeek
|
||||
:linenos:
|
||||
:tab-width: 4
|
||||
|
||||
|
||||
Using the custom Connection Key
|
||||
===============================
|
||||
|
||||
After installing the plugin, the new connection key implementation can be
|
||||
selected by redefining the script-level :zeek:see:`ConnKey::factory` variable.
|
||||
This can either be done in a separate script, but we do it directly on the
|
||||
command-line for simplicity. The ``ConnKey::CONNKEY_VXLAN_VNI_FIVETUPLE`` is
|
||||
registered in Zeek during the plugin's ``AddComponent()`` call during
|
||||
``Configure()``, where the component has the name ``VXLAN_VNI_FIVETUPLE``.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
$ zeek -C -r Traces/vxlan-overlapping-http-get.pcap ConnKey::factory=ConnKey::CONNKEY_VXLAN_VNI_FIVETUPLE
|
||||
|
||||
|
||||
Viewing the ``conn.log`` now shows three separate HTTP connections,
|
||||
two of which have a ``vxlan_vni`` value set in their logs.
|
||||
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
$ zeek-cut -m uid service history orig_pkts resp_pkts id.ctx.vxlan_vni < conn.log
|
||||
uid service history orig_pkts resp_pkts id.ctx.vxlan_vni
|
||||
CRfbJw1kBBvHDQQBta http ShADadFf 7 7 -
|
||||
CEllEz13txeSrbGqBe http ShADadFf 7 7 4242
|
||||
CBifsS2vqGEg8Fa5ac http ShADadFf 7 7 4711
|
||||
CC6Ald2LejCS1qcDy4 vxlan D 28 0 -
|
||||
|
||||
Pretty cool, isn't it?
|
|
@ -0,0 +1,9 @@
|
|||
cmake_minimum_required(VERSION 3.15 FATAL_ERROR)
|
||||
|
||||
project(ZeekPluginConnKeyVxlanVniFivetuple)
|
||||
|
||||
include(ZeekPlugin)
|
||||
|
||||
zeek_add_plugin(
|
||||
Zeek ConnKey_Vxlan_Vni_Fivetuple
|
||||
SOURCES src/Factory.cc src/Plugin.cc SCRIPT_FILES scripts/__load__.zeek)
|
26
doc/devel/plugins/connkey-vxlan-fivetuple-plugin-src/COPYING
Normal file
26
doc/devel/plugins/connkey-vxlan-fivetuple-plugin-src/COPYING
Normal file
|
@ -0,0 +1,26 @@
|
|||
Copyright (c) 2025 by the Zeek Project. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
3. Neither the name of the copyright holder nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
|
||||
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
@ -0,0 +1,23 @@
|
|||
#
|
||||
# Convenience Makefile providing a few common top-level targets.
|
||||
#
|
||||
|
||||
cmake_build_dir=build
|
||||
arch=`uname -s | tr A-Z a-z`-`uname -m`
|
||||
|
||||
all: build-it
|
||||
|
||||
build-it:
|
||||
( cd $(cmake_build_dir) && make )
|
||||
|
||||
install:
|
||||
( cd $(cmake_build_dir) && make install )
|
||||
|
||||
clean:
|
||||
( cd $(cmake_build_dir) && make clean )
|
||||
|
||||
distclean:
|
||||
rm -rf $(cmake_build_dir)
|
||||
|
||||
test:
|
||||
make -C tests
|
Binary file not shown.
|
@ -0,0 +1 @@
|
|||
0.1.0
|
193
doc/devel/plugins/connkey-vxlan-fivetuple-plugin-src/configure
vendored
Executable file
193
doc/devel/plugins/connkey-vxlan-fivetuple-plugin-src/configure
vendored
Executable file
|
@ -0,0 +1,193 @@
|
|||
#!/bin/sh
|
||||
#
|
||||
# Wrapper for viewing/setting options that the plugin's CMake
|
||||
# scripts will recognize.
|
||||
#
|
||||
# Don't edit this. Edit configure.plugin to add plugin-specific options.
|
||||
#
|
||||
|
||||
set -e
|
||||
command="$0 $*"
|
||||
|
||||
if [ -e $(dirname $0)/configure.plugin ]; then
|
||||
# Include custom additions.
|
||||
. $(dirname $0)/configure.plugin
|
||||
fi
|
||||
|
||||
usage() {
|
||||
|
||||
cat 1>&2 <<EOF
|
||||
Usage: $0 [OPTIONS]
|
||||
|
||||
Plugin Options:
|
||||
--cmake=PATH Path to CMake binary
|
||||
--zeek-dist=DIR Path to Zeek source tree
|
||||
--install-root=DIR Path where to install plugin into
|
||||
--with-binpac=DIR Path to BinPAC installation root
|
||||
--with-broker=DIR Path to Broker installation root
|
||||
--with-bifcl=PATH Path to bifcl executable
|
||||
--enable-debug Compile in debugging mode
|
||||
--disable-cpp-tests Don't build C++ unit tests
|
||||
EOF
|
||||
|
||||
if type plugin_usage >/dev/null 2>&1; then
|
||||
plugin_usage 1>&2
|
||||
fi
|
||||
|
||||
echo
|
||||
|
||||
exit 1
|
||||
}
|
||||
|
||||
# Function to append a CMake cache entry definition to the
|
||||
# CMakeCacheEntries variable
|
||||
# $1 is the cache entry variable name
|
||||
# $2 is the cache entry variable type
|
||||
# $3 is the cache entry variable value
|
||||
append_cache_entry() {
|
||||
CMakeCacheEntries="$CMakeCacheEntries -D $1:$2=$3"
|
||||
}
|
||||
|
||||
# set defaults
|
||||
builddir=build
|
||||
zeekdist=""
|
||||
installroot="default"
|
||||
zeek_plugin_begin_opts=""
|
||||
CMakeCacheEntries=""
|
||||
|
||||
while [ $# -ne 0 ]; do
|
||||
case "$1" in
|
||||
-*=*) optarg=$(echo "$1" | sed 's/[-_a-zA-Z0-9]*=//') ;;
|
||||
*) optarg= ;;
|
||||
esac
|
||||
|
||||
case "$1" in
|
||||
--help | -h)
|
||||
usage
|
||||
;;
|
||||
|
||||
--cmake=*)
|
||||
CMakeCommand=$optarg
|
||||
;;
|
||||
|
||||
--zeek-dist=*)
|
||||
zeekdist=$(cd $optarg && pwd)
|
||||
;;
|
||||
|
||||
--install-root=*)
|
||||
installroot=$optarg
|
||||
;;
|
||||
|
||||
--with-binpac=*)
|
||||
append_cache_entry BinPAC_ROOT_DIR PATH $optarg
|
||||
binpac_root=$optarg
|
||||
;;
|
||||
|
||||
--with-broker=*)
|
||||
append_cache_entry BROKER_ROOT_DIR PATH $optarg
|
||||
broker_root=$optarg
|
||||
;;
|
||||
|
||||
--with-bifcl=*)
|
||||
append_cache_entry BifCl_EXE PATH $optarg
|
||||
;;
|
||||
|
||||
--enable-debug)
|
||||
append_cache_entry BRO_PLUGIN_ENABLE_DEBUG BOOL true
|
||||
;;
|
||||
|
||||
--disable-cpp-tests)
|
||||
zeek_plugin_begin_opts="DISABLE_CPP_TESTS;$zeek_plugin_begin_opts"
|
||||
;;
|
||||
|
||||
*)
|
||||
if type plugin_option >/dev/null 2>&1; then
|
||||
plugin_option $1 && shift && continue
|
||||
fi
|
||||
|
||||
echo "Invalid option '$1'. Try $0 --help to see available options."
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
shift
|
||||
done
|
||||
|
||||
if [ -z "$CMakeCommand" ]; then
|
||||
# prefer cmake3 over "regular" cmake (cmake == cmake2 on RHEL)
|
||||
if command -v cmake3 >/dev/null 2>&1; then
|
||||
CMakeCommand="cmake3"
|
||||
elif command -v cmake >/dev/null 2>&1; then
|
||||
CMakeCommand="cmake"
|
||||
else
|
||||
echo "This plugin requires CMake, please install it first."
|
||||
echo "Then you may use this script to configure the CMake build."
|
||||
echo "Note: pass --cmake=PATH to use cmake in non-standard locations."
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ -z "$zeekdist" ]; then
|
||||
if type zeek-config >/dev/null 2>&1; then
|
||||
zeek_config="zeek-config"
|
||||
else
|
||||
echo "Either 'zeek-config' must be in PATH or '--zeek-dist=<path>' used"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
append_cache_entry BRO_CONFIG_PREFIX PATH $(${zeek_config} --prefix)
|
||||
append_cache_entry BRO_CONFIG_INCLUDE_DIR PATH $(${zeek_config} --include_dir)
|
||||
append_cache_entry BRO_CONFIG_PLUGIN_DIR PATH $(${zeek_config} --plugin_dir)
|
||||
append_cache_entry BRO_CONFIG_LIB_DIR PATH $(${zeek_config} --lib_dir)
|
||||
append_cache_entry BRO_CONFIG_CMAKE_DIR PATH $(${zeek_config} --cmake_dir)
|
||||
append_cache_entry CMAKE_MODULE_PATH PATH $(${zeek_config} --cmake_dir)
|
||||
|
||||
build_type=$(${zeek_config} --build_type)
|
||||
|
||||
if [ "$build_type" = "debug" ]; then
|
||||
append_cache_entry BRO_PLUGIN_ENABLE_DEBUG BOOL true
|
||||
fi
|
||||
|
||||
if [ -z "$binpac_root" ]; then
|
||||
append_cache_entry BinPAC_ROOT_DIR PATH $(${zeek_config} --binpac_root)
|
||||
fi
|
||||
|
||||
if [ -z "$broker_root" ]; then
|
||||
append_cache_entry BROKER_ROOT_DIR PATH $(${zeek_config} --broker_root)
|
||||
fi
|
||||
else
|
||||
if [ ! -e "$zeekdist/zeek-path-dev.in" ]; then
|
||||
echo "$zeekdist does not appear to be a valid Zeek source tree."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# BRO_DIST is the canonical/historical name used by plugin CMake scripts
|
||||
# ZEEK_DIST doesn't serve a function at the moment, but set/provided anyway
|
||||
append_cache_entry BRO_DIST PATH $zeekdist
|
||||
append_cache_entry ZEEK_DIST PATH $zeekdist
|
||||
append_cache_entry CMAKE_MODULE_PATH PATH $zeekdist/cmake
|
||||
fi
|
||||
|
||||
if [ "$installroot" != "default" ]; then
|
||||
mkdir -p $installroot
|
||||
append_cache_entry BRO_PLUGIN_INSTALL_ROOT PATH $installroot
|
||||
fi
|
||||
|
||||
if [ -n "$zeek_plugin_begin_opts" ]; then
|
||||
append_cache_entry ZEEK_PLUGIN_BEGIN_OPTS STRING "$zeek_plugin_begin_opts"
|
||||
fi
|
||||
|
||||
if type plugin_addl >/dev/null 2>&1; then
|
||||
plugin_addl
|
||||
fi
|
||||
|
||||
echo "Build Directory : $builddir"
|
||||
echo "Zeek Source Directory : $zeekdist"
|
||||
|
||||
mkdir -p $builddir
|
||||
cd $builddir
|
||||
|
||||
"$CMakeCommand" $CMakeCacheEntries ..
|
||||
|
||||
echo "# This is the command used to configure this build" >config.status
|
||||
echo $command >>config.status
|
||||
chmod u+x config.status
|
|
@ -0,0 +1,3 @@
|
|||
redef record conn_id_ctx += {
|
||||
vxlan_vni: count &log &optional;
|
||||
};
|
|
@ -0,0 +1 @@
|
|||
# Empty
|
|
@ -0,0 +1,105 @@
|
|||
// See the file "COPYING" in the main distribution directory for copyright.
|
||||
|
||||
#include "Factory.h"
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "zeek/ID.h"
|
||||
#include "zeek/Val.h"
|
||||
#include "zeek/iosource/Packet.h"
|
||||
#include "zeek/packet_analysis/Analyzer.h"
|
||||
#include "zeek/packet_analysis/Manager.h"
|
||||
#include "zeek/packet_analysis/protocol/ip/conn_key/IPBasedConnKey.h"
|
||||
#include "zeek/packet_analysis/protocol/ip/conn_key/fivetuple/Factory.h"
|
||||
#include "zeek/util-types.h"
|
||||
|
||||
namespace zeek::conn_key::vxlan_vni_fivetuple {
|
||||
|
||||
class VxlanVniConnKey : public zeek::IPBasedConnKey {
|
||||
public:
|
||||
VxlanVniConnKey() {
|
||||
// Ensure padding holes in the key struct are filled with zeroes.
|
||||
memset(static_cast<void*>(&key), 0, sizeof(key));
|
||||
}
|
||||
|
||||
detail::PackedConnTuple& PackedTuple() override { return key.tuple; }
|
||||
|
||||
const detail::PackedConnTuple& PackedTuple() const override { return key.tuple; }
|
||||
|
||||
protected:
|
||||
zeek::session::detail::Key DoSessionKey() const override {
|
||||
return {reinterpret_cast<const void*>(&key), sizeof(key), session::detail::Key::CONNECTION_KEY_TYPE};
|
||||
}
|
||||
|
||||
void DoPopulateConnIdVal(zeek::RecordVal& conn_id, zeek::RecordVal& ctx) override {
|
||||
// Base class populates conn_id fields (orig_h, orig_p, resp_h, resp_p)
|
||||
zeek::IPBasedConnKey::DoPopulateConnIdVal(conn_id, ctx);
|
||||
|
||||
if ( conn_id.GetType() != id::conn_id )
|
||||
return;
|
||||
|
||||
if ( (key.vxlan_vni & 0xFF000000) == 0 ) // High-bits unset: Have VNI
|
||||
ctx.Assign(GetVxlanVniOffset(), static_cast<zeek_uint_t>(key.vxlan_vni));
|
||||
else
|
||||
ctx.Remove(GetVxlanVniOffset());
|
||||
}
|
||||
|
||||
// Extract VNI from most outer VXLAN layer.
|
||||
void DoInit(const Packet& pkt) override {
|
||||
static const auto& analyzer = zeek::packet_mgr->GetAnalyzer("VXLAN");
|
||||
|
||||
// Set the high-bits: This is needed because keys can get reused.
|
||||
key.vxlan_vni = 0xFF000000;
|
||||
|
||||
if ( ! analyzer || ! analyzer->IsEnabled() )
|
||||
return;
|
||||
|
||||
auto spans = zeek::packet_mgr->GetAnalyzerData(analyzer);
|
||||
|
||||
if ( spans.empty() || spans[0].size() < 8 )
|
||||
return;
|
||||
|
||||
key.vxlan_vni = spans[0][4] << 16 | spans[0][5] << 8 | spans[0][6];
|
||||
}
|
||||
|
||||
static int GetVxlanVniOffset() {
|
||||
static const auto& conn_id_ctx = zeek::id::find_type<zeek::RecordType>("conn_id_ctx");
|
||||
static int vxlan_vni_offset = conn_id_ctx->FieldOffset("vxlan_vni");
|
||||
return vxlan_vni_offset;
|
||||
}
|
||||
|
||||
private:
|
||||
friend class Factory;
|
||||
|
||||
struct {
|
||||
struct detail::PackedConnTuple tuple;
|
||||
uint32_t vxlan_vni;
|
||||
} __attribute__((packed, aligned)) key; // packed and aligned due to usage for hashing
|
||||
};
|
||||
|
||||
zeek::ConnKeyPtr Factory::DoNewConnKey() const { return std::make_unique<VxlanVniConnKey>(); }
|
||||
|
||||
zeek::expected<zeek::ConnKeyPtr, std::string> Factory::DoConnKeyFromVal(const zeek::Val& v) const {
|
||||
if ( v.GetType() != id::conn_id )
|
||||
return zeek::unexpected<std::string>{"unexpected value type"};
|
||||
|
||||
auto ck = zeek::conn_key::fivetuple::Factory::DoConnKeyFromVal(v);
|
||||
if ( ! ck.has_value() )
|
||||
return ck;
|
||||
|
||||
int vxlan_vni_offset = VxlanVniConnKey::GetVxlanVniOffset();
|
||||
static int ctx_offset = id::conn_id->FieldOffset("ctx");
|
||||
|
||||
auto* k = static_cast<VxlanVniConnKey*>(ck.value().get());
|
||||
auto* ctx = v.AsRecordVal()->GetFieldAs<zeek::RecordVal>(ctx_offset);
|
||||
|
||||
if ( vxlan_vni_offset < 0 )
|
||||
return zeek::unexpected<std::string>{"missing vlxan_vni field"};
|
||||
|
||||
if ( ctx->HasField(vxlan_vni_offset) )
|
||||
k->key.vxlan_vni = ctx->GetFieldAs<zeek::CountVal>(vxlan_vni_offset);
|
||||
|
||||
return ck;
|
||||
}
|
||||
|
||||
} // namespace zeek::conn_key::vxlan_vni_fivetuple
|
|
@ -0,0 +1,18 @@
|
|||
#pragma once
|
||||
|
||||
#include "zeek/ConnKey.h"
|
||||
#include "zeek/packet_analysis/protocol/ip/conn_key/fivetuple/Factory.h"
|
||||
|
||||
namespace zeek::conn_key::vxlan_vni_fivetuple {
|
||||
|
||||
class Factory : public zeek::conn_key::fivetuple::Factory {
|
||||
public:
|
||||
static zeek::conn_key::FactoryPtr Instantiate() { return std::make_unique<Factory>(); }
|
||||
|
||||
private:
|
||||
// Returns a VxlanVniConnKey instance.
|
||||
zeek::ConnKeyPtr DoNewConnKey() const override;
|
||||
zeek::expected<zeek::ConnKeyPtr, std::string> DoConnKeyFromVal(const zeek::Val& v) const override;
|
||||
};
|
||||
|
||||
} // namespace zeek::conn_key::vxlan_vni_fivetuple
|
|
@ -0,0 +1,26 @@
|
|||
|
||||
#include "Plugin.h"
|
||||
|
||||
#include <zeek/conn_key/Component.h>
|
||||
|
||||
#include "Factory.h"
|
||||
|
||||
namespace plugin {
|
||||
namespace Zeek_ConnKey_Vxlan_Vni_Fivetuple {
|
||||
Plugin plugin;
|
||||
}
|
||||
} // namespace plugin
|
||||
|
||||
using namespace plugin::Zeek_ConnKey_Vxlan_Vni_Fivetuple;
|
||||
|
||||
zeek::plugin::Configuration Plugin::Configure() {
|
||||
zeek::plugin::Configuration config;
|
||||
config.name = "Zeek::ConnKey_Vxlan_Vni_Fivetuple";
|
||||
config.description = "ConnKey implementation using the most outer VXLAN VNI";
|
||||
config.version = {0, 1, 0};
|
||||
|
||||
AddComponent(new zeek::conn_key::Component("VXLAN_VNI_FIVETUPLE",
|
||||
zeek::conn_key::vxlan_vni_fivetuple::Factory::Instantiate));
|
||||
|
||||
return config;
|
||||
}
|
|
@ -0,0 +1,17 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include <zeek/plugin/Plugin.h>
|
||||
|
||||
namespace plugin {
|
||||
namespace Zeek_ConnKey_Vxlan_Vni_Fivetuple {
|
||||
|
||||
class Plugin : public zeek::plugin::Plugin {
|
||||
protected:
|
||||
zeek::plugin::Configuration Configure() override;
|
||||
};
|
||||
|
||||
extern Plugin plugin;
|
||||
|
||||
} // namespace Zeek_ConnKey_Vxlan_Vni_Fivetuple
|
||||
} // namespace plugin
|
3
doc/devel/plugins/event-metadata-plugin-src/.gitignore
vendored
Normal file
3
doc/devel/plugins/event-metadata-plugin-src/.gitignore
vendored
Normal file
|
@ -0,0 +1,3 @@
|
|||
build
|
||||
*.log
|
||||
.state
|
|
@ -0,0 +1,9 @@
|
|||
cmake_minimum_required(VERSION 3.15 FATAL_ERROR)
|
||||
|
||||
project(ZeekPluginEventLatency)
|
||||
|
||||
include(ZeekPlugin)
|
||||
|
||||
zeek_add_plugin(
|
||||
Zeek EventLatency
|
||||
SOURCES src/Plugin.cc SCRIPT_FILES scripts/__load__.zeek)
|
26
doc/devel/plugins/event-metadata-plugin-src/COPYING
Normal file
26
doc/devel/plugins/event-metadata-plugin-src/COPYING
Normal file
|
@ -0,0 +1,26 @@
|
|||
Copyright (c) 2025 by the Zeek Project. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
3. Neither the name of the copyright holder nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
|
||||
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
23
doc/devel/plugins/event-metadata-plugin-src/Makefile
Normal file
23
doc/devel/plugins/event-metadata-plugin-src/Makefile
Normal file
|
@ -0,0 +1,23 @@
|
|||
#
|
||||
# Convenience Makefile providing a few common top-level targets.
|
||||
#
|
||||
|
||||
cmake_build_dir=build
|
||||
arch=`uname -s | tr A-Z a-z`-`uname -m`
|
||||
|
||||
all: build-it
|
||||
|
||||
build-it:
|
||||
( cd $(cmake_build_dir) && make )
|
||||
|
||||
install:
|
||||
( cd $(cmake_build_dir) && make install )
|
||||
|
||||
clean:
|
||||
( cd $(cmake_build_dir) && make clean )
|
||||
|
||||
distclean:
|
||||
rm -rf $(cmake_build_dir)
|
||||
|
||||
test:
|
||||
make -C tests
|
0
doc/devel/plugins/event-metadata-plugin-src/README
Normal file
0
doc/devel/plugins/event-metadata-plugin-src/README
Normal file
1
doc/devel/plugins/event-metadata-plugin-src/VERSION
Normal file
1
doc/devel/plugins/event-metadata-plugin-src/VERSION
Normal file
|
@ -0,0 +1 @@
|
|||
0.1.0
|
193
doc/devel/plugins/event-metadata-plugin-src/configure
vendored
Executable file
193
doc/devel/plugins/event-metadata-plugin-src/configure
vendored
Executable file
|
@ -0,0 +1,193 @@
|
|||
#!/bin/sh
|
||||
#
|
||||
# Wrapper for viewing/setting options that the plugin's CMake
|
||||
# scripts will recognize.
|
||||
#
|
||||
# Don't edit this. Edit configure.plugin to add plugin-specific options.
|
||||
#
|
||||
|
||||
set -e
|
||||
command="$0 $*"
|
||||
|
||||
if [ -e $(dirname $0)/configure.plugin ]; then
|
||||
# Include custom additions.
|
||||
. $(dirname $0)/configure.plugin
|
||||
fi
|
||||
|
||||
usage() {
|
||||
|
||||
cat 1>&2 <<EOF
|
||||
Usage: $0 [OPTIONS]
|
||||
|
||||
Plugin Options:
|
||||
--cmake=PATH Path to CMake binary
|
||||
--zeek-dist=DIR Path to Zeek source tree
|
||||
--install-root=DIR Path where to install plugin into
|
||||
--with-binpac=DIR Path to BinPAC installation root
|
||||
--with-broker=DIR Path to Broker installation root
|
||||
--with-bifcl=PATH Path to bifcl executable
|
||||
--enable-debug Compile in debugging mode
|
||||
--disable-cpp-tests Don't build C++ unit tests
|
||||
EOF
|
||||
|
||||
if type plugin_usage >/dev/null 2>&1; then
|
||||
plugin_usage 1>&2
|
||||
fi
|
||||
|
||||
echo
|
||||
|
||||
exit 1
|
||||
}
|
||||
|
||||
# Function to append a CMake cache entry definition to the
|
||||
# CMakeCacheEntries variable
|
||||
# $1 is the cache entry variable name
|
||||
# $2 is the cache entry variable type
|
||||
# $3 is the cache entry variable value
|
||||
append_cache_entry() {
|
||||
CMakeCacheEntries="$CMakeCacheEntries -D $1:$2=$3"
|
||||
}
|
||||
|
||||
# set defaults
|
||||
builddir=build
|
||||
zeekdist=""
|
||||
installroot="default"
|
||||
zeek_plugin_begin_opts=""
|
||||
CMakeCacheEntries=""
|
||||
|
||||
while [ $# -ne 0 ]; do
|
||||
case "$1" in
|
||||
-*=*) optarg=$(echo "$1" | sed 's/[-_a-zA-Z0-9]*=//') ;;
|
||||
*) optarg= ;;
|
||||
esac
|
||||
|
||||
case "$1" in
|
||||
--help | -h)
|
||||
usage
|
||||
;;
|
||||
|
||||
--cmake=*)
|
||||
CMakeCommand=$optarg
|
||||
;;
|
||||
|
||||
--zeek-dist=*)
|
||||
zeekdist=$(cd $optarg && pwd)
|
||||
;;
|
||||
|
||||
--install-root=*)
|
||||
installroot=$optarg
|
||||
;;
|
||||
|
||||
--with-binpac=*)
|
||||
append_cache_entry BinPAC_ROOT_DIR PATH $optarg
|
||||
binpac_root=$optarg
|
||||
;;
|
||||
|
||||
--with-broker=*)
|
||||
append_cache_entry BROKER_ROOT_DIR PATH $optarg
|
||||
broker_root=$optarg
|
||||
;;
|
||||
|
||||
--with-bifcl=*)
|
||||
append_cache_entry BifCl_EXE PATH $optarg
|
||||
;;
|
||||
|
||||
--enable-debug)
|
||||
append_cache_entry BRO_PLUGIN_ENABLE_DEBUG BOOL true
|
||||
;;
|
||||
|
||||
--disable-cpp-tests)
|
||||
zeek_plugin_begin_opts="DISABLE_CPP_TESTS;$zeek_plugin_begin_opts"
|
||||
;;
|
||||
|
||||
*)
|
||||
if type plugin_option >/dev/null 2>&1; then
|
||||
plugin_option $1 && shift && continue
|
||||
fi
|
||||
|
||||
echo "Invalid option '$1'. Try $0 --help to see available options."
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
shift
|
||||
done
|
||||
|
||||
if [ -z "$CMakeCommand" ]; then
|
||||
# prefer cmake3 over "regular" cmake (cmake == cmake2 on RHEL)
|
||||
if command -v cmake3 >/dev/null 2>&1; then
|
||||
CMakeCommand="cmake3"
|
||||
elif command -v cmake >/dev/null 2>&1; then
|
||||
CMakeCommand="cmake"
|
||||
else
|
||||
echo "This plugin requires CMake, please install it first."
|
||||
echo "Then you may use this script to configure the CMake build."
|
||||
echo "Note: pass --cmake=PATH to use cmake in non-standard locations."
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ -z "$zeekdist" ]; then
|
||||
if type zeek-config >/dev/null 2>&1; then
|
||||
zeek_config="zeek-config"
|
||||
else
|
||||
echo "Either 'zeek-config' must be in PATH or '--zeek-dist=<path>' used"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
append_cache_entry BRO_CONFIG_PREFIX PATH $(${zeek_config} --prefix)
|
||||
append_cache_entry BRO_CONFIG_INCLUDE_DIR PATH $(${zeek_config} --include_dir)
|
||||
append_cache_entry BRO_CONFIG_PLUGIN_DIR PATH $(${zeek_config} --plugin_dir)
|
||||
append_cache_entry BRO_CONFIG_LIB_DIR PATH $(${zeek_config} --lib_dir)
|
||||
append_cache_entry BRO_CONFIG_CMAKE_DIR PATH $(${zeek_config} --cmake_dir)
|
||||
append_cache_entry CMAKE_MODULE_PATH PATH $(${zeek_config} --cmake_dir)
|
||||
|
||||
build_type=$(${zeek_config} --build_type)
|
||||
|
||||
if [ "$build_type" = "debug" ]; then
|
||||
append_cache_entry BRO_PLUGIN_ENABLE_DEBUG BOOL true
|
||||
fi
|
||||
|
||||
if [ -z "$binpac_root" ]; then
|
||||
append_cache_entry BinPAC_ROOT_DIR PATH $(${zeek_config} --binpac_root)
|
||||
fi
|
||||
|
||||
if [ -z "$broker_root" ]; then
|
||||
append_cache_entry BROKER_ROOT_DIR PATH $(${zeek_config} --broker_root)
|
||||
fi
|
||||
else
|
||||
if [ ! -e "$zeekdist/zeek-path-dev.in" ]; then
|
||||
echo "$zeekdist does not appear to be a valid Zeek source tree."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# BRO_DIST is the canonical/historical name used by plugin CMake scripts
|
||||
# ZEEK_DIST doesn't serve a function at the moment, but set/provided anyway
|
||||
append_cache_entry BRO_DIST PATH $zeekdist
|
||||
append_cache_entry ZEEK_DIST PATH $zeekdist
|
||||
append_cache_entry CMAKE_MODULE_PATH PATH $zeekdist/cmake
|
||||
fi
|
||||
|
||||
if [ "$installroot" != "default" ]; then
|
||||
mkdir -p $installroot
|
||||
append_cache_entry BRO_PLUGIN_INSTALL_ROOT PATH $installroot
|
||||
fi
|
||||
|
||||
if [ -n "$zeek_plugin_begin_opts" ]; then
|
||||
append_cache_entry ZEEK_PLUGIN_BEGIN_OPTS STRING "$zeek_plugin_begin_opts"
|
||||
fi
|
||||
|
||||
if type plugin_addl >/dev/null 2>&1; then
|
||||
plugin_addl
|
||||
fi
|
||||
|
||||
echo "Build Directory : $builddir"
|
||||
echo "Zeek Source Directory : $zeekdist"
|
||||
|
||||
mkdir -p $builddir
|
||||
cd $builddir
|
||||
|
||||
"$CMakeCommand" $CMakeCacheEntries ..
|
||||
|
||||
echo "# This is the command used to configure this build" >config.status
|
||||
echo $command >>config.status
|
||||
chmod u+x config.status
|
|
@ -0,0 +1,11 @@
|
|||
module EventLatency;
|
||||
|
||||
redef enum EventMetadata::ID += {
|
||||
## Identifier for the absolute time at which Zeek published this event.
|
||||
WALLCLOCK_TIMESTAMP = 10001000,
|
||||
};
|
||||
|
||||
event zeek_init()
|
||||
{
|
||||
assert EventMetadata::register(WALLCLOCK_TIMESTAMP, time);
|
||||
}
|
|
@ -0,0 +1 @@
|
|||
# Empty
|
65
doc/devel/plugins/event-metadata-plugin-src/src/Plugin.cc
Normal file
65
doc/devel/plugins/event-metadata-plugin-src/src/Plugin.cc
Normal file
|
@ -0,0 +1,65 @@
|
|||
|
||||
#include "Plugin.h"
|
||||
|
||||
#include <zeek/Event.h>
|
||||
#include <zeek/Val.h>
|
||||
#include <zeek/cluster/Backend.h>
|
||||
#include <zeek/plugin/Plugin.h>
|
||||
#include <zeek/telemetry/Manager.h>
|
||||
|
||||
namespace plugin {
|
||||
namespace Zeek_EventLatency {
|
||||
Plugin plugin;
|
||||
}
|
||||
} // namespace plugin
|
||||
|
||||
using namespace plugin::Zeek_EventLatency;
|
||||
|
||||
zeek::plugin::Configuration Plugin::Configure() {
|
||||
zeek::plugin::Configuration config;
|
||||
config.name = "Zeek::EventLatency";
|
||||
config.description = "Track remote event latencies";
|
||||
config.version = {0, 1, 0};
|
||||
EnableHook(zeek::plugin::HOOK_PUBLISH_EVENT);
|
||||
EnableHook(zeek::plugin::HOOK_QUEUE_EVENT);
|
||||
return config;
|
||||
}
|
||||
|
||||
void Plugin::InitPostScript() {
|
||||
double bounds[] = {0.0002, 0.0004, 0.0006, 0.0008, 0.0010, 0.0012, 0.0014, 0.0016, 0.0018, 0.0020};
|
||||
histogram =
|
||||
zeek::telemetry_mgr->HistogramInstance("zeek", "cluster_event_latency_seconds", {}, bounds, "event latency");
|
||||
}
|
||||
|
||||
bool Plugin::HookPublishEvent(zeek::cluster::Backend& backend, const std::string& topic,
|
||||
zeek::cluster::detail::Event& event) {
|
||||
static const auto& wallclock_id = zeek::id::find_val<zeek::EnumVal>("EventLatency::WALLCLOCK_TIMESTAMP");
|
||||
|
||||
auto now_val = zeek::make_intrusive<zeek::TimeVal>(zeek::util::current_time(/*real=*/true));
|
||||
|
||||
if ( ! event.AddMetadata(wallclock_id, now_val) )
|
||||
zeek::reporter->FatalError("failed to add wallclock timestamp metadata");
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Plugin::HookQueueEvent(zeek::Event* event) {
|
||||
static const auto& wallclock_id = zeek::id::find_val<zeek::EnumVal>("EventLatency::WALLCLOCK_TIMESTAMP");
|
||||
|
||||
if ( event->Source() == zeek::util::detail::SOURCE_LOCAL )
|
||||
return false;
|
||||
|
||||
auto timestamps = event->MetadataValues(wallclock_id);
|
||||
|
||||
if ( timestamps->Size() > 0 ) {
|
||||
double remote_ts = timestamps->ValAt(0)->AsTime();
|
||||
auto now = zeek::util::current_time(/*real=*/true);
|
||||
auto latency = std::max(0.0, now - remote_ts);
|
||||
|
||||
histogram->Observe(latency);
|
||||
}
|
||||
else
|
||||
zeek::reporter->Warning("missing wallclock timestamp metadata");
|
||||
|
||||
return false;
|
||||
}
|
29
doc/devel/plugins/event-metadata-plugin-src/src/Plugin.h
Normal file
29
doc/devel/plugins/event-metadata-plugin-src/src/Plugin.h
Normal file
|
@ -0,0 +1,29 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include <zeek/plugin/Plugin.h>
|
||||
#include <zeek/telemetry/Histogram.h>
|
||||
|
||||
namespace plugin {
|
||||
namespace Zeek_EventLatency {
|
||||
|
||||
class Plugin : public zeek::plugin::Plugin {
|
||||
protected:
|
||||
// Overridden from zeek::plugin::Plugin.
|
||||
zeek::plugin::Configuration Configure() override;
|
||||
|
||||
void InitPostScript() override;
|
||||
|
||||
bool HookPublishEvent(zeek::cluster::Backend& backend, const std::string& topic,
|
||||
zeek::cluster::detail::Event& event) override;
|
||||
|
||||
bool HookQueueEvent(zeek::Event* event) override;
|
||||
|
||||
private:
|
||||
zeek::telemetry::HistogramPtr histogram;
|
||||
};
|
||||
|
||||
extern Plugin plugin;
|
||||
|
||||
} // namespace Zeek_EventLatency
|
||||
} // namespace plugin
|
103
doc/devel/plugins/event-metadata-plugin.rst
Normal file
103
doc/devel/plugins/event-metadata-plugin.rst
Normal file
|
@ -0,0 +1,103 @@
|
|||
.. _event-metadata-plugin:
|
||||
|
||||
=====================
|
||||
Event Metadata Plugin
|
||||
=====================
|
||||
|
||||
.. versionadded:: 8.0
|
||||
|
||||
|
||||
Zeek's plugin API allows adding metadata to Zeek events. In the Zeek-script
|
||||
layer, the :zeek:see:`EventMetadata::current` and :zeek:see:`EventMetadata::current_all`
|
||||
functions can be used to introspect metadata attached to events. In a Zeek cluster,
|
||||
metadata is transported via remote events for consumption by other Zeek nodes.
|
||||
This section describes the functionality in form of a tutorial. We'll
|
||||
be using custom event metadata to track the latency of Zeek events in a
|
||||
cluster and expose them as a Prometheus histogram.
|
||||
|
||||
If you're unfamiliar with plugin development, head over to the
|
||||
:ref:`Writing Plugins <writing-plugins>` section. For more information
|
||||
about telemetry and Prometheus, see also the :ref:`Telemetry framework's <framework-telemetry>`
|
||||
documentation.
|
||||
|
||||
|
||||
Registering Metadata
|
||||
====================
|
||||
|
||||
Initially, we make Zeek's core aware of the metadata to attach to events. This
|
||||
requires two steps.
|
||||
First, redefining the :zeek:see:`EventMetadata::ID` enumeration with our
|
||||
custom enumeration value ``WALLCLOCK_TIMESTAMP``. This is our metadata identifier.
|
||||
Its value represents the Unix timestamps when an event was published.
|
||||
Second, registering the metadata identifier with Zeek's :zeek:see:`time` type
|
||||
by calling :zeek:see:`EventMetadata::register` in a :zeek:see:`zeek_init` handler.
|
||||
This instructs Zeek to convert metadata items in received remote events with
|
||||
identifier ``10001000`` to a :zeek:see:`time` value.
|
||||
|
||||
For simplicity, the second step is done in the plugin's ``scripts/__load__.zeek`` file
|
||||
that's loaded automatically when Zeek loads the plugin.
|
||||
|
||||
.. literalinclude:: event-metadata-plugin-src/scripts/__load__.zeek
|
||||
:caption: __load__.zeek
|
||||
:language: zeek
|
||||
:linenos:
|
||||
:tab-width: 4
|
||||
|
||||
The ``10001000`` represents the metadata identifier for serialization purposes. It
|
||||
needs to be unique and have a defined meaning and consistent type for a given Zeek
|
||||
deployment. Metadata identifiers below ``200`` are reserved for Zeek's internal use.
|
||||
Users are free to choose any other value. Zeek will fail to start or fail to
|
||||
register the type in the case of conflicting identifiers in third-party packages.
|
||||
|
||||
|
||||
Implementing the Plugin
|
||||
=======================
|
||||
|
||||
Next, we implement the ``InitPostScript()``, ``HookPublishEvent()`` and
|
||||
``HookQueueEvent()`` methods in our plugin.
|
||||
In the ``InitPostScript()`` method, a histogram instance is initialized using
|
||||
Zeek's telemetry manager with hard-coded bounds. These define buckets for latency
|
||||
monitoring.
|
||||
The ``HookPublishEvent()`` method adds ``WALLCLOCK_TIMESTAMP`` metadata with
|
||||
the current time to the event, while the ``HookQueueEvent()`` method extracts
|
||||
the sender's timestamp and computes the latency based on its own local time.
|
||||
Finally, the latency is recorded with the histogram by calling ``Observe()``.
|
||||
|
||||
|
||||
.. literalinclude:: event-metadata-plugin-src/src/Plugin.cc
|
||||
:caption: src/Plugin.cc
|
||||
:language: c++
|
||||
:linenos:
|
||||
:lines: 28-
|
||||
:tab-width: 4
|
||||
|
||||
|
||||
Resulting Prometheus Metrics
|
||||
============================
|
||||
|
||||
Deploying the plugin outlined above in a cluster and querying the manager's
|
||||
metrics endpoint presents the following result::
|
||||
|
||||
$ curl -s localhost:10001/metrics | grep '^zeek_cluster_event_latency'
|
||||
zeek_cluster_event_latency_seconds_count{endpoint="manager"} 11281
|
||||
zeek_cluster_event_latency_seconds_sum{endpoint="manager"} 7.960928916931152
|
||||
zeek_cluster_event_latency_seconds_bucket{endpoint="manager",le="0.0002"} 37
|
||||
zeek_cluster_event_latency_seconds_bucket{endpoint="manager",le="0.0004"} 583
|
||||
zeek_cluster_event_latency_seconds_bucket{endpoint="manager",le="0.0005999999999999999"} 3858
|
||||
zeek_cluster_event_latency_seconds_bucket{endpoint="manager",le="0.0008"} 7960
|
||||
zeek_cluster_event_latency_seconds_bucket{endpoint="manager",le="0.001"} 10185
|
||||
zeek_cluster_event_latency_seconds_bucket{endpoint="manager",le="0.0012"} 10957
|
||||
zeek_cluster_event_latency_seconds_bucket{endpoint="manager",le="0.0014"} 11239
|
||||
zeek_cluster_event_latency_seconds_bucket{endpoint="manager",le="0.0016"} 11269
|
||||
zeek_cluster_event_latency_seconds_bucket{endpoint="manager",le="0.0018"} 11279
|
||||
zeek_cluster_event_latency_seconds_bucket{endpoint="manager",le="0.002"} 11281
|
||||
zeek_cluster_event_latency_seconds_bucket{endpoint="manager",le="+Inf"} 11281
|
||||
|
||||
|
||||
This example indicates that there were a total of 11281 latencies observed,
|
||||
the summed-up latency was around 8 seconds, 37 events had a latency less than or equal
|
||||
to 0.2 milliseconds, 583 had a latency less than or equal to 0.4 milliseconds, and none
|
||||
that took more than 2 milliseconds.
|
||||
|
||||
This sort of data is usually scraped and ingested by a `Prometheus server <https://prometheus.io/>`_ and
|
||||
then visualized using `Grafana <https://grafana.com/>`_.
|
46
doc/devel/spicy/autogen-spicy-docs
Executable file
46
doc/devel/spicy/autogen-spicy-docs
Executable file
|
@ -0,0 +1,46 @@
|
|||
#!/bin/bash
|
||||
#
|
||||
# Copyright (c) 2020-2023 by the Zeek Project. See LICENSE for details.
|
||||
#
|
||||
# Tool to update autogenerated docs that require external files. Must be
|
||||
# run manually and requires access to the Spicy TFTP analyzer.
|
||||
|
||||
set -e
|
||||
|
||||
if [ $# != 1 ]; then
|
||||
echo "usage: $(basename "$0") <spicy-tftp-repo>"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
TFTP=$1
|
||||
|
||||
if [ ! -d "${TFTP}"/analyzer ]; then
|
||||
echo "${TFTP} does not seem to point to a spicy-tftp repository."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
set -o errexit
|
||||
set -o nounset
|
||||
|
||||
ZEEK="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)/../../.."
|
||||
DOC="${ZEEK}/doc"
|
||||
SPICY="${ZEEK}/auxil/spicy"
|
||||
SPICYDOC="${ZEEK}/build/auxil/spicy/bin/spicy-doc"
|
||||
AUTOGEN_FINAL="${ZEEK}/doc/devel/spicy/autogen"
|
||||
|
||||
if [ ! -x "${SPICYDOC}" ]; then
|
||||
>&2 echo "Warning: Could not find spicy-doc in build directory, aborting"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
"${SPICY}/doc/scripts/autogen-spicy-lib" functions zeek <"${ZEEK}/scripts/spicy/zeek.spicy" >"${AUTOGEN_FINAL}/zeek-functions.spicy" || exit 1
|
||||
|
||||
# Copy some static files over.
|
||||
cp "${TFTP}"/scripts/main.zeek "${AUTOGEN_FINAL}"/tftp.zeek || exit 1
|
||||
cp "${TFTP}"/analyzer/tftp.spicy "${AUTOGEN_FINAL}"/tftp.spicy || exit 1
|
||||
cp "${TFTP}"/analyzer/tftp.evt "${AUTOGEN_FINAL}"/tftp.evt || exit 1
|
||||
|
||||
# Copy some files from the Zeek source tree so that zeek-docs remains standalone for CI.
|
||||
cp "${ZEEK}/scripts/base/frameworks/spicy/init-bare.zeek" "${AUTOGEN_FINAL}/"
|
||||
cp "${ZEEK}/scripts/base/frameworks/spicy/init-framework.zeek" "${AUTOGEN_FINAL}/"
|
||||
cp "${ZEEK}/auxil/spicy/doc/scripts/spicy-pygments.py" "${DOC}/ext"
|
38
doc/devel/spicy/autogen/init-bare.zeek
Normal file
38
doc/devel/spicy/autogen/init-bare.zeek
Normal file
|
@ -0,0 +1,38 @@
|
|||
|
||||
module Spicy;
|
||||
|
||||
export {
|
||||
# doc-options-start
|
||||
## Constant for testing if Spicy is available.
|
||||
const available = T;
|
||||
|
||||
## Show output of Spicy print statements.
|
||||
const enable_print = F &redef;
|
||||
|
||||
## Record and display profiling information, if compiled into analyzer.
|
||||
const enable_profiling = F &redef;
|
||||
|
||||
## abort() instead of throwing HILTI exceptions.
|
||||
const abort_on_exceptions = F &redef;
|
||||
|
||||
## Include backtraces when reporting unhandled exceptions.
|
||||
const show_backtraces = F &redef;
|
||||
|
||||
## Maximum depth of recursive file analysis (Spicy analyzers only)
|
||||
const max_file_depth: count = 5 &redef;
|
||||
# doc-options-end
|
||||
|
||||
# doc-types-start
|
||||
## Result type for :zeek:see:`Spicy::resource_usage`. The values reflect resource
|
||||
## usage as reported by the Spicy runtime system.
|
||||
type ResourceUsage: record {
|
||||
user_time : interval; ##< user CPU time of the Zeek process
|
||||
system_time :interval; ##< system CPU time of the Zeek process
|
||||
memory_heap : count; ##< memory allocated on the heap by the Zeek process
|
||||
num_fibers : count; ##< number of fibers currently in use
|
||||
max_fibers: count; ##< maximum number of fibers ever in use
|
||||
max_fiber_stack_size: count; ##< maximum fiber stack size ever in use
|
||||
cached_fibers: count; ##< number of fibers currently cached
|
||||
};
|
||||
# doc-types-end
|
||||
}
|
85
doc/devel/spicy/autogen/init-framework.zeek
Normal file
85
doc/devel/spicy/autogen/init-framework.zeek
Normal file
|
@ -0,0 +1,85 @@
|
|||
# doc-common-start
|
||||
module Spicy;
|
||||
|
||||
export {
|
||||
# doc-functions-start
|
||||
## Enable a specific Spicy protocol analyzer if not already active. If this
|
||||
## analyzer replaces a standard analyzer, that one will automatically be
|
||||
## disabled.
|
||||
##
|
||||
## tag: analyzer to toggle
|
||||
##
|
||||
## Returns: true if the operation succeeded
|
||||
global enable_protocol_analyzer: function(tag: Analyzer::Tag) : bool;
|
||||
|
||||
## Disable a specific Spicy protocol analyzer if not already inactive. If
|
||||
## this analyzer replaces a standard analyzer, that one will automatically
|
||||
## be re-enabled.
|
||||
##
|
||||
## tag: analyzer to toggle
|
||||
##
|
||||
## Returns: true if the operation succeeded
|
||||
global disable_protocol_analyzer: function(tag: Analyzer::Tag) : bool;
|
||||
|
||||
|
||||
## Enable a specific Spicy file analyzer if not already active. If this
|
||||
## analyzer replaces a standard analyzer, that one will automatically be
|
||||
## disabled.
|
||||
##
|
||||
## tag: analyzer to toggle
|
||||
##
|
||||
## Returns: true if the operation succeeded
|
||||
global enable_file_analyzer: function(tag: Files::Tag) : bool;
|
||||
|
||||
## Disable a specific Spicy file analyzer if not already inactive. If
|
||||
## this analyzer replaces a standard analyzer, that one will automatically
|
||||
## be re-enabled.
|
||||
##
|
||||
## tag: analyzer to toggle
|
||||
##
|
||||
## Returns: true if the operation succeeded
|
||||
global disable_file_analyzer: function(tag: Files::Tag) : bool;
|
||||
|
||||
## Returns current resource usage as reported by the Spicy runtime system.
|
||||
global resource_usage: function() : ResourceUsage;
|
||||
# doc-functions-end
|
||||
}
|
||||
|
||||
# Marked with &is_used to suppress complaints when there aren't any
|
||||
# Spicy file analyzers loaded, and hence this event can't be generated.
|
||||
event spicy_analyzer_for_mime_type(a: Files::Tag, mt: string) &is_used
|
||||
{
|
||||
Files::register_for_mime_type(a, mt);
|
||||
}
|
||||
|
||||
# Marked with &is_used to suppress complaints when there aren't any
|
||||
# Spicy protocol analyzers loaded, and hence this event can't be generated.
|
||||
event spicy_analyzer_for_port(a: Analyzer::Tag, p: port) &is_used
|
||||
{
|
||||
Analyzer::register_for_port(a, p);
|
||||
}
|
||||
|
||||
function enable_protocol_analyzer(tag: Analyzer::Tag) : bool
|
||||
{
|
||||
return Spicy::__toggle_analyzer(tag, T);
|
||||
}
|
||||
|
||||
function disable_protocol_analyzer(tag: Analyzer::Tag) : bool
|
||||
{
|
||||
return Spicy::__toggle_analyzer(tag, F);
|
||||
}
|
||||
|
||||
function enable_file_analyzer(tag: Files::Tag) : bool
|
||||
{
|
||||
return Spicy::__toggle_analyzer(tag, T);
|
||||
}
|
||||
|
||||
function disable_file_analyzer(tag: Files::Tag) : bool
|
||||
{
|
||||
return Spicy::__toggle_analyzer(tag, F);
|
||||
}
|
||||
|
||||
function resource_usage() : ResourceUsage
|
||||
{
|
||||
return Spicy::__resource_usage();
|
||||
}
|
16
doc/devel/spicy/autogen/tftp.evt
Normal file
16
doc/devel/spicy/autogen/tftp.evt
Normal file
|
@ -0,0 +1,16 @@
|
|||
# Copyright (c) 2021 by the Zeek Project. See LICENSE for details.
|
||||
#
|
||||
# Note: When line numbers change in this file, update the documentation that pulls it in.
|
||||
|
||||
protocol analyzer spicy::TFTP over UDP:
|
||||
parse with TFTP::Packet,
|
||||
port 69/udp;
|
||||
|
||||
import TFTP;
|
||||
|
||||
on TFTP::Request if ( is_read ) -> event tftp::read_request($conn, $is_orig, self.filename, self.mode);
|
||||
on TFTP::Request if ( ! is_read ) -> event tftp::write_request($conn, $is_orig, self.filename, self.mode);
|
||||
|
||||
on TFTP::Data -> event tftp::data($conn, $is_orig, self.num, self.data);
|
||||
on TFTP::Acknowledgement -> event tftp::ack($conn, $is_orig, self.num);
|
||||
on TFTP::Error -> event tftp::error($conn, $is_orig, self.code, self.msg);
|
95
doc/devel/spicy/autogen/tftp.spicy
Normal file
95
doc/devel/spicy/autogen/tftp.spicy
Normal file
|
@ -0,0 +1,95 @@
|
|||
# Copyright (c) 2021 by the Zeek Project. See LICENSE for details.
|
||||
#
|
||||
# Trivial File Transfer Protocol
|
||||
#
|
||||
# Specs from https://tools.ietf.org/html/rfc1350
|
||||
|
||||
module TFTP;
|
||||
|
||||
import spicy;
|
||||
|
||||
# Common header for all messages:
|
||||
#
|
||||
# 2 bytes
|
||||
# ---------------
|
||||
# | TFTP Opcode |
|
||||
# ---------------
|
||||
|
||||
public type Packet = unit {
|
||||
# public top-level entry point for parsing
|
||||
op: uint16 &convert=Opcode($$);
|
||||
switch (self.op) {
|
||||
Opcode::RRQ -> rrq: Request(True);
|
||||
Opcode::WRQ -> wrq: Request(False);
|
||||
Opcode::DATA -> data: Data;
|
||||
Opcode::ACK -> ack: Acknowledgement;
|
||||
Opcode::ERROR -> error: Error;
|
||||
};
|
||||
};
|
||||
|
||||
# TFTP supports five types of packets [...]:
|
||||
#
|
||||
# opcode operation
|
||||
# 1 Read request (RRQ)
|
||||
# 2 Write request (WRQ)
|
||||
# 3 Data (DATA)
|
||||
# 4 Acknowledgment (ACK)
|
||||
# 5 Error (ERROR)
|
||||
type Opcode = enum {
|
||||
RRQ = 0x01,
|
||||
WRQ = 0x02,
|
||||
DATA = 0x03,
|
||||
ACK = 0x04,
|
||||
ERROR = 0x05,
|
||||
};
|
||||
|
||||
# Figure 5-1: RRQ/WRQ packet
|
||||
#
|
||||
# 2 bytes string 1 byte string 1 byte
|
||||
# ------------------------------------------------
|
||||
# | Opcode | Filename | 0 | Mode | 0 |
|
||||
# ------------------------------------------------
|
||||
|
||||
type Request = unit(is_read: bool) {
|
||||
filename: bytes &until=b"\x00";
|
||||
mode: bytes &until=b"\x00";
|
||||
|
||||
on %done {
|
||||
spicy::accept_input();
|
||||
}
|
||||
};
|
||||
|
||||
# Figure 5-2: DATA packet
|
||||
#
|
||||
# 2 bytes 2 bytes n bytes
|
||||
# ----------------------------------
|
||||
# | Opcode | Block # | Data |
|
||||
# ----------------------------------
|
||||
|
||||
type Data = unit {
|
||||
num: uint16;
|
||||
data: bytes &eod;
|
||||
};
|
||||
|
||||
# Figure 5-3: ACK packet
|
||||
#
|
||||
# 2 bytes 2 bytes
|
||||
# ---------------------
|
||||
# | Opcode | Block # |
|
||||
# ---------------------
|
||||
|
||||
type Acknowledgement = unit {
|
||||
num: uint16;
|
||||
};
|
||||
|
||||
# Figure 5-4: ERROR packet
|
||||
#
|
||||
# 2 bytes 2 bytes string 1 byte
|
||||
# -----------------------------------------
|
||||
# | Opcode | ErrorCode | ErrMsg | 0 |
|
||||
# -----------------------------------------
|
||||
|
||||
type Error = unit {
|
||||
code: uint16;
|
||||
msg: bytes &until=b"\x00";
|
||||
};
|
162
doc/devel/spicy/autogen/tftp.zeek
Normal file
162
doc/devel/spicy/autogen/tftp.zeek
Normal file
|
@ -0,0 +1,162 @@
|
|||
# Copyright (c) 2021 by the Zeek Project. See LICENSE for details.
|
||||
|
||||
module TFTP;
|
||||
|
||||
export {
|
||||
redef enum Log::ID += { LOG };
|
||||
|
||||
type Info: record {
|
||||
## Timestamp for when the request happened.
|
||||
ts: time &log;
|
||||
## Unique ID for the connection.
|
||||
uid: string &log;
|
||||
## The connection's 4-tuple of endpoint addresses/ports.
|
||||
id: conn_id &log;
|
||||
## True for write requests, False for read request.
|
||||
wrq: bool &log;
|
||||
## File name of request.
|
||||
fname: string &log;
|
||||
## Mode of request.
|
||||
mode: string &log;
|
||||
## UID of data connection
|
||||
uid_data: string &optional &log;
|
||||
## Number of bytes sent.
|
||||
size: count &default=0 &log;
|
||||
## Highest block number sent.
|
||||
block_sent: count &default=0 &log;
|
||||
## Highest block number acknowledged.
|
||||
block_acked: count &default=0 &log;
|
||||
## Any error code encountered.
|
||||
error_code: count &optional &log;
|
||||
## Any error message encountered.
|
||||
error_msg: string &optional &log;
|
||||
|
||||
# Set to block number of final piece of data once received.
|
||||
final_block: count &optional;
|
||||
|
||||
# Set to true once logged.
|
||||
done: bool &default=F;
|
||||
};
|
||||
|
||||
## Event that can be handled to access the TFTP logging record.
|
||||
global log_tftp: event(rec: Info);
|
||||
}
|
||||
|
||||
# Maps a partial data connection ID to the request's Info record.
|
||||
global expected_data_conns: table[addr, port, addr] of Info;
|
||||
|
||||
redef record connection += {
|
||||
tftp: Info &optional;
|
||||
};
|
||||
|
||||
event zeek_init() &priority=5
|
||||
{
|
||||
Log::create_stream(TFTP::LOG, [$columns = Info, $ev = log_tftp, $path="tftp"]);
|
||||
}
|
||||
|
||||
function log_pending(c: connection)
|
||||
{
|
||||
if ( ! c?$tftp || c$tftp$done )
|
||||
return;
|
||||
|
||||
Log::write(TFTP::LOG, c$tftp);
|
||||
c$tftp$done = T;
|
||||
}
|
||||
|
||||
function init_request(c: connection, is_orig: bool, fname: string, mode: string, is_read: bool)
|
||||
{
|
||||
log_pending(c);
|
||||
|
||||
local info: Info;
|
||||
info$ts = network_time();
|
||||
info$uid = c$uid;
|
||||
info$id = c$id;
|
||||
info$fname = fname;
|
||||
info$mode = mode;
|
||||
info$wrq = (! is_read);
|
||||
c$tftp = info;
|
||||
|
||||
# The data will come in from a different source port.
|
||||
Analyzer::schedule_analyzer(c$id$resp_h, c$id$orig_h, c$id$orig_p, Analyzer::ANALYZER_SPICY_TFTP, 1min);
|
||||
expected_data_conns[c$id$resp_h, c$id$orig_p, c$id$orig_h] = info;
|
||||
}
|
||||
|
||||
event scheduled_analyzer_applied(c: connection, a: Analyzer::Tag) &priority=10
|
||||
{
|
||||
local id = c$id;
|
||||
if ( [c$id$orig_h, c$id$resp_p, c$id$resp_h] in expected_data_conns )
|
||||
{
|
||||
c$tftp = expected_data_conns[c$id$orig_h, c$id$resp_p, c$id$resp_h];
|
||||
c$tftp$uid_data = c$uid;
|
||||
add c$service["spicy_tftp_data"];
|
||||
}
|
||||
}
|
||||
|
||||
event tftp::read_request(c: connection, is_orig: bool, fname: string, mode: string)
|
||||
{
|
||||
init_request(c, is_orig, fname, mode, T);
|
||||
}
|
||||
|
||||
event tftp::write_request(c: connection, is_orig: bool, fname: string, mode: string)
|
||||
{
|
||||
init_request(c, is_orig, fname, mode, F);
|
||||
}
|
||||
|
||||
event tftp::data(c: connection, is_orig: bool, block_num: count, data: string)
|
||||
{
|
||||
if ( ! c?$tftp || c$tftp$done )
|
||||
return;
|
||||
|
||||
local info = c$tftp;
|
||||
|
||||
if ( block_num <= info$block_sent )
|
||||
# Duplicate (or previous gap; we don't track that)
|
||||
return;
|
||||
|
||||
info$size += |data|;
|
||||
info$block_sent = block_num;
|
||||
|
||||
if ( |data| < 512 )
|
||||
# Last block, per spec.
|
||||
info$final_block = block_num;
|
||||
}
|
||||
|
||||
# Handles a TFTP ACK. Tracks the highest acknowledged block and, once the
# final data block (per tftp::data's short-block detection) is acknowledged,
# writes out the log entry for this transfer.
#
# c: the data connection carrying the transfer
# is_orig: direction flag as reported by the analyzer (unused here)
# block_num: block number being acknowledged
event tftp::ack(c: connection, is_orig: bool, block_num: count)
	{
	if ( ! c?$tftp || c$tftp$done )
		return;

	local info = c$tftp;

	# Ignore duplicates (or previous gaps; we don't track those). The check
	# must happen *before* updating block_acked, otherwise it always fires.
	if ( block_num <= info$block_acked )
		return;

	info$block_acked = block_num;

	# If it's an ack for the last block, we're done.
	if ( info?$final_block && info$final_block == block_num )
		log_pending(c);
	}
|
||||
|
||||
event tftp::error(c: connection, is_orig: bool, code: count, msg: string)
|
||||
{
|
||||
if ( ! c?$tftp || c$tftp$done )
|
||||
return;
|
||||
|
||||
local info = c$tftp;
|
||||
|
||||
info$error_code = code;
|
||||
info$error_msg = msg;
|
||||
log_pending(c);
|
||||
}
|
||||
|
||||
event connection_state_remove(c: connection)
|
||||
{
|
||||
if ( ! c?$tftp || c$tftp$done )
|
||||
return;
|
||||
|
||||
log_pending(c);
|
||||
}
|
736
doc/devel/spicy/autogen/zeek-functions.spicy
Normal file
736
doc/devel/spicy/autogen/zeek-functions.spicy
Normal file
|
@ -0,0 +1,736 @@
|
|||
.. _spicy_confirm_protocol:
|
||||
|
||||
.. rubric:: ``function zeek::confirm_protocol()``
|
||||
|
||||
[Deprecated] Triggers a DPD protocol confirmation for the current connection.
|
||||
|
||||
This function has been deprecated and will be removed. Use ``spicy::accept_input``
|
||||
instead, which will have the same effect with Zeek.
|
||||
|
||||
.. _spicy_reject_protocol:
|
||||
|
||||
.. rubric:: ``function zeek::reject_protocol(reason: string)``
|
||||
|
||||
[Deprecated] Triggers a DPD protocol violation for the current connection.
|
||||
|
||||
This function has been deprecated and will be removed. Use ``spicy::decline_input``
|
||||
instead, which will have the same effect with Zeek.
|
||||
|
||||
.. _spicy_weird:
|
||||
|
||||
.. rubric:: ``function zeek::weird(id: string, addl: string = "") : &cxxname="zeek::spicy::rt::weird";``
|
||||
|
||||
Reports a "weird" to Zeek. This should be used with similar semantics as in
|
||||
Zeek: something quite unexpected happening at the protocol level, which however
|
||||
does not prevent us from continuing to process the connection.
|
||||
|
||||
id: the name of the weird, which (just like in Zeek) should be a *static*
|
||||
string identifying the situation reported (e.g., ``unexpected_command``).
|
||||
|
||||
addl: additional information to record along with the weird
|
||||
|
||||
.. _spicy_is_orig:
|
||||
|
||||
.. rubric:: ``function zeek::is_orig() : bool``
|
||||
|
||||
Returns true if we're currently parsing the originator side of a connection.
|
||||
|
||||
.. _spicy_uid:
|
||||
|
||||
.. rubric:: ``function zeek::uid() : string``
|
||||
|
||||
Returns the current connection's UID.
|
||||
|
||||
.. _spicy_conn_id:
|
||||
|
||||
.. rubric:: ``function zeek::conn_id() : tuple<orig_h: addr, orig_p: port, resp_h: addr, resp_p: port>``
|
||||
|
||||
Returns the current connection's 4-tuple ID to make IP address and port information available.
|
||||
|
||||
.. _spicy_flip_roles:
|
||||
|
||||
.. rubric:: ``function zeek::flip_roles()``
|
||||
|
||||
Instructs Zeek to flip the directionality of the current connection.
|
||||
|
||||
.. _spicy_number_packets:
|
||||
|
||||
.. rubric:: ``function zeek::number_packets() : uint64``
|
||||
|
||||
Returns the number of packets seen so far on the current side of the current connection.
|
||||
|
||||
.. _spicy_has_analyzer:
|
||||
|
||||
.. rubric:: ``function zeek::has_analyzer(analyzer: string, if_enabled: bool = True) : bool``
|
||||
|
||||
Checks if there is a Zeek analyzer of a given name.
|
||||
|
||||
analyzer: the Zeek-side name of the analyzer to check for
|
||||
if_enabled: if true, only checks for analyzers that are enabled
|
||||
|
||||
Returns: true if an analyzer of that name exists (and, when ``if_enabled`` is set, is enabled).
|
||||
|
||||
.. _spicy_analyzer_type:
|
||||
|
||||
.. rubric:: ``function zeek::analyzer_type(analyzer: string, if_enabled: bool = True) : AnalyzerType``
|
||||
|
||||
Returns the type of a Zeek analyzer of a given name.
|
||||
|
||||
analyzer: the Zeek-side name of the analyzer to check
|
||||
if_enabled: if true, only checks for analyzers that are enabled
|
||||
|
||||
Returns the type of the analyzer if it exists, or ``Undef`` if it does not.
|
||||
|
||||
.. _spicy_protocol_begin:
|
||||
|
||||
.. rubric:: ``function zeek::protocol_begin(analyzer: optional<string>, protocol: spicy::Protocol = spicy::Protocol::TCP)``
|
||||
|
||||
Adds a Zeek-side child protocol analyzer to the current connection.
|
||||
|
||||
If the same analyzer was added previously with `protocol_handle_get_or_create` or
|
||||
`protocol_begin` with same argument, and not closed with `protocol_handle_close`
|
||||
or `protocol_end`, no new analyzer will be added.
|
||||
|
||||
See `protocol_handle_get_or_create` for lifetime and error semantics.
|
||||
|
||||
analyzer: type of analyzer to instantiate, specified through its Zeek-side
|
||||
name (similar to what Zeek's signature action `enable` takes)
|
||||
|
||||
protocol: the transport-layer protocol that the analyzer uses; only TCP is
|
||||
currently supported here
|
||||
|
||||
Note: For backwards compatibility, the analyzer argument can be left unset to add
|
||||
a DPD analyzer. This use is deprecated, though; use the single-argument version of
|
||||
`protocol_begin` for that instead.
|
||||
|
||||
.. _spicy_protocol_begin_2:
|
||||
|
||||
.. rubric:: ``function zeek::protocol_begin(protocol: spicy::Protocol = spicy::Protocol::TCP)``
|
||||
|
||||
Adds a Zeek-side DPD child protocol analyzer performing dynamic protocol detection
|
||||
on subsequently provided data.
|
||||
|
||||
If the same DPD analyzer was added previously with `protocol_handle_get_or_create` or
|
||||
`protocol_begin` with same argument, and not closed with `protocol_handle_close`
|
||||
or `protocol_end`, no new analyzer will be added.
|
||||
|
||||
See `protocol_handle_get_or_create` for lifetime and error semantics.
|
||||
|
||||
protocol: the transport-layer protocol on which to perform protocol detection;
|
||||
only TCP is currently supported here
|
||||
|
||||
.. _spicy_protocol_handle_get_or_create:
|
||||
|
||||
.. rubric:: ``function zeek::protocol_handle_get_or_create(analyzer: string, protocol: spicy::Protocol = spicy::Protocol::TCP) : ProtocolHandle``
|
||||
|
||||
Gets a handle to a Zeek-side child protocol analyzer for the current connection.
|
||||
|
||||
If no such child exists yet it will be added; otherwise a handle to the
|
||||
existing child protocol analyzer will be returned.
|
||||
|
||||
This function will return an error if:
|
||||
|
||||
- not called from a protocol analyzer, or
|
||||
- the requested child protocol analyzer is of unknown type or not supported by the requested transport protocol, or
|
||||
- creation of a child analyzer of the requested type was prevented by a
|
||||
previous call of `disable_analyzer` with `prevent=T`
|
||||
|
||||
By default, any newly created child protocol analyzer will remain alive
|
||||
until Zeek expires the current connection's state. Alternatively, one
|
||||
can call `protocol_handle_close` or `protocol_end` to delete the analyzer
|
||||
earlier.
|
||||
|
||||
analyzer: type of analyzer to get or instantiate, specified through its Zeek-side
|
||||
name (similar to what Zeek's signature action `enable` takes).
|
||||
|
||||
protocol: the transport-layer protocol that the analyser uses; only TCP is
|
||||
currently supported here
|
||||
|
||||
|
||||
.. _spicy_protocol_data_in:
|
||||
|
||||
.. rubric:: ``function zeek::protocol_data_in(is_orig: bool, data: bytes, protocol: spicy::Protocol = spicy::Protocol::TCP)``
|
||||
|
||||
Forwards protocol data to all previously instantiated Zeek-side child protocol analyzers of a given transport-layer.
|
||||
|
||||
is_orig: true to feed the data to the child's originator side, false for the responder
|
||||
|
||||
data: chunk of data to forward to child analyzer
|
||||
|
||||
protocol: the transport-layer protocol of the children to forward to; only TCP is currently supported here
|
||||
|
||||
.. _spicy_protocol_data_in_2:
|
||||
|
||||
.. rubric:: ``function zeek::protocol_data_in(is_orig: bool, data: bytes, h: ProtocolHandle)``
|
||||
|
||||
Forwards protocol data to a specific previously instantiated Zeek-side child analyzer.
|
||||
|
||||
is_orig: true to feed the data to the child's originator side, false for the responder
|
||||
|
||||
data: chunk of data to forward to child analyzer
|
||||
|
||||
h: handle to the child analyzer to forward data into
|
||||
|
||||
.. _spicy_protocol_gap:
|
||||
|
||||
.. rubric:: ``function zeek::protocol_gap(is_orig: bool, offset: uint64, len: uint64, h: optional<ProtocolHandle> = Null)``
|
||||
|
||||
Signals a gap in input data to all previously instantiated Zeek-side child protocol analyzers.
|
||||
|
||||
is_orig: true to signal gap to the child's originator side, false for the responder
|
||||
|
||||
offset: start offset of gap in input stream
|
||||
|
||||
len: size of gap
|
||||
|
||||
h: optional handle to the child analyzer to signal a gap to, else signal to all child analyzers
|
||||
|
||||
.. _spicy_protocol_end:
|
||||
|
||||
.. rubric:: ``function zeek::protocol_end()``
|
||||
|
||||
Signals end-of-data to all previously instantiated Zeek-side child protocol
|
||||
analyzers and removes them.
|
||||
|
||||
.. _spicy_protocol_handle_close:
|
||||
|
||||
.. rubric:: ``function zeek::protocol_handle_close(handle: ProtocolHandle)``
|
||||
|
||||
Signals end-of-data to the given child analyzer and removes it.
|
||||
|
||||
The given handle must be live, i.e., it must not have been used in a
|
||||
previous protocol_handle_close call, and must not have been live when
|
||||
protocol_end was called. If the handle is not live a runtime error will
|
||||
be triggered.
|
||||
|
||||
handle: handle to the child analyzer to remove
|
||||
|
||||
.. _spicy_file_begin:
|
||||
|
||||
.. rubric:: ``function zeek::file_begin(mime_type: optional<string> = Null, fuid: optional<string> = Null) : string``
|
||||
|
||||
Signals the beginning of a file to Zeek's file analysis, associating it with the current connection.
|
||||
Optionally, a mime type can be provided. It will be passed on to Zeek's file analysis framework.
|
||||
Optionally, a file ID can be provided. It will be passed on to Zeek's file analysis framework.
|
||||
Returns the Zeek-side file ID of the new file.
|
||||
|
||||
This function creates a new Zeek file analyzer that will remain alive until
|
||||
either `file_end` gets called, or Zeek eventually expires the analyzer
|
||||
through a timeout. (As Zeek does not tie a file analyzer's lifetime to any
|
||||
connection, it may survive the termination of the current connection.)
|
||||
|
||||
.. _spicy_fuid:
|
||||
|
||||
.. rubric:: ``function zeek::fuid() : string``
|
||||
|
||||
Returns the current file's FUID.
|
||||
|
||||
.. _spicy_terminate_session:
|
||||
|
||||
.. rubric:: ``function zeek::terminate_session()``
|
||||
|
||||
Terminates the currently active Zeek-side session, flushing all state. Any
|
||||
subsequent activity will start a new session from scratch. This can only be
|
||||
called from inside a protocol analyzer.
|
||||
|
||||
.. _spicy_skip_input:
|
||||
|
||||
.. rubric:: ``function zeek::skip_input()``
|
||||
|
||||
Tells Zeek to skip sending any further input data to the current analyzer.
|
||||
This is supported for protocol and file analyzers.
|
||||
|
||||
.. _spicy_file_set_size:
|
||||
|
||||
.. rubric:: ``function zeek::file_set_size(size: uint64, fid: optional<string> = Null)``
|
||||
|
||||
Signals the expected size of a file to Zeek's file analysis.
|
||||
|
||||
size: expected size of file
|
||||
fid: Zeek-side ID of the file to operate on; if not given, the file started by the most recent file_begin() will be used
|
||||
|
||||
.. _spicy_file_data_in:
|
||||
|
||||
.. rubric:: ``function zeek::file_data_in(data: bytes, fid: optional<string> = Null)``
|
||||
|
||||
Passes file content on to Zeek's file analysis.
|
||||
|
||||
data: chunk of raw data to pass into analysis
|
||||
fid: Zeek-side ID of the file to operate on; if not given, the file started by the most recent file_begin() will be used
|
||||
|
||||
.. _spicy_file_data_in_at_offset:
|
||||
|
||||
.. rubric:: ``function zeek::file_data_in_at_offset(data: bytes, offset: uint64, fid: optional<string> = Null)``
|
||||
|
||||
Passes file content at a specific offset on to Zeek's file analysis.
|
||||
|
||||
data: chunk of raw data to pass into analysis
|
||||
offset: position in file where data starts
|
||||
fid: Zeek-side ID of the file to operate on; if not given, the file started by the most recent file_begin() will be used
|
||||
|
||||
.. _spicy_file_gap:
|
||||
|
||||
.. rubric:: ``function zeek::file_gap(offset: uint64, len: uint64, fid: optional<string> = Null)``
|
||||
|
||||
Signals a gap in a file to Zeek's file analysis.
|
||||
|
||||
offset: position in file where gap starts
|
||||
len: size of gap
|
||||
fid: Zeek-side ID of the file to operate on; if not given, the file started by the most recent file_begin() will be used
|
||||
|
||||
.. _spicy_file_end:
|
||||
|
||||
.. rubric:: ``function zeek::file_end(fid: optional<string> = Null)``
|
||||
|
||||
Signals the end of a file to Zeek's file analysis.
|
||||
|
||||
fid: Zeek-side ID of the file to operate on; if not given, the file started by the most recent file_begin() will be used
|
||||
|
||||
.. _spicy_forward_packet:
|
||||
|
||||
.. rubric:: ``function zeek::forward_packet(identifier: uint32)``
|
||||
|
||||
Inside a packet analyzer, forwards what data remains after parsing the top-level unit
|
||||
on to another analyzer. The index specifies the target, per the current dispatcher table.
|
||||
|
||||
.. _spicy_network_time:
|
||||
|
||||
.. rubric:: ``function zeek::network_time() : time``
|
||||
|
||||
Gets the network time from Zeek.
|
||||
|
||||
.. _spicy_get_address:
|
||||
|
||||
.. rubric:: ``function zeek::get_address(id: string) : addr``
|
||||
|
||||
Returns the value of a global Zeek script variable of Zeek type ``addr``.
|
||||
Throws an exception if there's no such Zeek of that name, or if it's not of
|
||||
the expected type.
|
||||
|
||||
id: fully-qualified name of the global Zeek variable to retrieve
|
||||
|
||||
.. _spicy_get_bool:
|
||||
|
||||
.. rubric:: ``function zeek::get_bool(id: string) : bool``
|
||||
|
||||
Returns the value of a global Zeek script variable of Zeek type ``bool``.
|
||||
Throws an exception if there's no such Zeek of that name, or if it's not of
|
||||
the expected type.
|
||||
|
||||
id: fully-qualified name of the global Zeek variable to retrieve
|
||||
|
||||
.. _spicy_get_count:
|
||||
|
||||
.. rubric:: ``function zeek::get_count(id: string) : uint64``
|
||||
|
||||
Returns the value of a global Zeek script variable of Zeek type ``count``.
|
||||
Throws an exception if there's no such Zeek of that name, or if it's not of
|
||||
the expected type.
|
||||
|
||||
id: fully-qualified name of the global Zeek variable to retrieve
|
||||
|
||||
.. _spicy_get_double:
|
||||
|
||||
.. rubric:: ``function zeek::get_double(id: string) : real``
|
||||
|
||||
Returns the value of a global Zeek script variable of Zeek type ``double``.
|
||||
Throws an exception if there's no such Zeek of that name, or if it's not of
|
||||
the expected type.
|
||||
|
||||
id: fully-qualified name of the global Zeek variable to retrieve
|
||||
|
||||
.. _spicy_get_enum:
|
||||
|
||||
.. rubric:: ``function zeek::get_enum(id: string) : string``
|
||||
|
||||
Returns the value of a global Zeek script variable of Zeek type ``enum``.
|
||||
The value is returned as a string containing the enum's label name, without
|
||||
any scope. Throws an exception if there's no such Zeek of that name, or if
|
||||
it's not of the expected type.
|
||||
|
||||
id: fully-qualified name of the global Zeek variable to retrieve
|
||||
|
||||
.. _spicy_get_int:
|
||||
|
||||
.. rubric:: ``function zeek::get_int(id: string) : int64``
|
||||
|
||||
Returns the value of a global Zeek script variable of Zeek type ``int``.
|
||||
Throws an exception if there's no such Zeek of that name, or if it's not of
|
||||
the expected type.
|
||||
|
||||
id: fully-qualified name of the global Zeek variable to retrieve
|
||||
|
||||
.. _spicy_get_interval:
|
||||
|
||||
.. rubric:: ``function zeek::get_interval(id: string) : interval``
|
||||
|
||||
Returns the value of a global Zeek script variable of Zeek type
|
||||
``interval``. Throws an exception if there's no such Zeek of that name, or
|
||||
if it's not of the expected type.
|
||||
|
||||
id: fully-qualified name of the global Zeek variable to retrieve
|
||||
|
||||
.. _spicy_get_port:
|
||||
|
||||
.. rubric:: ``function zeek::get_port(id: string) : port``
|
||||
|
||||
Returns the value of a global Zeek script variable of Zeek type ``port``.
|
||||
Throws an exception if there's no such Zeek of that name, or if it's not of
|
||||
the expected type.
|
||||
|
||||
id: fully-qualified name of the global Zeek variable to retrieve
|
||||
|
||||
.. _spicy_get_record:
|
||||
|
||||
.. rubric:: ``function zeek::get_record(id: string) : ZeekRecord``
|
||||
|
||||
Returns the value of a global Zeek script variable of Zeek type ``record``.
|
||||
The value is returned as an opaque handle to the record, which can be used
|
||||
with the ``zeek::record_*()`` functions to access the record's fields.
|
||||
Throws an exception if there's no such Zeek of that name, or if it's not of
|
||||
the expected type.
|
||||
|
||||
id: fully-qualified name of the global Zeek variable to retrieve
|
||||
|
||||
.. _spicy_get_set:
|
||||
|
||||
.. rubric:: ``function zeek::get_set(id: string) : ZeekSet``
|
||||
|
||||
Returns the value of a global Zeek script variable of Zeek type ``set``. The
|
||||
value is returned as an opaque handle to the set, which can be used with the
|
||||
``zeek::set_*()`` functions to access the set's content. Throws an exception
|
||||
if there's no such Zeek variable of that name, or if it's not of the expected type.
|
||||
|
||||
id: fully-qualified name of the global Zeek variable to retrieve
|
||||
|
||||
.. _spicy_get_string:
|
||||
|
||||
.. rubric:: ``function zeek::get_string(id: string) : bytes``
|
||||
|
||||
Returns the value of a global Zeek script variable of Zeek type ``string``.
|
||||
The string's value is returned as a Spicy ``bytes`` value. Throws an
|
||||
exception if there's no such Zeek variable of that name, or if it's not of the
|
||||
expected type.
|
||||
|
||||
id: fully-qualified name of the global Zeek variable to retrieve
|
||||
|
||||
.. _spicy_get_subnet:
|
||||
|
||||
.. rubric:: ``function zeek::get_subnet(id: string) : network``
|
||||
|
||||
Returns the value of a global Zeek script variable of Zeek type ``subnet``.
|
||||
Throws an exception if there's no such Zeek variable of that name, or if it's not of
|
||||
the expected type.
|
||||
|
||||
id: fully-qualified name of the global Zeek variable to retrieve
|
||||
|
||||
.. _spicy_get_table:
|
||||
|
||||
.. rubric:: ``function zeek::get_table(id: string) : ZeekTable``
|
||||
|
||||
Returns the value of a global Zeek script variable of Zeek type ``table``.
|
||||
The value is returned as an opaque handle to the table, which can be used with
|
||||
the ``zeek::table_*()`` functions to access the table's content. Throws an
|
||||
exception if there's no such Zeek variable of that name, or if it's not of the
|
||||
expected type.
|
||||
|
||||
id: fully-qualified name of the global Zeek variable to retrieve
|
||||
|
||||
.. _spicy_get_time:
|
||||
|
||||
.. rubric:: ``function zeek::get_time(id: string) : time``
|
||||
|
||||
Returns the value of a global Zeek script variable of Zeek type ``time``.
|
||||
Throws an exception if there's no such Zeek variable of that name, or if it's not of
|
||||
the expected type.
|
||||
|
||||
id: fully-qualified name of the global Zeek variable to retrieve
|
||||
|
||||
.. _spicy_get_vector:
|
||||
|
||||
.. rubric:: ``function zeek::get_vector(id: string) : ZeekVector``
|
||||
|
||||
Returns the value of a global Zeek script variable of Zeek type ``vector``.
|
||||
The value is returned as an opaque handle to the vector, which can be used
|
||||
with the ``zeek::vector_*()`` functions to access the vector's content.
|
||||
Throws an exception if there's no such Zeek variable of that name, or if it's not of
|
||||
the expected type.
|
||||
|
||||
id: fully-qualified name of the global Zeek variable to retrieve
|
||||
|
||||
.. _spicy_get_value:
|
||||
|
||||
.. rubric:: ``function zeek::get_value(id: string) : ZeekVal``
|
||||
|
||||
Returns an opaque handle to a global Zeek script variable. The handle can be
|
||||
used with the ``zeek::as_*()`` functions to access the variable's value.
|
||||
Throws an exception if there's no Zeek variable of that name.
|
||||
|
||||
.. _spicy_as_address:
|
||||
|
||||
.. rubric:: ``function zeek::as_address(v: ZeekVal) : addr``
|
||||
|
||||
Returns a Zeek ``addr`` value referenced by an opaque handle. Throws an
|
||||
exception if the referenced value is not of the expected type.
|
||||
|
||||
.. _spicy_as_bool:
|
||||
|
||||
.. rubric:: ``function zeek::as_bool(v: ZeekVal) : bool``
|
||||
|
||||
Returns a Zeek ``bool`` value referenced by an opaque handle. Throws an
|
||||
exception if the referenced value is not of the expected type.
|
||||
|
||||
.. _spicy_as_count:
|
||||
|
||||
.. rubric:: ``function zeek::as_count(v: ZeekVal) : uint64``
|
||||
|
||||
Returns a Zeek ``count`` value referenced by an opaque handle. Throws an
|
||||
exception if the referenced value is not of the expected type.
|
||||
|
||||
.. _spicy_as_double:
|
||||
|
||||
.. rubric:: ``function zeek::as_double(v: ZeekVal) : real``
|
||||
|
||||
Returns a Zeek ``double`` value referenced by an opaque handle. Throws an
|
||||
exception if the referenced value is not of the expected type.
|
||||
|
||||
.. _spicy_as_enum:
|
||||
|
||||
.. rubric:: ``function zeek::as_enum(v: ZeekVal) : string``
|
||||
|
||||
Returns a Zeek ``enum`` value referenced by an opaque handle. Throws an
|
||||
exception if the referenced value is not of the expected type.
|
||||
|
||||
.. _spicy_as_int:
|
||||
|
||||
.. rubric:: ``function zeek::as_int(v: ZeekVal) : int64``
|
||||
|
||||
Returns a Zeek ``int`` value referenced by an opaque handle. Throws an
|
||||
exception if the referenced value is not of the expected type.
|
||||
|
||||
.. _spicy_as_interval:
|
||||
|
||||
.. rubric:: ``function zeek::as_interval(v: ZeekVal) : interval``
|
||||
|
||||
Returns a Zeek ``interval`` value referenced by an opaque handle. Throws an
|
||||
exception if the referenced value is not of the expected type.
|
||||
|
||||
.. _spicy_as_port:
|
||||
|
||||
.. rubric:: ``function zeek::as_port(v: ZeekVal) : port``
|
||||
|
||||
Returns a Zeek ``port`` value referenced by an opaque handle. Throws an
|
||||
exception if the referenced value is not of the expected type.
|
||||
|
||||
.. _spicy_as_record:
|
||||
|
||||
.. rubric:: ``function zeek::as_record(v: ZeekVal) : ZeekRecord``
|
||||
|
||||
Returns a Zeek ``record`` value referenced by an opaque handle. Throws an
|
||||
exception if the referenced value is not of the expected type.
|
||||
|
||||
.. _spicy_as_set:
|
||||
|
||||
.. rubric:: ``function zeek::as_set(v: ZeekVal) : ZeekSet``
|
||||
|
||||
Returns a Zeek ``set`` value referenced by an opaque handle. Throws an
|
||||
exception if the referenced value is not of the expected type.
|
||||
|
||||
.. _spicy_as_string:
|
||||
|
||||
.. rubric:: ``function zeek::as_string(v: ZeekVal) : bytes``
|
||||
|
||||
Returns a Zeek ``string`` value referenced by an opaque handle. The string's
|
||||
value is returned as a Spicy ``bytes`` value. Throws an exception if the
|
||||
referenced value is not of the expected type.
|
||||
|
||||
.. _spicy_as_subnet:
|
||||
|
||||
.. rubric:: ``function zeek::as_subnet(v: ZeekVal) : network``
|
||||
|
||||
Returns a Zeek ``subnet`` value referenced by an opaque handle. Throws an
|
||||
exception if the referenced value is not of the expected type.
|
||||
|
||||
.. _spicy_as_table:
|
||||
|
||||
.. rubric:: ``function zeek::as_table(v: ZeekVal) : ZeekTable``
|
||||
|
||||
Returns a Zeek ``table`` value referenced by an opaque handle. Throws an
|
||||
exception if the referenced value is not of the expected type.
|
||||
|
||||
.. _spicy_as_time:
|
||||
|
||||
.. rubric:: ``function zeek::as_time(v: ZeekVal) : time``
|
||||
|
||||
Returns a Zeek ``time`` value referenced by an opaque handle. Throws an
|
||||
exception if the referenced value is not of the expected type.
|
||||
|
||||
.. _spicy_as_vector:
|
||||
|
||||
.. rubric:: ``function zeek::as_vector(v: ZeekVal) : ZeekVector``
|
||||
|
||||
Returns a Zeek ``vector`` value referenced by an opaque handle. Throws an
|
||||
exception if the referenced value is not of the expected type.
|
||||
|
||||
.. _spicy_set_contains:
|
||||
|
||||
.. rubric:: ``function zeek::set_contains(id: string, v: any) : bool``
|
||||
|
||||
Returns true if a Zeek set contains a given value. Throws an exception if
|
||||
the given ID does not exist, or does not have the expected type.
|
||||
|
||||
id: fully-qualified name of the global Zeek set to check
|
||||
v: value to check for, which must be of the Spicy-side equivalent of the set's key type
|
||||
|
||||
.. _spicy_set_contains_2:
|
||||
|
||||
.. rubric:: ``function zeek::set_contains(s: ZeekSet, v: any) : bool``
|
||||
|
||||
Returns true if a Zeek set contains a given value. Throws an exception if
|
||||
the set does not have the expected type.
|
||||
|
||||
s: opaque handle to the Zeek set, as returned by other functions
|
||||
v: value to check for, which must be of the Spicy-side equivalent of the set's key type
|
||||
|
||||
.. _spicy_table_contains:
|
||||
|
||||
.. rubric:: ``function zeek::table_contains(id: string, v: any) : bool``
|
||||
|
||||
Returns true if a Zeek table contains a given value. Throws an exception if
|
||||
the given ID does not exist, or does not have the expected type.
|
||||
|
||||
id: fully-qualified name of the global Zeek table to check
|
||||
v: value to check for, which must be of the Spicy-side equivalent of the table's key type
|
||||
|
||||
.. _spicy_table_contains_2:
|
||||
|
||||
.. rubric:: ``function zeek::table_contains(t: ZeekTable, v: any) : bool``
|
||||
|
||||
Returns true if a Zeek table contains a given value. Throws an exception if
|
||||
the table does not have the expected type.
|
||||
|
||||
t: opaque handle to the Zeek table, as returned by other functions
|
||||
v: value to check for, which must be of the Spicy-side equivalent of the table's key type
|
||||
|
||||
.. _spicy_table_lookup:
|
||||
|
||||
.. rubric:: ``function zeek::table_lookup(id: string, v: any) : optional<ZeekVal>``
|
||||
|
||||
Returns the value associated with a key in a Zeek table. Returns an error
|
||||
result if the key does not exist in the table. Throws an exception if the
|
||||
given table ID does not exist, or does not have the expected type.
|
||||
|
||||
id: fully-qualified name of the global Zeek table to check
|
||||
v: value to lookup, which must be of the Spicy-side equivalent of the table's key type
|
||||
|
||||
.. _spicy_table_lookup_2:
|
||||
|
||||
.. rubric:: ``function zeek::table_lookup(t: ZeekTable, v: any) : optional<ZeekVal>``
|
||||
|
||||
Returns the value associated with a key in a Zeek table. Returns an error
|
||||
result if the key does not exist in the table. Throws an exception if the
|
||||
given table does not have the expected type.
|
||||
|
||||
t: opaque handle to the Zeek table, as returned by other functions
|
||||
v: value to lookup, which must be of the Spicy-side equivalent of the table's key type
|
||||
|
||||
.. _spicy_record_has_value:
|
||||
|
||||
.. rubric:: ``function zeek::record_has_value(id: string, field: string) : bool``
|
||||
|
||||
Returns true if a Zeek record provides a value for a given field. This
|
||||
includes fields with `&default` values. Throws an exception if the given ID
|
||||
does not exist, or does not have the expected type.
|
||||
|
||||
id: fully-qualified name of the global Zeek record to check
|
||||
field: name of the field to check
|
||||
|
||||
.. _spicy_record_has_value_2:
|
||||
|
||||
.. rubric:: ``function zeek::record_has_value(r: ZeekRecord, field: string) : bool``
|
||||
|
||||
Returns true if a Zeek record provides a value for a given field.
|
||||
This includes fields with `&default` values.
|
||||
|
||||
r: opaque handle to the Zeek record, as returned by other functions
|
||||
field: name of the field to check
|
||||
|
||||
.. _spicy_record_has_field:
|
||||
|
||||
.. rubric:: ``function zeek::record_has_field(id: string, field: string) : bool``
|
||||
|
||||
Returns true if the type of a Zeek record has a field of a given name.
|
||||
Throws an exception if the given ID does not exist, or does not have the
|
||||
expected type.
|
||||
|
||||
id: fully-qualified name of the global Zeek record to check
|
||||
field: name of the field to check
|
||||
|
||||
.. _spicy_record_has_field_2:
|
||||
|
||||
.. rubric:: ``function zeek::record_has_field(r: ZeekRecord, field: string) : bool``
|
||||
|
||||
Returns true if the type of a Zeek record has a field of a given name.
|
||||
|
||||
r: opaque handle to the Zeek record, as returned by other functions
|
||||
field: name of the field to check
|
||||
|
||||
.. _spicy_record_field:
|
||||
|
||||
.. rubric:: ``function zeek::record_field(id: string, field: string) : ZeekVal``
|
||||
|
||||
Returns a field's value from a Zeek record. Throws an exception if the given
|
||||
ID does not exist, or does not have the expected type; or if there's no such
|
||||
field in the record type, or if the field does not have a value.
|
||||
|
||||
id: fully-qualified name of the global Zeek record to check
|
||||
field: name of the field to retrieve
|
||||
|
||||
.. _spicy_record_field_2:
|
||||
|
||||
.. rubric:: ``function zeek::record_field(r: ZeekRecord, field: string) : ZeekVal``
|
||||
|
||||
Returns a field's value from a Zeek record. Throws an exception if the given
|
||||
record does not have such a field, or if the field does not have a value.
|
||||
|
||||
r: opaque handle to the Zeek record, as returned by other functions
|
||||
field: name of the field to retrieve
|
||||
|
||||
.. _spicy_vector_index:
|
||||
|
||||
.. rubric:: ``function zeek::vector_index(id: string, index: uint64) : ZeekVal``
|
||||
|
||||
Returns the value of an index in a Zeek vector. Throws an exception if the
|
||||
given ID does not exist, or does not have the expected type; or if the index
|
||||
is out of bounds.
|
||||
|
||||
id: fully-qualified name of the global Zeek vector to check
|
||||
index: index of the element to retrieve
|
||||
|
||||
.. _spicy_vector_index_2:
|
||||
|
||||
.. rubric:: ``function zeek::vector_index(v: ZeekVector, index: uint64) : ZeekVal``
|
||||
|
||||
Returns the value of an index in a Zeek vector. Throws an exception if the
|
||||
index is out of bounds.
|
||||
|
||||
v: opaque handle to the Zeek vector, as returned by other functions
|
||||
index: index of the element to retrieve
|
||||
|
||||
.. _spicy_vector_size:
|
||||
|
||||
.. rubric:: ``function zeek::vector_size(id: string) : uint64``
|
||||
|
||||
Returns the size of a Zeek vector. Throws an exception if the given ID does
|
||||
not exist, or does not have the expected type.
|
||||
|
||||
id: fully-qualified name of the global Zeek vector to check
|
||||
|
||||
.. _spicy_vector_size_2:
|
||||
|
||||
.. rubric:: ``function zeek::vector_size(v: ZeekVector) : uint64``
|
||||
|
||||
Returns the size of a Zeek vector.
|
||||
|
||||
v: opaque handle to the Zeek vector, as returned by other functions
|
||||
|
5
doc/devel/spicy/examples/my-http.evt
Normal file
5
doc/devel/spicy/examples/my-http.evt
Normal file
|
@ -0,0 +1,5 @@
|
|||
protocol analyzer spicy::MyHTTP over TCP:
|
||||
parse originator with MyHTTP::RequestLine,
|
||||
port 12345/tcp;
|
||||
|
||||
on MyHTTP::RequestLine -> event MyHTTP::request_line($conn, self.method, self.uri, self.version.number);
|
26
doc/devel/spicy/examples/my-http.spicy
Normal file
26
doc/devel/spicy/examples/my-http.spicy
Normal file
|
@ -0,0 +1,26 @@
|
|||
# @TEST-EXEC: echo "GET /index.html HTTP/1.0" | spicy-driver %INPUT >output
|
||||
# @TEST-EXEC: btest-diff output
|
||||
|
||||
module MyHTTP;
|
||||
|
||||
const Token = /[^ \t\r\n]+/;
|
||||
const WhiteSpace = /[ \t]+/;
|
||||
const NewLine = /\r?\n/;
|
||||
|
||||
type Version = unit {
|
||||
: /HTTP\//;
|
||||
number: /[0-9]+\.[0-9]+/;
|
||||
};
|
||||
|
||||
public type RequestLine = unit {
|
||||
method: Token;
|
||||
: WhiteSpace;
|
||||
uri: Token;
|
||||
: WhiteSpace;
|
||||
version: Version;
|
||||
: NewLine;
|
||||
|
||||
on %done {
|
||||
print self.method, self.uri, self.version.number;
|
||||
}
|
||||
};
|
4
doc/devel/spicy/examples/my-http.zeek
Normal file
4
doc/devel/spicy/examples/my-http.zeek
Normal file
|
@ -0,0 +1,4 @@
|
|||
event MyHTTP::request_line(c: connection, method: string, uri: string, version: string)
|
||||
{
|
||||
print fmt("Zeek saw from %s: %s %s %s", c$id$orig_h, method, uri, version);
|
||||
}
|
BIN
doc/devel/spicy/examples/request-line.pcap
Normal file
BIN
doc/devel/spicy/examples/request-line.pcap
Normal file
Binary file not shown.
37
doc/devel/spicy/examples/tftp-schedule-analyzer.zeek
Normal file
37
doc/devel/spicy/examples/tftp-schedule-analyzer.zeek
Normal file
|
@ -0,0 +1,37 @@
|
|||
|
||||
function schedule_tftp_analyzer(id: conn_id)
|
||||
{
|
||||
# Schedule the TFTP analyzer for the expected next packet coming in on different
|
||||
# ports. We know that it will be exchanged between same IPs and reuse the
|
||||
# originator's port. "Spicy_TFTP" is the Zeek-side name of the TFTP analyzer
|
||||
# (generated from "Spicy::TFTP" in tftp.evt).
|
||||
Analyzer::schedule_analyzer(id$resp_h, id$orig_h, id$orig_p, Analyzer::ANALYZER_SPICY_TFTP, 1min);
|
||||
}
|
||||
|
||||
event tftp::read_request(c: connection, is_orig: bool, filename: string, mode: string)
|
||||
{
|
||||
print "TFTP read request", c$id, filename, mode;
|
||||
schedule_tftp_analyzer(c$id);
|
||||
}
|
||||
|
||||
event tftp::write_request(c: connection, is_orig: bool, filename: string, mode: string)
|
||||
{
|
||||
print "TFTP write request", c$id, filename, mode;
|
||||
schedule_tftp_analyzer(c$id);
|
||||
}
|
||||
|
||||
# Add handlers for other packet types so that we see their events being generated.
|
||||
event tftp::data(c: connection, is_orig: bool, block_num: count, data: string)
|
||||
{
|
||||
print "TFTP data", block_num, data;
|
||||
}
|
||||
|
||||
event tftp::ack(c: connection, is_orig: bool, block_num: count)
|
||||
{
|
||||
print "TFTP ack", block_num;
|
||||
}
|
||||
|
||||
event tftp::error(c: connection, is_orig: bool, code: count, msg: string)
|
||||
{
|
||||
print "TFTP error", code, msg;
|
||||
}
|
|
@ -0,0 +1,7 @@
|
|||
protocol analyzer spicy::TFTP over UDP:
|
||||
parse with TFTP::Packet,
|
||||
port 69/udp;
|
||||
|
||||
import TFTP;
|
||||
|
||||
on TFTP::Request -> event tftp::request($conn, $is_orig, self.filename, self.mode);
|
|
@ -0,0 +1,4 @@
|
|||
event tftp::request(c: connection, is_orig: bool, filename: string, mode: string)
|
||||
{
|
||||
print "TFTP request", c$id, is_orig, filename, mode;
|
||||
}
|
7
doc/devel/spicy/examples/tftp-single-request.evt
Normal file
7
doc/devel/spicy/examples/tftp-single-request.evt
Normal file
|
@ -0,0 +1,7 @@
|
|||
protocol analyzer spicy::TFTP over UDP:
|
||||
parse with TFTP::Packet,
|
||||
port 69/udp;
|
||||
|
||||
import TFTP;
|
||||
|
||||
on TFTP::Request -> event tftp::request($conn);
|
4
doc/devel/spicy/examples/tftp-single-request.zeek
Normal file
4
doc/devel/spicy/examples/tftp-single-request.zeek
Normal file
|
@ -0,0 +1,4 @@
|
|||
event tftp::request(c: connection)
|
||||
{
|
||||
print "TFTP request", c$id;
|
||||
}
|
9
doc/devel/spicy/examples/tftp-two-requests.zeek
Normal file
9
doc/devel/spicy/examples/tftp-two-requests.zeek
Normal file
|
@ -0,0 +1,9 @@
|
|||
event tftp::read_request(c: connection, is_orig: bool, filename: string, mode: string)
|
||||
{
|
||||
print "TFTP read request", c$id, is_orig, filename, mode;
|
||||
}
|
||||
|
||||
event tftp::write_request(c: connection, is_orig: bool, filename: string, mode: string)
|
||||
{
|
||||
print "TFTP write request", c$id, is_orig, filename, mode;
|
||||
}
|
88
doc/devel/spicy/faq.rst
Normal file
88
doc/devel/spicy/faq.rst
Normal file
|
@ -0,0 +1,88 @@
|
|||
|
||||
===
|
||||
FAQ
|
||||
===
|
||||
|
||||
.. _faq_zeek_install_spicy_and_plugin_to_use_parsers:
|
||||
|
||||
.. rubric:: Do I need to install Spicy and/or a Zeek plugin to use Spicy parsers in Zeek?
|
||||
|
||||
If you're using Zeek >= 5.0 with a default build configuration,
|
||||
there's nothing else you need to install. After installing Zeek, the
|
||||
same folder containing the ``zeek`` binary will also have the relevant
|
||||
Spicy tools, such as ``spicyc`` (provided by Spicy) and ``spicyz``
|
||||
(provided by Zeek). To double check that the Spicy support is indeed
|
||||
available, look for ``Zeek::Spicy`` in the output of ``zeek -N``::
|
||||
|
||||
# zeek -N
|
||||
<...>
|
||||
Zeek::Spicy - Support for Spicy parsers (``*.spicy``, ``*.evt``, ``*.hlto``) (built-in)
|
||||
|
||||
Note that it remains possible to build Zeek against an external Spicy
|
||||
installation, or even without any Spicy support at all. Look at Zeek's
|
||||
``configure`` for corresponding options.
|
||||
|
||||
.. note::
|
||||
|
||||
For some historic background: Zeek 5.0 started bundling Spicy, as well
|
||||
as the former Zeek plugin for Spicy, so that now nothing else needs to
|
||||
be installed separately anymore to use Spicy parsers. Since Zeek 6.0,
|
||||
the code for that former plugin has further moved into Zeek itself,
|
||||
and is now maintained directly by the Zeek developers.
|
||||
|
||||
|
||||
.. _faq_zeek_spicy_dpd_support:
|
||||
|
||||
.. rubric:: Does Spicy support *Dynamic Protocol Detection (DPD)*?
|
||||
|
||||
Yes, see the :ref:`corresponding section <spicy_dpd>` on how to add it
|
||||
to your analyzers.
|
||||
|
||||
.. _faq_zeek_layer2_analyzer:
|
||||
|
||||
.. rubric:: Can I write a Layer 2 protocol analyzer with Spicy?
|
||||
|
||||
Yes, you can. In Zeek terminology a layer 2 protocol analyzer is a packet
|
||||
analyzer, see the :ref:`corresponding section <spicy_packet_analyzer>` on how
|
||||
to declare such an analyzer.
|
||||
|
||||
.. _faq_zeek_print_statements_no_effect:
|
||||
|
||||
.. rubric:: I have ``print`` statements in my Spicy grammar, why do I not see any output when running Zeek?
|
||||
|
||||
Zeek by default disables the output of Spicy-side ``print``
|
||||
statements. To enable them, add ``Spicy::enable_print=T`` to the Zeek
|
||||
command line (or ``redef Spicy::enable_print=T;`` to a Zeek script
|
||||
that you are loading).
|
||||
|
||||
.. _faq_zeek_tcp_analyzer_not_all_messages_recognized:
|
||||
|
||||
.. rubric:: My analyzer recognizes only one or two TCP packets even though there are more in the input.
|
||||
|
||||
In Zeek, a Spicy analyzer parses the sending and receiving sides of a TCP
|
||||
connection each according to the given Spicy grammar. This means that
|
||||
if more than one message can be sent per side the grammar needs to
|
||||
allow for that. For example, if the grammar parses messages of the
|
||||
protocol as ``Message``, the top-level parsing unit given in the EVT
|
||||
file needs to be able to parse a list of messages ``Message[]``.
|
||||
|
||||
One way to express this is to introduce a parser which wraps messages
|
||||
of the protocol in an :spicylink:`anonymous field
|
||||
<programming/parsing.html#anonymous-fields>`.
|
||||
|
||||
.. warning:: Since in general the number of messages exchanged over a TCP
|
||||
connection is unbounded, an anonymous field should be used. If a named field
|
||||
was used instead the parser would need to store all messages over the
|
||||
connection which would lead to unbounded memory growth.
|
||||
|
||||
.. code-block:: spicy
|
||||
|
||||
type Message = unit {
|
||||
# Fields for messages of the protocol.
|
||||
};
|
||||
|
||||
# Parser used e.g., in EVT file.
|
||||
public type Messages = unit {
|
||||
: Message[];
|
||||
};
|
||||
|
118
doc/devel/spicy/getting-started.rst
Normal file
118
doc/devel/spicy/getting-started.rst
Normal file
|
@ -0,0 +1,118 @@
|
|||
|
||||
===============
|
||||
Getting Started
|
||||
===============
|
||||
|
||||
Spicy's own :spicylink:`Getting Started <getting-started.html>` guide
|
||||
uses the following Spicy code to parse a simple HTTP request line:
|
||||
|
||||
.. literalinclude:: examples/my-http.spicy
|
||||
:lines: 4-
|
||||
:caption: my-http.spicy
|
||||
:language: spicy
|
||||
|
||||
While the Spicy documentation goes on to show :spicylink:`how to use
|
||||
this to parse corresponding data from the command line
|
||||
<getting-started.html#a-simple-parser>`, here we will instead leverage
|
||||
the ``RequestLine`` parser to build a proof-of-concept protocol
|
||||
analyzer for Zeek. While this all remains simplified here, the
|
||||
following, more in-depth :ref:`spicy_tutorial` demonstrates how
|
||||
to build a complete analyzer for a real protocol.
|
||||
|
||||
.. rubric:: Preparations
|
||||
|
||||
Because Zeek works from network packets, we first need a packet trace
|
||||
with the payload we want to parse. We can't just use a normal HTTP
|
||||
session as our simple parser wouldn't go further than just the first
|
||||
line of the protocol exchange and then bail out with an error. So
|
||||
instead, for our example we create a custom packet trace with a TCP
|
||||
connection that carries just a single HTTP request line as its
|
||||
payload::
|
||||
|
||||
# tcpdump -i lo0 -w request-line.pcap port 12345 &
|
||||
# nc -l 12345 &
|
||||
# echo "GET /index.html HTTP/1.0" | nc localhost 12345
|
||||
# killall tcpdump nc
|
||||
|
||||
This gets us :download:`this trace file <examples/request-line.pcap>`.
|
||||
|
||||
.. _example_spicy_my_http_adding_analyzer:
|
||||
|
||||
.. rubric:: Adding a Protocol Analyzer
|
||||
|
||||
Now we can go ahead and add a new protocol analyzer to Zeek. We
|
||||
already got the Spicy grammar to parse our connection's payload, it's
|
||||
in ``my-http.spicy``. In order to use this with Zeek, we have two
|
||||
additional things to do: (1) We need to let Zeek know about our new
|
||||
protocol analyzer, including when to use it; and (2) we need to define
|
||||
at least one Zeek event that we want our parser to generate, so that
|
||||
we can then write a Zeek script working with the information that it
|
||||
extracts.
|
||||
|
||||
We do both of these by creating an additional control file for Zeek:
|
||||
|
||||
.. literalinclude:: examples/my-http.evt
|
||||
:caption: my-http.evt
|
||||
:linenos:
|
||||
:language: spicy-evt
|
||||
|
||||
The first block (lines 1-3) tells Zeek that we have a new protocol
|
||||
analyzer to provide. The analyzer's Zeek-side name is
|
||||
``spicy::MyHTTP``, and it's meant to run on top of TCP connections
|
||||
(line 1). Lines 2-3 then provide Zeek with more specifics: The entry
|
||||
point for originator-side payload is the ``MyHTTP::RequestLine`` unit
|
||||
type that our Spicy grammar defines (line 2); and we want Zeek to
|
||||
activate our analyzer for all connections with a responder port of
|
||||
12345 (which, of course, matches the packet trace we created).
|
||||
|
||||
The second block (line 5) tells Zeek that we want to
|
||||
define one event. On the left-hand side of that line we give the unit
|
||||
that is to trigger the event. The right-hand side defines its name and
|
||||
arguments. What we are saying here is that every time a ``RequestLine``
|
||||
line has been fully parsed, we'd like a ``MyHTTP::request_line`` event
|
||||
to go to Zeek. Each event instance will come with four parameters:
|
||||
Three of them are the values of corresponding unit fields, accessed
|
||||
just through normal Spicy expressions (inside an event argument
|
||||
expression, ``self`` refers to the unit instance that has led to the
|
||||
generation of the current event). The first parameter, ``$conn``, is a
|
||||
"magic" keyword that passes the Zeek-side
|
||||
connection ID (``conn_id``) to the event.
|
||||
|
||||
Now we got everything in place that we need for our new protocol
|
||||
analyzer---except for a Zeek script actually doing something with the
|
||||
information we are parsing. Let's use this:
|
||||
|
||||
.. literalinclude:: examples/my-http.zeek
|
||||
:caption: my-http.zeek
|
||||
:language: zeek
|
||||
|
||||
You see a Zeek event handler for the event that we just defined,
|
||||
having the expected signature of four parameters matching the types of
|
||||
the parameter expressions that the ``*.evt`` file specifies. The
|
||||
handler's body then just prints out what it gets.
|
||||
|
||||
.. _example_spicy_my_http:
|
||||
|
||||
Finally we can put together our pieces by compiling the Spicy grammar and the
|
||||
EVT file into an HLTO file with ``spicyz``, and by pointing Zeek at the produced
|
||||
file and the analyzer-specific Zeek scripts::
|
||||
|
||||
# spicyz my-http.spicy my-http.evt -o my-http.hlto
|
||||
# zeek -Cr request-line.pcap my-http.hlto my-http.zeek
|
||||
Zeek saw from 127.0.0.1: GET /index.html 1.0
|
||||
|
||||
When Zeek starts up here the Spicy integration registers a protocol analyzer to
|
||||
the entry point of our Spicy grammar as specified in the EVT file. It then
|
||||
begins processing the packet trace as usual, now activating our new analyzer
|
||||
whenever it sees a TCP connection on port 12345. Accordingly, the
|
||||
``MyHTTP::request_line`` event gets generated once the parser gets to process
|
||||
the session's payload. The Zeek event handler then executes and prints the
|
||||
output we would expect.
|
||||
|
||||
.. note::
|
||||
|
||||
By default, Zeek suppresses any output from Spicy-side
|
||||
``print`` statements. You can add ``Spicy::enable_print=T`` to the
|
||||
command line to see it. In the example above, you would then get
|
||||
an additional line of output: ``GET, /index.html, 1.0``.
|
||||
|
73
doc/devel/spicy/index.rst
Normal file
73
doc/devel/spicy/index.rst
Normal file
|
@ -0,0 +1,73 @@
|
|||
============================
|
||||
Writing Analyzers with Spicy
|
||||
============================
|
||||
|
||||
:spicylink:`Spicy <index.html>` is a parser generator that makes it
|
||||
easy to create robust C++ parsers for network protocols, file formats,
|
||||
and more. Zeek supports integrating Spicy analyzers so that one can
|
||||
create Zeek protocol, packet and file analyzers. This section digs
|
||||
into how that integration works. We begin with a short "Getting
|
||||
Started" guide showing you the basics of using Spicy with Zeek,
|
||||
followed by an in-depth tutorial on adding a complete protocol
|
||||
analyzer to Zeek. The final part consists of a reference section
|
||||
documenting everything the Spicy integration supports.
|
||||
|
||||
While this documentation walks through all the bits and pieces that an
|
||||
analyzer consists of, there's an easy way to get started when writing
|
||||
a new analyzer from scratch: the `Zeek package manager
|
||||
<https://docs.zeek.org/projects/package-manager>`_ can create analyzer
|
||||
scaffolding for you that includes an initial Spicy grammar
|
||||
(``*.spicy``), Zeek integration glue code (``*.evt``; see below) and a
|
||||
corresponding CMake build setup. To create that scaffolding, use the
|
||||
package manager's ``create`` command and pass one of
|
||||
``--features=spicy-protocol-analyzer``,
|
||||
``--features=spicy-packet-analyzer``, or
|
||||
``--features=spicy-file-analyzer`` to create a Zeek protocol, packet,
|
||||
or file analyzer, respectively. See :ref:`the tutorial
|
||||
<zkg_create_package>` for more on this.
|
||||
|
||||
Note that Zeek itself installs the grammars of its builtin Spicy
|
||||
analyzers for potential reuse. For example, the `Finger grammar
|
||||
<https://github.com/zeek/zeek/blob/master/src/analyzer/protocol/finger/finger.spicy>`_
|
||||
gets installed to ``<PREFIX>/share/spicy/finger/finger.spicy``. It can
|
||||
be used in custom code by importing it with ``import Finger from
|
||||
finger;``.
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
:caption: Table of Contents
|
||||
|
||||
installation
|
||||
getting-started
|
||||
tutorial
|
||||
reference
|
||||
faq
|
||||
|
||||
.. note::
|
||||
|
||||
This documentation focuses on writing *external* Spicy analyzers
|
||||
that you can load into Zeek at startup. Zeek also comes with the
|
||||
infrastructure to build Spicy analyzers directly into the
|
||||
executable itself, just like traditional built-in analyzers. We
|
||||
will document this more as we're converting more of Zeek's built-in
|
||||
analyzers over to Spicy. For now, we recommend looking at one of
|
||||
the existing built-in Spicy analyzers (Syslog, Finger) as examples.
|
||||
|
||||
.. _spicy_terminology:
|
||||
|
||||
Terminology
|
||||
===========
|
||||
|
||||
A word on terminology: In Zeek, the term "analyzer" generally refers
|
||||
to a component that processes a particular protocol ("protocol
|
||||
analyzer"), file format ("file analyzer"), or low-level packet
|
||||
structure ("packet analyzer"). "Processing" here means more than just
|
||||
parsing content: An analyzer controls when it wants to be used (e.g.,
|
||||
with connections on specific ports, or with files of a specific MIME
|
||||
type); what events to generate for Zeek's scripting layer; and how to
|
||||
handle any errors occurring during parsing. While Spicy itself focuses
|
||||
just on the parsing part, Spicy makes it possible to provide the
|
||||
remaining pieces to Zeek, turning a Spicy parser into a full Zeek
|
||||
analyzer. That's what we refer to as a "Spicy (protocol/file/packet)
|
||||
analyzer" for Zeek.
|
||||
|
18
doc/devel/spicy/installation.rst
Normal file
18
doc/devel/spicy/installation.rst
Normal file
|
@ -0,0 +1,18 @@
|
|||
|
||||
.. _spicy_installation:
|
||||
|
||||
Installation
|
||||
============
|
||||
|
||||
Since Zeek version 5.0, support for Spicy is built right into Zeek by
|
||||
default. To confirm that Spicy is indeed available, you can inspect
|
||||
the output of ``zeek -N``::
|
||||
|
||||
# zeek -N Zeek::Spicy
|
||||
Zeek::Spicy - Support for Spicy parsers (*.hlto) (built-in)
|
||||
|
||||
It remains possible to build Zeek against an external Spicy
|
||||
installation through Zeek's ``configure`` option
|
||||
``--with-spicy=PATH``, where ``PATH`` points to the Spicy installation
|
||||
directory. In that case, you also need to ensure that the Spicy tools
|
||||
(e.g., ``spicyc``, ``spicy-config``) are available in ``PATH``.
|
1189
doc/devel/spicy/reference.rst
Normal file
1189
doc/devel/spicy/reference.rst
Normal file
File diff suppressed because it is too large
Load diff
441
doc/devel/spicy/tutorial.rst
Normal file
441
doc/devel/spicy/tutorial.rst
Normal file
|
@ -0,0 +1,441 @@
|
|||
|
||||
.. _spicy_tutorial:
|
||||
|
||||
Tutorial
|
||||
========
|
||||
|
||||
This tutorial walks through the integration of a simple TFTP analyzer
|
||||
into Zeek. This discussion continues the example from
|
||||
:spicylink:`Spicy's own tutorial <tutorial/index.html>` that develops
|
||||
the TFTP grammar, now focusing on how to use it with Zeek. Please go
|
||||
through that Spicy tutorial first before continuing here.
|
||||
|
||||
To turn a Spicy-side grammar into a Zeek analyzer, we need to provide
|
||||
Zeek with a description of how to employ it. There are two parts to
|
||||
that: Telling Zeek when to activate the analyzer, and defining events
|
||||
to generate. In addition, we will need a Zeek-side script to do
|
||||
something with our new TFTP events. We will walk through this in the
|
||||
following, starting with the mechanics of compiling the Spicy analyzer
|
||||
for Zeek. While we will build up the files involved individually
|
||||
first, see the :ref:`final section <zkg_create_package>` for how the
|
||||
Zeek package manager, *zkg*, can be used to bootstrap a new Zeek
|
||||
package with a skeleton of everything needed for an analyzer.
|
||||
|
||||
Before proceeding, make sure that your Zeek comes with Spicy support
|
||||
built-in---which is the default since Zeek version 5.0::
|
||||
|
||||
# zeek -N Zeek::Spicy
|
||||
Zeek::Spicy - Support for Spicy parsers (*.hlto) (built-in)
|
||||
|
||||
You should also have ``spicyz`` in your ``PATH``::
|
||||
|
||||
# which spicyz
|
||||
/usr/local/zeek/bin/spicyz
|
||||
|
||||
.. note::
|
||||
|
||||
There are a number of pieces involved in creating a full Zeek
|
||||
analyzer, in particular if you want to distribute it as a Zeek
|
||||
package. To help you get started with that, Zeek's package manager
|
||||
can create a skeleton Spicy package by running::
|
||||
|
||||
# zkg create --features=spicy-protocol-analyzer --packagedir <packagedir>
|
||||
|
||||
The generated files mark places that will need manual editing with
|
||||
``TODO``. See the :ref:`tutorial <zkg_create_package>` for more on
|
||||
this.
|
||||
|
||||
Compiling the Analyzer
|
||||
----------------------
|
||||
|
||||
Zeek comes with a tool :ref:`spicyz <spicyz>` that compiles Spicy
|
||||
analyzers into binary code that Zeek can load through a Spicy plugin.
|
||||
The following command line produces a binary object file ``tftp.hlto``
|
||||
containing the executable analyzer code:
|
||||
|
||||
.. code::
|
||||
|
||||
# spicyz -o tftp.hlto tftp.spicy
|
||||
|
||||
Below, we will prepare an additional interface definition file
|
||||
``tftp.evt`` that describes the analyzer's integration into Zeek. We
|
||||
will need to give that to ``spicyz`` as well, and our full
|
||||
compilation command hence becomes:
|
||||
|
||||
.. code::
|
||||
|
||||
# spicyz -o tftp.hlto tftp.spicy tftp.evt
|
||||
|
||||
When starting Zeek, we add ``tftp.hlto`` to its command line:
|
||||
|
||||
.. code::
|
||||
|
||||
# zeek -r tftp_rrq.pcap tftp.hlto
|
||||
|
||||
|
||||
Activating the Analyzer
|
||||
-----------------------
|
||||
|
||||
In *Getting Started*, :ref:`we already saw
|
||||
<example_spicy_my_http_adding_analyzer>` how to inform Zeek about a new
|
||||
protocol analyzer. We follow the same scheme here and put the
|
||||
following into ``tftp.evt``, the analyzer definition file:
|
||||
|
||||
.. literalinclude:: autogen/tftp.evt
|
||||
:lines: 5-7
|
||||
:language: spicy-evt
|
||||
|
||||
The first line provides our analyzer with a Zeek-side name
|
||||
(``spicy::TFTP``) and also tells Zeek that we are adding an
|
||||
application analyzer on top of UDP (``over UDP``). ``TFTP::Packet``
|
||||
provides the top-level entry point for parsing both sides of a TFTP
|
||||
connection. Furthermore, we want Zeek to automatically activate our
|
||||
analyzer for all sessions on UDP port 69 (i.e., TFTP's well known
|
||||
port). See :ref:`spicy_evt_analyzer_setup` for more details on defining
|
||||
such a ``protocol analyzer`` section.
|
||||
|
||||
.. note::
|
||||
|
||||
We use the ``port`` attribute in the ``protocol analyzer`` section
|
||||
mainly for convenience; it's not the only way to define the
|
||||
well-known ports. For a production analyzer, it's more idiomatic
|
||||
to use a Zeek script instead; see :ref:`this note
|
||||
<zeek_init_instead_of_port>` for more information.
|
||||
|
||||
With this in place, we can already employ the analyzer inside Zeek. It
|
||||
will not generate any events yet, but we can at least see the output of
|
||||
the ``on %done { print self; }`` hook that still remains part of the
|
||||
grammar from earlier:
|
||||
|
||||
.. code::
|
||||
|
||||
# zeek -r tftp_rrq.pcap tftp.hlto Spicy::enable_print=T
|
||||
[$opcode=Opcode::RRQ, $rrq=[$filename=b"rfc1350.txt", $mode=b"octet"], $wrq=(not set), $data=(not set), $ack=(not set), $error=(not set)]
|
||||
|
||||
As by default, the Zeek plugin does not show the output of Spicy-side
|
||||
``print`` statements, we added ``Spicy::enable_print=T`` to the
|
||||
command line to turn that on. We see that Zeek took care of the
|
||||
lower network layers, extracted the UDP payload from the Read Request,
|
||||
and passed that into our Spicy parser. (If you want to learn more about
|
||||
the internals of what is happening here, there are a couple kinds of
|
||||
:ref:`debug output available <spicy_debugging>`.)
|
||||
|
||||
You might be wondering why there is only one line of output, even
|
||||
though there are multiple TFTP packets in our pcap trace. Shouldn't
|
||||
the ``print`` execute multiple times? Yes, it should, but it does not
|
||||
currently: Due to some intricacies of the TFTP protocol, our analyzer
|
||||
gets to see only the first packet for now. We will fix this later. For
|
||||
now, we focus on the Read Request packet that the output above shows.
|
||||
|
||||
Defining Events
|
||||
---------------
|
||||
|
||||
The core task of any Zeek analyzer is to generate events for Zeek
|
||||
scripts to process. For binary protocols, events will often correspond
|
||||
pretty directly to data units specified by their specifications---and
|
||||
TFTP is no exception. We start with an event for Read/Write Requests
|
||||
by adding this definition to ``tftp.evt``:
|
||||
|
||||
.. literalinclude:: examples/tftp-single-request.evt
|
||||
:lines: 5-7
|
||||
:language: spicy-evt
|
||||
|
||||
The first line makes our Spicy TFTP grammar available to the rest of
|
||||
the file. The line ``on ...`` defines one event: Every time a
|
||||
``Request`` unit will be parsed, we want to receive an event
|
||||
``tftp::request`` with one parameter: the connection it belongs to.
|
||||
Here, ``$conn`` is a reserved identifier that will turn into the
|
||||
standard `connection record
|
||||
<https://docs.zeek.org/en/current/scripts/base/init-bare.zeek.html#type-connection>`_
|
||||
record on the Zeek side.
|
||||
|
||||
Now we need a Zeek event handler for our new event. Let's put this
|
||||
into ``tftp.zeek``:
|
||||
|
||||
.. literalinclude:: examples/tftp-single-request.zeek
|
||||
:language: zeek
|
||||
|
||||
Running Zeek then gives us:
|
||||
|
||||
.. code::
|
||||
|
||||
# spicyz -o tftp.hlto tftp.spicy tftp.evt
|
||||
# zeek -r tftp_rrq.pcap tftp.hlto tftp.zeek
|
||||
TFTP request, [orig_h=192.168.0.253, orig_p=50618/udp, resp_h=192.168.0.10, resp_p=69/udp]
|
||||
|
||||
Let's extend the event signature a bit by passing further arguments:
|
||||
|
||||
.. literalinclude:: examples/tftp-single-request-more-args.evt
|
||||
:lines: 5-7
|
||||
:language: spicy-evt
|
||||
|
||||
This shows how each parameter gets specified as a Spicy expression:
|
||||
``self`` refers to the instance currently being parsed (``self``), and
|
||||
``self.filename`` retrieves the value of its ``filename`` field.
|
||||
``$is_orig`` is another reserved ID that turns into a boolean that
|
||||
will be true if the event has been triggered by originator-side
|
||||
traffic. On the Zeek side, our event now has the following signature:
|
||||
|
||||
.. literalinclude:: examples/tftp-single-request-more-args.zeek
|
||||
:language: zeek
|
||||
|
||||
.. code::
|
||||
|
||||
# spicyz -o tftp.hlto tftp.spicy tftp.evt
|
||||
# zeek -r tftp_rrq.pcap tftp.hlto tftp.zeek
|
||||
TFTP request, [orig_h=192.168.0.253, orig_p=50618/udp, resp_h=192.168.0.10, resp_p=69/udp], T, rfc1350.txt, octet
|
||||
|
||||
Going back to our earlier discussion of Read vs Write Requests, we do
|
||||
not yet make that distinction with the ``request`` event that we are
|
||||
sending to Zeek-land. However, since we had introduced the ``is_read``
|
||||
unit parameter, we can easily separate the two by gating event
|
||||
generation through an additional ``if`` condition:
|
||||
|
||||
.. literalinclude:: autogen/tftp.evt
|
||||
:lines: 11-12
|
||||
:language: spicy-evt
|
||||
|
||||
This now defines two separate events, each being generated only for
|
||||
the corresponding value of ``is_read``. Let's try it with a new
|
||||
``tftp.zeek``:
|
||||
|
||||
.. literalinclude:: examples/tftp-two-requests.zeek
|
||||
:language: zeek
|
||||
|
||||
.. code::
|
||||
|
||||
# spicyz -o tftp.hlto tftp.spicy tftp.evt
|
||||
# zeek -r tftp_rrq.pcap tftp.hlto tftp.zeek
|
||||
TFTP read request, [orig_h=192.168.0.253, orig_p=50618/udp, resp_h=192.168.0.10, resp_p=69/udp], T, rfc1350.txt, octet
|
||||
|
||||
If we look at the :file:`conn.log` that Zeek produces during this run, we
|
||||
will see that the ``service`` field is not filled in yet. That's
|
||||
because our analyzer does not yet confirm to Zeek that it has been
|
||||
successful in parsing the content. To do that, we can call a library
|
||||
function that Spicy makes available once we have successfully parsed a
|
||||
request: :spicylink:`spicy::accept_input
|
||||
<programming/library.html#spicy-accept-input>`. That function signals
|
||||
the host application---i.e., Zeek in our case---that the parser is
|
||||
processing the expected protocol.
|
||||
|
||||
First, we need to make sure the Spicy standard library is imported
|
||||
in ``tftp.spicy``, so that we will have its functions available:
|
||||
|
||||
.. code::
|
||||
|
||||
import spicy;
|
||||
|
||||
With that, our request looks like this now:
|
||||
|
||||
.. code-block::
|
||||
|
||||
type Request = unit(is_read: bool) {
|
||||
filename: bytes &until=b"\x00";
|
||||
mode: bytes &until=b"\x00";
|
||||
|
||||
on %done { spicy::accept_input(); }
|
||||
};
|
||||
|
||||
|
||||
Let's try it again:
|
||||
|
||||
.. code::
|
||||
|
||||
# spicyz -o tftp.hlto tftp.spicy tftp.evt
|
||||
# zeek -r tftp_rrq.pcap tftp.hlto tftp.zeek
|
||||
TFTP read request, [orig_h=192.168.0.253, orig_p=50618/udp, resp_h=192.168.0.10, resp_p=69/udp], T, rfc1350.txt, octet
|
||||
# cat conn.log
|
||||
[...]
|
||||
1367411051.972852 C1f7uj4uuv6zu2aKti 192.168.0.253 50618 192.168.0.10 69 udp spicy_tftp - - - S0 - - 0 D 1 48 0 0 -
|
||||
[...]
|
||||
|
||||
Now the service field says TFTP! (There will be a 2nd connection in
|
||||
the log that we are not showing here; see the next section on that).
|
||||
|
||||
Turning to the other TFTP packet types, it is straightforward to add
|
||||
events for them as well. The following is our complete ``tftp.evt``
|
||||
file:
|
||||
|
||||
.. literalinclude:: autogen/tftp.evt
|
||||
:lines: 5-
|
||||
:language: spicy-evt
|
||||
|
||||
|
||||
|
||||
Detour: Zeek vs. TFTP
|
||||
---------------------
|
||||
|
||||
We noticed above that Zeek seems to be seeing only a single TFTP
|
||||
packet from our input trace, even though ``tcpdump`` shows that the
|
||||
pcap file contains multiple different types of packets. The reason
|
||||
becomes clear once we look more closely at the UDP ports that are in
|
||||
use:
|
||||
|
||||
.. code::
|
||||
|
||||
# tcpdump -ttnr tftp_rrq.pcap
|
||||
1367411051.972852 IP 192.168.0.253.50618 > 192.168.0.10.69: 20 RRQ "rfc1350.txtoctet" [tftp]
|
||||
1367411052.077243 IP 192.168.0.10.3445 > 192.168.0.253.50618: UDP, length 516
|
||||
1367411052.081790 IP 192.168.0.253.50618 > 192.168.0.10.3445: UDP, length 4
|
||||
1367411052.086300 IP 192.168.0.10.3445 > 192.168.0.253.50618: UDP, length 516
|
||||
1367411052.088961 IP 192.168.0.253.50618 > 192.168.0.10.3445: UDP, length 4
|
||||
1367411052.088995 IP 192.168.0.10.3445 > 192.168.0.253.50618: UDP, length 516
|
||||
[...]
|
||||
|
||||
Turns out that only the first packet is using the well-known TFTP port
|
||||
69/udp, whereas all the subsequent packets use ephemeral ports. Due to
|
||||
the port difference, Zeek believes it is seeing two independent
|
||||
network connections, and it does not associate TFTP with the second
|
||||
one at all due to its lack of the well-known port (neither does
|
||||
``tcpdump``!). Zeek's connection log confirms this by showing two
|
||||
separate entries:
|
||||
|
||||
.. code::
|
||||
|
||||
# cat conn.log
|
||||
1367411051.972852 CH3xFz3U1nYI1Dp1Dk 192.168.0.253 50618 192.168.0.10 69 udp spicy_tftp - - - S0 - - 0 D 1 48 0 0 -
|
||||
1367411052.077243 CfwsLw2TaTIeo3gE9g 192.168.0.10 3445 192.168.0.253 50618 udp - 0.181558 24795 196 SF - - 0 Dd 49 26167 49 1568 -
|
||||
|
||||
Switching the ports for subsequent packets is a quirk in TFTP that
|
||||
resembles similar behaviour in standard FTP, where data connections
|
||||
get set up separately as well. Fortunately, Zeek provides a built-in
|
||||
function to designate a specific analyzer for an anticipated future
|
||||
connection. We can call that function when we see the initial request:
|
||||
|
||||
.. literalinclude:: examples/tftp-schedule-analyzer.zeek
|
||||
:language: zeek
|
||||
|
||||
.. code::
|
||||
|
||||
# spicyz -o tftp.hlto tftp.spicy tftp.evt
|
||||
# zeek -r tftp_rrq.pcap tftp.hlto tftp.zeek
|
||||
TFTP read request, [orig_h=192.168.0.253, orig_p=50618/udp, resp_h=192.168.0.10, resp_p=69/udp], rfc1350.txt, octet
|
||||
TFTP data, 1, \x0a\x0a\x0a\x0a\x0a\x0aNetwork Working Group [...]
|
||||
TFTP ack, 1
|
||||
TFTP data, 2, B Official Protocol\x0a Standards" for the [...]
|
||||
TFTP ack, 2
|
||||
TFTP data, 3, protocol was originally designed by Noel Chia [...]
|
||||
TFTP ack, 3
|
||||
TFTP data, 4, r mechanism was suggested by\x0a PARC's EFT [...]
|
||||
TFTP ack, 4
|
||||
[...]
|
||||
|
||||
Now we are seeing all the packets as we would expect.
|
||||
|
||||
|
||||
Zeek Script
|
||||
-----------
|
||||
|
||||
Analyzers normally come along with a Zeek-side script that implements
|
||||
a set of standard base functionality, such as recording activity into
|
||||
a protocol specific log file. These scripts provide handlers for the
|
||||
analyzers' events, and collect and correlate their activity as
|
||||
desired. We have created such :download:`a script for TFTP
|
||||
<autogen/tftp.zeek>`, based on the events that our Spicy analyzer
|
||||
generates. Once we add that to the Zeek command line, we will see a
|
||||
new :file:`tftp.log`:
|
||||
|
||||
.. code::
|
||||
|
||||
# spicyz -o tftp.hlto tftp.spicy tftp.evt
|
||||
# zeek -r tftp_rrq.pcap tftp.hlto tftp.zeek
|
||||
# cat tftp.log
|
||||
#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p wrq fname mode uid_data size block_sent block_acked error_code error_msg
|
||||
1367411051.972852 CKWH8L3AIekSHYzBU 192.168.0.253 50618 192.168.0.10 69 F rfc1350.txt octet ClAr3P158Ei77Fql8h 24599 49 49 - -
|
||||
|
||||
The TFTP script also labels the second session as TFTP data by
|
||||
adding a corresponding entry to the ``service`` field inside the
|
||||
Zeek-side connection record. With that, we are now seeing this in
|
||||
:file:`conn.log`:
|
||||
|
||||
.. code::
|
||||
|
||||
1367411051.972852 ChbSfq3QWKuNirt9Uh 192.168.0.253 50618 192.168.0.10 69 udp spicy_tftp - - - S0 - - 0 D 1 48 0 0 -
|
||||
1367411052.077243 CowFQj20FHHduhHSYk 192.168.0.10 3445 192.168.0.253 50618 udp spicy_tftp_data 0.181558 24795 196 SF - - 0 Dd 49 26167 49 1568 -
|
||||
|
||||
The TFTP script ends up being a bit more complex than one would expect
|
||||
for such a simple protocol. That's because it tracks the two related
|
||||
connections (initial request and follow-up traffic on a different
|
||||
port), and combines them into a single TFTP transaction for logging.
|
||||
Since there is nothing Spicy-specific in that Zeek script, we skip
|
||||
discussing it here in more detail.
|
||||
|
||||
|
||||
.. _zkg_create_package:
|
||||
|
||||
Creating a Zeek Package
|
||||
-----------------------
|
||||
|
||||
We have now assembled all the parts needed for providing a new
|
||||
analyzer to Zeek. By adding a few further pieces, we can wrap that
|
||||
analyzer into a full *Zeek package* for others to install easily
|
||||
through *zkg*. To help create that wrapping, *zkg* provides a template
|
||||
for instantiating a skeleton analyzer package as a starting point. The
|
||||
skeleton comes in three different flavors, depending on which kind of
|
||||
analyzer you want to create: protocol, file, or packet analyzer.
|
||||
In each case, it creates all the necessary files along with the
|
||||
appropriate directory layout, and even includes a couple of
|
||||
standard test cases.
|
||||
|
||||
To create the scaffolding for our TFTP analyzer, execute the following
|
||||
command and provide the requested information::
|
||||
|
||||
# zkg create --features spicy-protocol-analyzer --packagedir spicy-tftp
|
||||
"package-template" requires a "name" value (the name of the package, e.g. "FooBar" or "spicy-http"):
|
||||
name: spicy-tftp
|
||||
"package-template" requires a "analyzer" value (name of the Spicy analyzer, which typically corresponds to the protocol/format being parsed (e.g. "HTTP", "PNG")):
|
||||
analyzer: TFTP
|
||||
"package-template" requires a "protocol" value (transport protocol for the analyzer to use: TCP or UDP):
|
||||
protocol: UDP
|
||||
"package-template" requires a "unit_orig" value (name of the top-level Spicy parsing unit for the originator side of the connection (e.g. "Request")):
|
||||
unit_orig: Packet
|
||||
"package-template" requires a "unit_resp" value (name of the top-level Spicy parsing unit for the responder side of the connection (e.g. "Reply"); may be the same as originator side):
|
||||
unit_resp: Packet
|
||||
|
||||
|
||||
The above creates the following files (skipping anything related to
|
||||
``.git``)::
|
||||
|
||||
spicy-tftp/CMakeLists.txt
|
||||
spicy-tftp/COPYING
|
||||
spicy-tftp/README
|
||||
spicy-tftp/analyzer/CMakeLists.txt
|
||||
spicy-tftp/analyzer/tftp.evt
|
||||
spicy-tftp/analyzer/tftp.spicy
|
||||
spicy-tftp/cmake/FindSpicyPlugin.cmake
|
||||
spicy-tftp/scripts/__load__.zeek
|
||||
spicy-tftp/scripts/dpd.sig
|
||||
spicy-tftp/scripts/main.zeek
|
||||
spicy-tftp/testing/Baseline/tests.run-pcap/conn.log
|
||||
spicy-tftp/testing/Baseline/tests.run-pcap/output
|
||||
spicy-tftp/testing/Baseline/tests.standalone/
|
||||
spicy-tftp/testing/Baseline/tests.standalone/output
|
||||
spicy-tftp/testing/Baseline/tests.trace/output
|
||||
spicy-tftp/testing/Baseline/tests.trace/tftp.log
|
||||
spicy-tftp/testing/Files/random.seed
|
||||
spicy-tftp/testing/Makefile
|
||||
spicy-tftp/testing/Scripts/README
|
||||
spicy-tftp/testing/Scripts/diff-remove-timestamps
|
||||
spicy-tftp/testing/Scripts/get-zeek-env
|
||||
spicy-tftp/testing/Traces/tcp-port-12345.pcap
|
||||
spicy-tftp/testing/Traces/udp-port-12345.pcap
|
||||
spicy-tftp/testing/btest.cfg
|
||||
spicy-tftp/testing/tests/availability.zeek
|
||||
spicy-tftp/testing/tests/standalone.spicy
|
||||
spicy-tftp/testing/tests/trace.zeek
|
||||
spicy-tftp/zkg.meta
|
||||
|
||||
|
||||
Note the ``*.evt``, ``*.spicy``, ``*.zeek`` files: they correspond to
|
||||
the files we created for TFTP in the preceding sections; we can just
|
||||
move our versions in there. Furthermore, the generated scaffolding
|
||||
marks places with ``TODO`` that need manual editing: use ``git grep
|
||||
TODO`` inside the ``spicy-tftp`` directory to find them. We won't go
|
||||
through all the specific customizations for TFTP here, but for
|
||||
reference you can find the full TFTP package as created from the *zkg*
|
||||
template on `GitHub <https://github.com/zeek/spicy-tftp>`_.
|
||||
|
||||
If instead of a protocol analyzer, you'd like to create a file or
|
||||
packet analyzer, run zkg with ``--features spicy-file-analyzer`` or
|
||||
``--features spicy-packet-analyzer``, respectively. The generated
|
||||
skeleton will be suitably adjusted then.
|
317
doc/devel/websocket-api.rst
Normal file
317
doc/devel/websocket-api.rst
Normal file
|
@ -0,0 +1,317 @@
|
|||
.. _websocket-api:
|
||||
|
||||
.. _websocat: https://github.com/vi/websocat
|
||||
|
||||
|
||||
======================================
|
||||
Interacting with Zeek using WebSockets
|
||||
======================================
|
||||
|
||||
Introduction
|
||||
============
|
||||
|
||||
Usually, Zeek produces protocol logs consumed by external applications. These
|
||||
external applications might be SIEMs, real-time streaming analysis platforms
|
||||
or basic archival processes compressing logs for long term storage.
|
||||
|
||||
Certain use-cases require interacting and influencing Zeek's runtime behavior
|
||||
outside of static configuration via ``local.zeek``.
|
||||
|
||||
The classic :ref:`framework-input` and :ref:`framework-configuration` can be
|
||||
leveraged for runtime configuration of Zeek as well as triggering arbitrary
|
||||
events or script execution via option handlers. These frameworks are mostly
|
||||
file- or process-based and may feel a bit unusual in environments where creation
|
||||
of files is uncommon or even impossible due to separation of concerns. In many
|
||||
of today's environments, interacting using HTTP-based APIs or other remote
|
||||
interfaces is more common.
|
||||
|
||||
.. note::
|
||||
|
||||
As an aside, if you need more flexibility than the WebSocket API offers today,
|
||||
an alternative could be to use :ref:`javascript` within Zeek. This opens the
|
||||
possibility to run a separate HTTP or a totally different Node.js based server
|
||||
within a Zeek process for quick experimentation and evaluation of other
|
||||
approaches.
|
||||
|
||||
Background and Setup
|
||||
====================
|
||||
|
||||
Since Zeek 5.0, Zeek allows connections from external clients over WebSocket.
|
||||
This allows these clients to interact with Zeek's publish-subscribe layer and
|
||||
exchange Zeek events with other Zeek nodes.
|
||||
Initially, this implementation resided in the Broker subsystem.
|
||||
With Zeek 8.0, most of the implementation has been moved into core Zeek
|
||||
itself with the v1 serialization format remaining in Broker.
|
||||
|
||||
WebSocket clients may subscribe to a fixed set of topics and will receive
|
||||
Zeek events matching these topics that Zeek cluster nodes, but also other
|
||||
WebSocket clients, publish.
|
||||
|
||||
With Zeek 8.0, Zeekctl has received support to interact with Zeek cluster nodes
|
||||
using the WebSocket protocol. If you're running a Zeekctl based cluster and
|
||||
want to experiment with WebSocket functionality, add ``UseWebSocket = 1`` to
|
||||
your ``zeekctl.cfg``:
|
||||
|
||||
.. code-block:: ini
|
||||
|
||||
# zeekctl.cfg
|
||||
...
|
||||
UseWebSocket = 1
|
||||
|
||||
This will essentially add the following snippet, enabling a WebSocket server
|
||||
on the Zeek manager:
|
||||
|
||||
.. code-block:: zeek
|
||||
:caption: websocket.zeek
|
||||
|
||||
event zeek_init()
|
||||
{
|
||||
if ( Cluster::local_node_type() == Cluster::MANAGER )
|
||||
{
|
||||
Cluster::listen_websocket([
|
||||
$listen_addr=127.0.0.1,
|
||||
$listen_port=27759/tcp,
|
||||
]);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
To verify that the WebSocket API is functional in your deployment use, for example,
|
||||
`websocat`_ as a quick check.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
$ echo '[]' | websocat ws://127.0.0.1:27759/v1/messages/json
|
||||
{"type":"ack","endpoint":"3eece35d-9f94-568d-861c-6a16c433e090-websocket-2","version":"8.0.0-dev.684"}
|
||||
|
||||
Zeek's ``cluster.log`` file will also have an entry for the WebSocket client connection.
|
||||
The empty array in the command specifies the client's subscriptions, in this case none.
|
||||
|
||||
Version 1
|
||||
=========
|
||||
|
||||
The currently implemented protocol is accessible at ``/v1/messages/json``.
|
||||
The `data representation <https://docs.zeek.org/projects/broker/en/current/web-socket.html#data-representation>`_
|
||||
is documented in detail within the Broker project. Note that this format is a
|
||||
direct translation of Broker's binary format into JSON, resulting in a fairly
|
||||
tight coupling between WebSocket clients and the corresponding Zeek scripts.
|
||||
Most prominent is the representation of record values as vectors instead
|
||||
of objects, making the protocol sensitive to reordering or introduction
|
||||
of optional fields to records.
|
||||
|
||||
.. note::
|
||||
|
||||
We're looking into an iteration of the format. If you have feedback or
|
||||
would like to contribute, please reach out on the usual community channels.
|
||||
|
||||
|
||||
Handshake and Acknowledgement
|
||||
-----------------------------
|
||||
|
||||
The first message after a WebSocket connection has been established originates
|
||||
from the client. This message is a JSON array of strings that represent the
|
||||
topics the WebSocket client wishes to subscribe to.
|
||||
|
||||
Zeek replies with an acknowledgement message that's a JSON object or an error.
|
||||
|
||||
Events
|
||||
------
|
||||
|
||||
After the acknowledgement, WebSocket clients receive all events arriving on
|
||||
topics they have subscribed to.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
$ websocat ws://127.0.0.1:27759/v1/messages/json
|
||||
["zeek.test"]
|
||||
{"type":"ack","endpoint":"d955d990-ad8a-5ed4-8bc5-bee252d4a2e6-websocket-0","version":"8.0.0-dev.684"}
|
||||
{"type":"data-message","topic":"zeek.test","@data-type":"vector","data":[{"@data-type":"count","data":1},{"@data-type":"count","data":1},{"@data-type":"vector","data":[{"@data-type":"string","data":"hello"},{"@data-type":"vector","data":[{"@data-type":"count","data":3}]},{"@data-type":"vector","data":[]}]}]}
|
||||
|
||||
The received messages, again, are encoded in Broker's JSON format. Above ``data-message``
|
||||
represents an event received on topic ``zeek.test``. The event's name is ``hello``.
|
||||
This event has a single argument of type :zeek:type:`count`. In the example above
|
||||
its value is ``3``.
|
||||
|
||||
To send events, WebSocket clients similarly encode their event representation
|
||||
to Broker's JSON format and send them as `text data frames <https://datatracker.ietf.org/doc/html/rfc6455#section-5.6>`_.
|
||||
|
||||
|
||||
X-Application-Name Header
|
||||
-------------------------
|
||||
|
||||
When a WebSocket client includes an ``X-Application-Name`` HTTP header in
|
||||
the initial WebSocket Handshake's GET request, that header's value is available
|
||||
in the :zeek:see:`Cluster::websocket_client_added` event's ``endpoint`` argument (see :zeek:see:`Cluster::EndpointInfo`).
|
||||
|
||||
The header's value will also be included in ``cluster.log`` messages.
|
||||
|
||||
Additionally, if the cluster telemetry for WebSocket clients is set to
|
||||
:zeek:see:`Cluster::Telemetry::VERBOSE` or :zeek:see:`Cluster::Telemetry::DEBUG`
|
||||
via :zeek:see:`Cluster::Telemetry::websocket_metrics`, the header's value is
|
||||
included as ``app`` label in metrics exposed by the :ref:`framework-telemetry`.
|
||||
|
||||
As of Zeek 8.0, a WebSocket client will be rejected if the header is set, but
|
||||
its value doesn't match ``[-/_.=:*@a-zA-Z0-9]+``.
|
||||
|
||||
|
||||
Language Bindings
|
||||
-----------------
|
||||
|
||||
Note that it's possible to use any language that offers WebSocket bindings.
|
||||
The ones listed below mostly add a bit of convenience features around the
|
||||
initial Handshake message, error handling and serializing Zeek events and
|
||||
values into the Broker-specific serialization format.
|
||||
|
||||
For example, using the Node.js `builtin WebSocket functionality <https://nodejs.org/en/learn/getting-started/websocket>`_,
|
||||
the ``websocat`` example from above can be reproduced as follows:
|
||||
|
||||
.. code-block:: javascript
|
||||
:caption: client.js
|
||||
|
||||
// client.js
|
||||
const socket = new WebSocket('ws://192.168.122.107:27759/v1/messages/json');
|
||||
|
||||
socket.addEventListener('open', event => {
|
||||
socket.send('["zeek.test"]');
|
||||
});
|
||||
|
||||
socket.addEventListener('message', event => {
|
||||
console.log('Message from server: ', event.data);
|
||||
});
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
$ node ./client.js
|
||||
Message from server: {"type":"ack","endpoint":"2e951b0c-3ca4-504c-ae8a-5d3750fec588-websocket-10","version":"8.0.0-dev.684"}
|
||||
Message from server: {"type":"data-message","topic":"zeek.test","@data-type":"vector","data":[{"@data-type":"count","data":1},{"@data-type":"count","data":1},{"@data-type":"vector","data":[{"@data-type":"string","data":"hello"},{"@data-type":"vector","data":[{"@data-type":"count","data":374}]},{"@data-type":"vector","data":[]}]}]}
|
||||
|
||||
|
||||
Golang
|
||||
^^^^^^
|
||||
|
||||
* `Zeek Broker websocket interface library for Golang <https://github.com/corelight/go-zeek-broker-ws>`_ (not an official Zeek project)
|
||||
|
||||
|
||||
Rust
|
||||
^^^^
|
||||
|
||||
* `Rust types for interacting with Zeek over WebSocket <https://github.com/bbannier/zeek-websocket-rs>`_ (not an official Zeek project)
|
||||
|
||||
Python
|
||||
^^^^^^
|
||||
|
||||
There are no ready to use Python libraries available, but the third-party
|
||||
`websockets <https://github.com/python-websockets/websockets>`_ package
|
||||
allows you to get started quickly.
|
||||
You may take inspiration from `zeek-client's implementation <https://github.com/zeek/zeek-client>`_
|
||||
or the `small helper library <https://raw.githubusercontent.com/zeek/zeek/refs/heads/master/testing/btest/Files/ws/wstest.py>`_ used by several of Zeek's own tests for the
|
||||
WebSocket API.
|
||||
Zeekctl similarly ships a `light implementation <https://github.com/zeek/zeekctl/blob/93459b37c3deab4bec9e886211672024fa3e4759/ZeekControl/events.py#L159>`_
|
||||
using the ``websockets`` library to implement its ``netstats`` and ``print`` commands.
|
||||
|
||||
|
||||
Outgoing Connections
|
||||
====================
|
||||
|
||||
For some deployment scenarios, Zeek only offering a WebSocket server can be cumbersome.
|
||||
Concretely, this is the case when multiple independent Zeek clusters interact with
|
||||
a single instance of a remote API. For instance, this could be needed for
|
||||
configuring a central firewall.
|
||||
In such scenarios, it is more natural for Zeek to connect out to the
|
||||
remote API, rather than the remote API connecting to the Zeek cluster.
|
||||
|
||||
For these use-cases, the current suggestion is to run a WebSocket bridge between
|
||||
a Zeek cluster and the remote API. One concrete tool that can be used
|
||||
for this purpose is `websocat`_.
|
||||
|
||||
.. note::
|
||||
|
||||
This topic has previously been discussed elsewhere. The following
|
||||
`GitHub issue <https://github.com/zeek/zeek/issues/3597>`_ and
|
||||
`discussion <https://github.com/zeek/zeek/discussions/4768>`_
|
||||
provide more background and details.
|
||||
|
||||
|
||||
Example Architecture
|
||||
--------------------
|
||||
|
||||
.. figure:: ../images/websocket-api/one-api-many-zeek.svg
|
||||
:width: 300
|
||||
|
||||
Multiple Zeek instances and a single remote API
|
||||
|
||||
The following proposal decouples the components using a WebSocket
|
||||
bridge for every Zeek cluster. This ensures that the depicted remote API
|
||||
does not need knowledge about an arbitrary number of Zeek clusters.
|
||||
|
||||
|
||||
.. figure:: ../images/websocket-api/one-api-many-zeek-ws-bridge.svg
|
||||
:width: 300
|
||||
|
||||
Multiple Zeek instances and a single remote API with WebSocket bridges.
|
||||
|
||||
Example Implementation
|
||||
----------------------
|
||||
|
||||
Assuming the depicted remote API provides a WebSocket server as well,
|
||||
it is possible to use ``websocat`` as the bridge directly.
|
||||
The crux for the remote API is that upon a new WebSocket client connection,
|
||||
the first message is the topic array that the remote API wishes to subscribe
|
||||
to on a Zeek cluster.
|
||||
|
||||
|
||||
Putting these pieces together, the following JavaScript script presents the
|
||||
remote API, implemented using the `ws library <https://github.com/websockets/ws?tab=readme-ov-file>`_.
|
||||
It accepts WebSocket clients on port 8080 and sends the topic array as the first message
|
||||
containing just ``zeek.bridge.test``. Thereafter, it simply echoes all incoming
|
||||
WebSocket messages.
|
||||
|
||||
.. literalinclude:: websocket-api/server.js
|
||||
:caption: server.js
|
||||
:language: javascript
|
||||
|
||||
The Zeek side starts a WebSocket server on port 8000 and regularly publishes
|
||||
a ``hello`` event to the ``zeek.bridge.test`` topic.
|
||||
|
||||
.. literalinclude:: websocket-api/server.zeek
|
||||
:caption: server.zeek
|
||||
:language: zeek
|
||||
|
||||
These two servers can now be connected by running ``websocat`` as follows:
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
# In terminal 1 (use node if your Zeek has no JavaScript support)
|
||||
$ zeek server.js
|
||||
|
||||
# In terminal 2
|
||||
$ zeek server.zeek
|
||||
|
||||
# In terminal 3
|
||||
$ while true; do websocat --text -H='X-Application-Name: client1' ws://localhost:8000/v1/messages/json ws://localhost:8080 || sleep 0.1 ; done
|
||||
|
||||
|
||||
The first few lines of output in terminal 1 should then look as follows:
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
# zeek server.js
|
||||
client1: connected, sending topics array ["zeek.bridge.test"]
|
||||
client1: received: {"type":"ack","endpoint":"9089e06b-8d33-5585-ad79-4f7f6348754e-websocket-135","version":"8.1.0-dev.91"}
|
||||
client1: received: {"type":"data-message","topic":"zeek.bridge.test","@data-type":"vector","data":[{"@data-type":"count","data":1},{"@data-type":"count","data":1},{"@data-type":"vector","data":[{"@data-type":"string","data":"hello"},{"@data-type":"vector","data":[{"@data-type":"count","data":1792}]},{"@data-type":"vector","data":[]}]}]}
|
||||
...
|
||||
|
||||
If you require synchronization between the Zeek instance and the remote API, this
|
||||
is best achieved with events once the connection between the remote API and the
|
||||
Zeek cluster is established.
|
||||
|
||||
Alternative Approaches
|
||||
----------------------
|
||||
|
||||
Since v21, Node.js contains a built-in `WebSocket client <https://nodejs.org/en/learn/getting-started/websocket>`_,
|
||||
making it possible to use vanilla :ref:`javascript` within
|
||||
Zeek to establish outgoing WebSocket connections, too.
|
||||
|
||||
The ``websocat`` tool provides more flexibility, potentially allowing
|
||||
one to forward WebSocket messages to external commands, which in turn could
|
||||
use HTTP POST requests to an external API.
|
23
doc/devel/websocket-api/server.js
Normal file
23
doc/devel/websocket-api/server.js
Normal file
|
@ -0,0 +1,23 @@
|
|||
// server.js
|
||||
import WebSocket, { WebSocketServer } from 'ws';
|
||||
|
||||
const wss = new WebSocketServer({ port: 8080 });
|
||||
|
||||
wss.on('connection', (ws, req) => {
|
||||
ws.on('error', console.error);
|
||||
ws.on('close', () => { console.log('%s: gone', ws.zeek.app); });
|
||||
|
||||
ws.on('message', function message(data) {
|
||||
console.log('%s: received: %s', ws.zeek.app, data);
|
||||
});
|
||||
|
||||
let topics = ['zeek.bridge.test'];
|
||||
let app = req.headers['x-application-name'] || '<unknown application>'
|
||||
ws.zeek = {
|
||||
app: app,
|
||||
topics: topics,
|
||||
};
|
||||
|
||||
console.log(`${app}: connected, sending topics array ${JSON.stringify(topics)}`);
|
||||
ws.send(JSON.stringify(topics));
|
||||
});
|
15
doc/devel/websocket-api/server.zeek
Normal file
15
doc/devel/websocket-api/server.zeek
Normal file
|
@ -0,0 +1,15 @@
|
|||
global hello: event(c : count);
|
||||
|
||||
global c = 0;
|
||||
|
||||
event tick()
|
||||
{
|
||||
Cluster::publish("zeek.bridge.test", hello, ++c);
|
||||
schedule 1.0sec { tick() };
|
||||
}
|
||||
|
||||
event zeek_init()
|
||||
{
|
||||
Cluster::listen_websocket([$listen_addr=127.0.0.1, $listen_port=8000/tcp]);
|
||||
event tick();
|
||||
}
|
41
doc/ext/literal-emph.py
Normal file
41
doc/ext/literal-emph.py
Normal file
|
@ -0,0 +1,41 @@
|
|||
import re
|
||||
|
||||
import sphinx
|
||||
from docutils import nodes
|
||||
|
||||
# This extension adds a 'literal-emph' directive that operates the same
|
||||
# as the 'code-block' directive except that it additionally understands
|
||||
# the **strong emphasis** markup, allowing custom rendering of it to be
|
||||
# substituted in the final literal block (e.g. HTML adds <strong> elements).
|
||||
# Adding " (no-emph)" to the end of a line within the 'literal-emph' content
|
||||
# disables substitutions for that line.
|
||||
|
||||
|
||||
class LiteralEmphNode(nodes.General, nodes.Element):
|
||||
pass
|
||||
|
||||
|
||||
class LiteralEmph(sphinx.directives.code.CodeBlock):
|
||||
def run(self):
|
||||
node = LiteralEmphNode()
|
||||
node += super().run()
|
||||
return [node]
|
||||
|
||||
|
||||
def visit_litemph_node(self, node):
|
||||
pass
|
||||
|
||||
|
||||
def depart_litemph_node(self, node):
|
||||
text = self.body[-1]
|
||||
text = re.sub(r"\*\*(.*?)\*\*(?!.* \(no-emph\)\n)", r"<strong>\1</strong>", text)
|
||||
text = re.sub(r"(.*) \(no-emph\)\n", r"\1\n", text)
|
||||
self.body[-1] = text
|
||||
|
||||
|
||||
def setup(app):
|
||||
app.add_directive("literal-emph", LiteralEmph)
|
||||
app.add_node(LiteralEmphNode, html=(visit_litemph_node, depart_litemph_node))
|
||||
return {
|
||||
"parallel_read_safe": True,
|
||||
}
|
391
doc/ext/spicy-pygments.py
Normal file
391
doc/ext/spicy-pygments.py
Normal file
|
@ -0,0 +1,391 @@
|
|||
# Copyright (c) 2020-now by the Zeek Project. See LICENSE for details.
|
||||
|
||||
from pygments.lexer import RegexLexer, bygroups, include, words
|
||||
from pygments.token import (
|
||||
Comment,
|
||||
Keyword,
|
||||
Name,
|
||||
Number,
|
||||
Operator,
|
||||
Punctuation,
|
||||
String,
|
||||
Text,
|
||||
)
|
||||
from sphinx.highlighting import lexers
|
||||
|
||||
|
||||
def setup(app):
|
||||
lexers["spicy"] = SpicyLexer()
|
||||
lexers["spicy-evt"] = SpicyEvtLexer()
|
||||
return {
|
||||
"parallel_read_safe": True,
|
||||
"parallel_write_safe": True,
|
||||
}
|
||||
|
||||
|
||||
class SpicyLexer(RegexLexer):
|
||||
"""
|
||||
For `Spicy <https://github.com/zeek/spicy>`_ grammars.
|
||||
"""
|
||||
|
||||
name = "Spicy"
|
||||
aliases = ["spicy"]
|
||||
filenames = ["*.spicy"]
|
||||
|
||||
_hex = r"[0-9a-fA-F]"
|
||||
_float = r"((\d*\.?\d+)|(\d+\.?\d*))([eE][-+]?\d+)?"
|
||||
_h = r"[A-Za-z0-9][-A-Za-z0-9]*"
|
||||
_id = r"[a-zA-Z_][a-zA-Z_0-9]*"
|
||||
|
||||
tokens = {
|
||||
"root": [
|
||||
include("whitespace"),
|
||||
include("comments"),
|
||||
include("directives"),
|
||||
include("attributes"),
|
||||
include("hooks"),
|
||||
include("properties"),
|
||||
include("types"),
|
||||
include("modules"),
|
||||
include("keywords"),
|
||||
include("literals"),
|
||||
include("operators"),
|
||||
include("punctuation"),
|
||||
include("function-call"),
|
||||
include("identifiers"),
|
||||
],
|
||||
"whitespace": [
|
||||
(r"\n", Text),
|
||||
(r"\s+", Text),
|
||||
(r"\\\n", Text),
|
||||
],
|
||||
"comments": [
|
||||
(r"#.*$", Comment),
|
||||
],
|
||||
"directives": [(r"(@(if|else|endif))\b", Comment.Preproc)],
|
||||
"attributes": [
|
||||
(
|
||||
words(
|
||||
(
|
||||
"bit-order",
|
||||
"byte-order",
|
||||
"chunked",
|
||||
"convert",
|
||||
"count",
|
||||
"cxxname",
|
||||
"default",
|
||||
"eod",
|
||||
"internal",
|
||||
"ipv4",
|
||||
"ipv6",
|
||||
"length",
|
||||
"max-size",
|
||||
"no-emit",
|
||||
"nosub",
|
||||
"on-heap",
|
||||
"optional",
|
||||
"originator",
|
||||
"parse-at",
|
||||
"parse-from",
|
||||
"priority",
|
||||
"requires",
|
||||
"responder",
|
||||
"size",
|
||||
"static",
|
||||
"synchronize",
|
||||
"transient",
|
||||
"try",
|
||||
"type",
|
||||
"until",
|
||||
"until-including",
|
||||
"while",
|
||||
"have_prototype",
|
||||
),
|
||||
prefix=r"&",
|
||||
suffix=r"\b",
|
||||
),
|
||||
Keyword.Pseudo,
|
||||
),
|
||||
],
|
||||
"hooks": [
|
||||
(
|
||||
rf"(on)(\s+)(({_id}::)+%?{_id}(\.{_id})*)",
|
||||
bygroups(Keyword, Text, Name.Function),
|
||||
),
|
||||
(rf"(on)(\s+)(%?{_id}(\.{_id})*)", bygroups(Keyword, Text, Name.Function)),
|
||||
],
|
||||
"properties": [
|
||||
# Like an ID, but allow hyphenation ('-')
|
||||
(r"%[a-zA-Z_][a-zA-Z_0-9-]*", Name.Attribute),
|
||||
],
|
||||
"types": [
|
||||
(
|
||||
words(
|
||||
(
|
||||
"any",
|
||||
"addr",
|
||||
"bitfield",
|
||||
"bool",
|
||||
"bytes",
|
||||
"__library_type",
|
||||
"iterator",
|
||||
"const_iterator",
|
||||
"int8",
|
||||
"int16",
|
||||
"int32",
|
||||
"int64",
|
||||
"uint8",
|
||||
"uint16",
|
||||
"uint32",
|
||||
"uint64",
|
||||
"enum",
|
||||
"interval",
|
||||
"interval_ns",
|
||||
"list",
|
||||
"map",
|
||||
"optional",
|
||||
"port",
|
||||
"real",
|
||||
"regexp",
|
||||
"set",
|
||||
"sink",
|
||||
"stream",
|
||||
"view",
|
||||
"string",
|
||||
"time",
|
||||
"time_ns",
|
||||
"tuple",
|
||||
"unit",
|
||||
"vector",
|
||||
"void",
|
||||
"function",
|
||||
"struct",
|
||||
),
|
||||
prefix=r"\b",
|
||||
suffix=r"\b",
|
||||
),
|
||||
Keyword.Type,
|
||||
),
|
||||
(
|
||||
rf"\b(type)(\s+)((?:{_id})(?:::(?:{_id}))*)\b",
|
||||
bygroups(Keyword, Text, Name.Class),
|
||||
),
|
||||
],
|
||||
"modules": [
|
||||
(
|
||||
rf"\b(import)(\s+)({_id})(\s+)(from)(\s+)(\S+)\b",
|
||||
bygroups(
|
||||
Keyword.Namespace,
|
||||
Text,
|
||||
Name.Namespace,
|
||||
Text,
|
||||
Keyword.Namespace,
|
||||
Text,
|
||||
Name.Namespace,
|
||||
),
|
||||
),
|
||||
(
|
||||
rf"\b(module|import)(\s+)({_id})\b",
|
||||
bygroups(Keyword.Namespace, Text, Name.Namespace),
|
||||
),
|
||||
],
|
||||
"keywords": [
|
||||
(
|
||||
words(
|
||||
("global", "const", "local", "var", "public", "private", "inout"),
|
||||
prefix=r"\b",
|
||||
suffix=r"\b",
|
||||
),
|
||||
Keyword.Declaration,
|
||||
),
|
||||
(
|
||||
words(
|
||||
(
|
||||
"print",
|
||||
"add",
|
||||
"delete",
|
||||
"stop",
|
||||
"unset",
|
||||
"assert",
|
||||
"assert-exception",
|
||||
"new",
|
||||
"cast",
|
||||
"begin",
|
||||
"end",
|
||||
"type",
|
||||
"attribute",
|
||||
"on",
|
||||
"priority",
|
||||
"if",
|
||||
"else",
|
||||
"switch",
|
||||
"case",
|
||||
"default",
|
||||
"try",
|
||||
"catch",
|
||||
"break",
|
||||
"return",
|
||||
"continue",
|
||||
"while",
|
||||
"for",
|
||||
"foreach",
|
||||
"module",
|
||||
"import",
|
||||
"export",
|
||||
"from",
|
||||
),
|
||||
prefix=r"\b",
|
||||
suffix=r"\b",
|
||||
),
|
||||
Keyword,
|
||||
),
|
||||
],
|
||||
"literals": [
|
||||
(r'b?"', String, "string"),
|
||||
# Not the greatest match for patterns, but generally helps
|
||||
# disambiguate between start of a pattern and just a division
|
||||
# operator.
|
||||
(r"/(?=.*/)", String.Regex, "regex"),
|
||||
(r"\b(True|False|None|Null)\b", Keyword.Constant),
|
||||
# Port
|
||||
(r"\b\d{1,5}/(udp|tcp)\b", Number),
|
||||
# IPv4 Address
|
||||
(
|
||||
r"\b(25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[0-9]{1,2})\.(25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[0-9]{1,2})\.(25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[0-9]{1,2})\.(25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[0-9]{1,2})\b",
|
||||
Number,
|
||||
),
|
||||
# IPv6 Address (not 100% correct: that takes more effort)
|
||||
(
|
||||
r"\[([0-9a-fA-F]{0,4}:){2,7}([0-9a-fA-F]{0,4})?((25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[0-9]{1,2})\.(25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[0-9]{1,2})\.(25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[0-9]{1,2})\.(25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[0-9]{1,2}))?\]",
|
||||
Number,
|
||||
),
|
||||
# Numeric
|
||||
(rf"\b0[xX]{_hex}+\b", Number.Hex),
|
||||
(rf"\b{_float}\b", Number.Float),
|
||||
(r"\b(\d+)\b", Number.Integer),
|
||||
],
|
||||
"operators": [
|
||||
(r"[$][$]", Name.Builtin.Pseudo), # just-parsed-element
|
||||
(r"[$]\d+", Name.Builtin.Pseudo), # capture-group
|
||||
(r"\b(in)\b", Operator.Word),
|
||||
(r"[-+*=&|<>.]{2}", Operator),
|
||||
(r"[-+*/=!><]=", Operator),
|
||||
(r"[?][.]", Operator),
|
||||
(r"[.][?]", Operator),
|
||||
(r"[-][>]", Operator),
|
||||
(r"[!][<>]", Operator),
|
||||
(r"[!%*/+<=>~|&^-]", Operator),
|
||||
# Technically, colons are often used for punctuation/sepration.
|
||||
# E.g. field name/type separation.
|
||||
(r"[?:]", Operator),
|
||||
],
|
||||
"punctuation": [
|
||||
(r"[{}()\[\],;:.]", Punctuation),
|
||||
],
|
||||
"function-call": [
|
||||
(rf"\b((?:{_id})(?:::(?:{_id}))*)(?=\s*\()", Name.Function),
|
||||
],
|
||||
"identifiers": [
|
||||
(r"\b(self)\b", Name.Builtin.Pseudo),
|
||||
(r"([a-zA-Z_]\w*)(::)", bygroups(Name, Punctuation)),
|
||||
(r"[a-zA-Z_]\w*", Name),
|
||||
],
|
||||
"string": [
|
||||
(r"\\.", String.Escape),
|
||||
(r"%-?[0-9]*(\.[0-9]+)?[DTdxsefg]", String.Escape),
|
||||
(r'"', String, "#pop"),
|
||||
(r".", String),
|
||||
],
|
||||
"regex": [
|
||||
(r"\\.", String.Escape),
|
||||
(r"/", String.Regex, "#pop"),
|
||||
(r".", String.Regex),
|
||||
],
|
||||
}
|
||||
|
||||
|
||||
class SpicyEvtLexer(RegexLexer):
|
||||
"""
|
||||
For `Spicy <https://github.com/zeek/spicy>`_ Zeek interface definitions.
|
||||
"""
|
||||
|
||||
name = "SpicyEvt"
|
||||
aliases = ["spicy-evt"]
|
||||
filenames = ["*.evt"]
|
||||
|
||||
_id = r"[a-zA-Z_][a-zA-Z_0-9]*"
|
||||
|
||||
tokens = {
|
||||
"root": [
|
||||
include("whitespace"),
|
||||
include("comments"),
|
||||
include("directives"),
|
||||
include("hooks"),
|
||||
include("modules"),
|
||||
include("keywords"),
|
||||
include("literals"),
|
||||
include("operators"),
|
||||
include("punctuation"),
|
||||
include("function-call"),
|
||||
include("identifiers"),
|
||||
],
|
||||
"whitespace": SpicyLexer.tokens["whitespace"],
|
||||
"comments": SpicyLexer.tokens["comments"],
|
||||
"directives": SpicyLexer.tokens["directives"],
|
||||
"hooks": SpicyLexer.tokens["hooks"],
|
||||
"modules": SpicyLexer.tokens["modules"],
|
||||
"keywords": [
|
||||
(
|
||||
rf"\b(analyzer|with|replaces)(\s+)({_id}(::{_id})*)",
|
||||
bygroups(Keyword, Text, Name.Class),
|
||||
),
|
||||
(
|
||||
words(("protocol", "packet", "file"), prefix=r"\b", suffix=r"\b"),
|
||||
Keyword.Type,
|
||||
),
|
||||
(
|
||||
words(
|
||||
("port", "event", "parse", "over", "mime-type"),
|
||||
prefix=r"\b",
|
||||
suffix=r"\b",
|
||||
),
|
||||
Keyword,
|
||||
),
|
||||
(words(("cast"), prefix=r"\b", suffix=r"\b"), Keyword),
|
||||
(
|
||||
words(
|
||||
(
|
||||
"if",
|
||||
"else",
|
||||
"switch",
|
||||
"case",
|
||||
"default",
|
||||
"try",
|
||||
"catch",
|
||||
"break",
|
||||
"return",
|
||||
"continue",
|
||||
"while",
|
||||
"for",
|
||||
"foreach",
|
||||
),
|
||||
prefix=r"\b",
|
||||
suffix=r"\b",
|
||||
),
|
||||
Keyword,
|
||||
),
|
||||
],
|
||||
"literals": SpicyLexer.tokens["literals"],
|
||||
"operators": SpicyLexer.tokens["operators"],
|
||||
"punctuation": SpicyLexer.tokens["punctuation"],
|
||||
"function-call": SpicyLexer.tokens["function-call"],
|
||||
"identifiers": [
|
||||
(r"\b(ZEEK_VERSION)\b", Name.Builtin),
|
||||
(r"\b(self)\b", Name.Builtin.Pseudo),
|
||||
(r"[$](conn|file|is_orig)", Name.Builtin.Pseudo),
|
||||
(r"([a-zA-Z_]\w*)(::)", bygroups(Name, Punctuation)),
|
||||
(r"[a-zA-Z_]\w*", Name),
|
||||
],
|
||||
"string": SpicyLexer.tokens["string"],
|
||||
"regex": SpicyLexer.tokens["regex"],
|
||||
}
|
597
doc/ext/zeek.py
Normal file
597
doc/ext/zeek.py
Normal file
|
@ -0,0 +1,597 @@
|
|||
"""
|
||||
The Zeek domain for Sphinx.
|
||||
"""
|
||||
|
||||
import collections
|
||||
|
||||
|
||||
def setup(Sphinx):
|
||||
Sphinx.add_domain(ZeekDomain)
|
||||
Sphinx.add_node(see)
|
||||
Sphinx.add_directive_to_domain("zeek", "see", SeeDirective)
|
||||
Sphinx.connect("object-description-transform", object_description_transform)
|
||||
Sphinx.connect("doctree-resolved", process_see_nodes)
|
||||
return {
|
||||
"parallel_read_safe": True,
|
||||
}
|
||||
|
||||
|
||||
from sphinx import addnodes, version_info
|
||||
from sphinx.directives import ObjectDescription
|
||||
from sphinx.domains import Domain, Index, ObjType
|
||||
from sphinx.locale import _
|
||||
from sphinx.roles import XRefRole
|
||||
from sphinx.util import docfields, logging
|
||||
from sphinx.util.nodes import make_refnode
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
from docutils import nodes
|
||||
from docutils.parsers.rst import Directive, directives
|
||||
|
||||
|
||||
class see(nodes.General, nodes.Element):
|
||||
refs = []
|
||||
|
||||
|
||||
class SeeDirective(Directive):
|
||||
has_content = True
|
||||
|
||||
def run(self):
|
||||
n = see("")
|
||||
n.refs = " ".join(self.content).split()
|
||||
return [n]
|
||||
|
||||
|
||||
# Wrapper for creating a tuple for index nodes, staying backwards
|
||||
# compatible to Sphinx < 1.4:
|
||||
def make_index_tuple(indextype, indexentry, targetname, targetname2):
|
||||
if version_info >= (1, 4, 0, "", 0):
|
||||
return (indextype, indexentry, targetname, targetname2, None)
|
||||
else:
|
||||
return (indextype, indexentry, targetname, targetname2)
|
||||
|
||||
|
||||
def object_description_transform(app, domain, objtype, contentnode):
|
||||
"""
|
||||
Add all collected record fields as a "Field" field to a ZeekType.
|
||||
"""
|
||||
if domain != "zeek" or objtype != "type":
|
||||
return
|
||||
|
||||
type_name = app.env.ref_context["zeek:type"]
|
||||
record_fields = app.env.domaindata["zeek"].get("fields", {}).get(type_name)
|
||||
|
||||
if not record_fields:
|
||||
return
|
||||
|
||||
field_list = contentnode[0]
|
||||
|
||||
name = nodes.field_name("", _("Fields"))
|
||||
body = nodes.field_body("")
|
||||
|
||||
for field_name, record_field in record_fields.items():
|
||||
body += record_field["idx"]
|
||||
body += record_field["signode"]
|
||||
|
||||
field_list.append(nodes.field("", name, body))
|
||||
|
||||
|
||||
def process_see_nodes(app, doctree, fromdocname):
|
||||
for node in doctree.traverse(see):
|
||||
content = []
|
||||
para = nodes.paragraph()
|
||||
para += nodes.Text("See also:", "See also:")
|
||||
for name in node.refs:
|
||||
join_str = " "
|
||||
if name != node.refs[0]:
|
||||
join_str = ", "
|
||||
link_txt = join_str + name
|
||||
if name not in app.env.domaindata["zeek"]["idtypes"]:
|
||||
# Just create the text and issue warning
|
||||
logger.warning(
|
||||
'%s: unknown target for ".. zeek:see:: %s"',
|
||||
fromdocname,
|
||||
name,
|
||||
location=node,
|
||||
)
|
||||
para += nodes.Text(link_txt, link_txt)
|
||||
else:
|
||||
# Create a reference
|
||||
typ = app.env.domaindata["zeek"]["idtypes"][name]
|
||||
todocname = app.env.domaindata["zeek"]["objects"][(typ, name)]
|
||||
|
||||
newnode = nodes.reference("", "")
|
||||
innernode = nodes.literal(_(name), _(name), classes=["xref"])
|
||||
newnode["refdocname"] = todocname
|
||||
newnode["refuri"] = app.builder.get_relative_uri(fromdocname, todocname)
|
||||
newnode["refuri"] += "#" + typ + "-" + name
|
||||
newnode.append(innernode)
|
||||
para += nodes.Text(join_str, join_str)
|
||||
para += newnode
|
||||
|
||||
content.append(para)
|
||||
node.replace_self(content)
|
||||
|
||||
|
||||
class ZeekGeneric(ObjectDescription):
|
||||
option_spec = {"source-code": directives.unchanged}
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
super(ObjectDescription, self).__init__(*args, **kwargs)
|
||||
options = args[2]
|
||||
self.code_url = None
|
||||
|
||||
if "source-code" in options and "zeek-code-url" in self.env.config:
|
||||
base_url = self.env.config["zeek-code-url"]
|
||||
path, start, end = options["source-code"].split()
|
||||
path_parts = path.split("/")
|
||||
file_name = path_parts[-1]
|
||||
|
||||
# Don't have anything to link to for BIFs
|
||||
if not file_name.endswith(".bif.zeek"):
|
||||
self.code_url = f"{base_url}/scripts/{path}#L{start}-L{end}"
|
||||
|
||||
def get_obj_name(self):
|
||||
return self.objtype
|
||||
|
||||
def update_type_map(self, idname):
|
||||
if "idtypes" not in self.env.domaindata["zeek"]:
|
||||
self.env.domaindata["zeek"]["idtypes"] = {}
|
||||
self.env.domaindata["zeek"]["idtypes"][idname] = self.get_obj_name()
|
||||
|
||||
def process_signode(self, name, sig, signode, targetname):
|
||||
signode["names"].append(targetname)
|
||||
signode["ids"].append(targetname)
|
||||
signode["first"] = not self.names
|
||||
self.state.document.note_explicit_target(signode)
|
||||
|
||||
def add_target_and_index(self, name, sig, signode):
|
||||
targetname = self.get_obj_name() + "-" + name
|
||||
|
||||
if targetname not in self.state.document.ids:
|
||||
self.process_signode(name, sig, signode, targetname)
|
||||
|
||||
objects = self.env.domaindata["zeek"]["objects"]
|
||||
key = (self.get_obj_name(), name)
|
||||
|
||||
if (
|
||||
key in objects
|
||||
and self.get_obj_name() != "id"
|
||||
and self.get_obj_name() != "type"
|
||||
and self.get_obj_name() != "field"
|
||||
):
|
||||
logger.warning(
|
||||
"%s: duplicate description of %s %s, other instance in %s %s",
|
||||
self.env.docname,
|
||||
self.get_obj_name(),
|
||||
name,
|
||||
self.env.doc2path(objects[key]),
|
||||
self.lineno,
|
||||
)
|
||||
|
||||
objects[key] = self.env.docname
|
||||
self.update_type_map(name)
|
||||
|
||||
indextext = self.get_index_text(name)
|
||||
|
||||
if indextext:
|
||||
self.indexnode["entries"].append(
|
||||
make_index_tuple("single", indextext, targetname, targetname)
|
||||
)
|
||||
|
||||
def get_index_text(self, name):
|
||||
return _("%s (%s)") % (name, self.get_obj_name())
|
||||
|
||||
def handle_signature(self, sig, signode):
|
||||
if self.code_url:
|
||||
signode += nodes.reference(
|
||||
sig, sig, refuri=self.code_url, reftitle="View Source Code"
|
||||
)
|
||||
|
||||
# Could embed snippets directly, but would probably want to clean
|
||||
# up how it's done: don't use an external script, figure out why
|
||||
# tab/indentation is broken, toggle snippet visibility on mouse
|
||||
# hover or other explicit button/link, fix the colors/theming...
|
||||
# But for now, leaving this commented out as an example and quick
|
||||
# way of checking that the code ranges that Zeekygen outputs are
|
||||
# sensible.
|
||||
|
||||
# import urllib
|
||||
# snippet_target = urllib.parse.quote(self.code_url, '')
|
||||
# snippet_url = 'https://emgithub.com/embed.js'
|
||||
# snippet_url += f'?target={snippet_target}'
|
||||
# snippet_url += '&style=github'
|
||||
# snippet_url += '&showLineNumbers=on'
|
||||
# snippet_url += '&showBorder=on'
|
||||
# snippet_url += '&ts=4'
|
||||
# rawnode = nodes.raw('', f'<script src="{snippet_url}"></script>',
|
||||
# format='html')
|
||||
# signode += rawnode
|
||||
|
||||
else:
|
||||
signode += addnodes.desc_name("", sig)
|
||||
|
||||
return sig
|
||||
|
||||
|
||||
class ZeekNamespace(ZeekGeneric):
|
||||
def add_target_and_index(self, name, sig, signode):
|
||||
targetname = self.get_obj_name() + "-" + name
|
||||
|
||||
if targetname not in self.state.document.ids:
|
||||
signode["names"].append(targetname)
|
||||
signode["ids"].append(targetname)
|
||||
signode["first"] = not self.names
|
||||
self.state.document.note_explicit_target(signode)
|
||||
|
||||
objects = self.env.domaindata["zeek"]["objects"]
|
||||
key = (self.get_obj_name(), name)
|
||||
objects[key] = self.env.docname
|
||||
self.update_type_map(name)
|
||||
|
||||
indextext = self.get_index_text(name)
|
||||
self.indexnode["entries"].append(
|
||||
make_index_tuple("single", indextext, targetname, targetname)
|
||||
)
|
||||
self.indexnode["entries"].append(
|
||||
make_index_tuple("single", f"namespaces; {sig}", targetname, targetname)
|
||||
)
|
||||
|
||||
def get_index_text(self, name):
|
||||
return _("%s (namespace); %s") % (name, self.env.docname)
|
||||
|
||||
def handle_signature(self, sig, signode):
|
||||
signode += addnodes.desc_name("", sig)
|
||||
return sig
|
||||
|
||||
|
||||
class ZeekEnum(ZeekGeneric):
|
||||
def add_target_and_index(self, name, sig, signode):
|
||||
targetname = self.get_obj_name() + "-" + name
|
||||
|
||||
if targetname not in self.state.document.ids:
|
||||
self.process_signode(name, sig, signode, targetname)
|
||||
|
||||
objects = self.env.domaindata["zeek"]["objects"]
|
||||
key = (self.get_obj_name(), name)
|
||||
objects[key] = self.env.docname
|
||||
self.update_type_map(name)
|
||||
|
||||
# indextext = self.get_index_text(name)
|
||||
# self.indexnode['entries'].append(make_index_tuple('single', indextext,
|
||||
# targetname, targetname))
|
||||
m = sig.split()
|
||||
|
||||
if len(m) < 2:
|
||||
logger.warning(
|
||||
"%s: zeek:enum directive missing argument(s)", self.env.docname
|
||||
)
|
||||
return
|
||||
|
||||
if m[1] == "Notice::Type":
|
||||
if "notices" not in self.env.domaindata["zeek"]:
|
||||
self.env.domaindata["zeek"]["notices"] = []
|
||||
self.env.domaindata["zeek"]["notices"].append(
|
||||
(m[0], self.env.docname, targetname)
|
||||
)
|
||||
|
||||
self.indexnode["entries"].append(
|
||||
make_index_tuple(
|
||||
"single", f"{m[1]} (enum values); {m[0]}", targetname, targetname
|
||||
)
|
||||
)
|
||||
|
||||
def handle_signature(self, sig, signode):
|
||||
m = sig.split()
|
||||
name = m[0]
|
||||
signode += addnodes.desc_name("", name)
|
||||
return name
|
||||
|
||||
|
||||
class ZeekParamField(docfields.GroupedField):
|
||||
has_arg = True
|
||||
is_typed = True
|
||||
|
||||
|
||||
class ZeekIdentifier(ZeekGeneric):
|
||||
zeek_param_field = ZeekParamField("param", label="Parameters", can_collapse=True)
|
||||
field_type_map = {"param": (zeek_param_field, False)}
|
||||
|
||||
def get_index_text(self, name):
|
||||
return name
|
||||
|
||||
def get_field_type_map(self):
|
||||
return self.field_type_map
|
||||
|
||||
|
||||
class ZeekNative(ZeekGeneric):
|
||||
def handle_signature(self, sig, signode):
|
||||
# The run() method is overridden to drop signode anyway in favor of
|
||||
# simply adding the index and a target nodes and leaving up
|
||||
# to the .rst document to explicitly add things that need to
|
||||
# be presented in the final rendering (e.g. a section header)
|
||||
self.native_name = sig
|
||||
return sig
|
||||
|
||||
def process_signode(self, name, sig, signode, targetname):
|
||||
pass
|
||||
|
||||
def run(self):
|
||||
ns = super().run()
|
||||
index_node = ns[0]
|
||||
|
||||
target_id = self.get_obj_name() + "-" + self.native_name
|
||||
target_node = nodes.target("", "", ids=[target_id])
|
||||
self.state.document.note_explicit_target(target_node)
|
||||
|
||||
# Replace the description node from Sphinx with a simple target node
|
||||
return [index_node, target_node]
|
||||
|
||||
|
||||
class ZeekKeyword(ZeekNative):
|
||||
def get_index_text(self, name):
|
||||
if name and name[0] == "@":
|
||||
return _("%s (directive)") % (name)
|
||||
else:
|
||||
return _("%s (keyword)") % (name)
|
||||
|
||||
|
||||
class ZeekAttribute(ZeekNative):
|
||||
def get_index_text(self, name):
|
||||
return _("%s (attribute)") % (name)
|
||||
|
||||
|
||||
class ZeekType(ZeekGeneric):
|
||||
"""
|
||||
Put the type that's currently documented into env.ref_context
|
||||
for usage with the ZeekField directive.
|
||||
"""
|
||||
|
||||
def before_content(self):
|
||||
self.env.ref_context["zeek:type"] = self.arguments[0]
|
||||
|
||||
def after_content(self):
|
||||
self.env.ref_context.pop("zeek:type", None)
|
||||
|
||||
|
||||
class ZeekField(ZeekGeneric):
|
||||
def handle_signature(self, sig, signode):
|
||||
"""
|
||||
The signature for .. zeek:field: currently looks like the following:
|
||||
|
||||
.. zeek:field:: ts :zeek:type:`time` :zeek:attr:`&log` :zeek:attr:`&optional`
|
||||
"""
|
||||
parts = sig.split(" ", 2)
|
||||
name, type_str = parts[0:2]
|
||||
record_type = self.env.ref_context["zeek:type"]
|
||||
fullname = "$".join([record_type, name])
|
||||
attrs_str = ""
|
||||
if len(parts) == 3:
|
||||
attrs_str = parts[2]
|
||||
|
||||
type_nodes, _ = self.state.inline_text(type_str, -1)
|
||||
|
||||
signode += addnodes.desc_name(name, name)
|
||||
signode += addnodes.desc_sig_punctuation("", ":")
|
||||
signode += addnodes.desc_sig_space()
|
||||
signode += type_nodes
|
||||
|
||||
if attrs_str:
|
||||
attr_nodes, _ = self.state.inline_text(attrs_str, -1)
|
||||
signode += addnodes.desc_sig_space()
|
||||
signode += attr_nodes
|
||||
|
||||
signode["class"] = record_type
|
||||
signode["fullname"] = fullname
|
||||
|
||||
return fullname
|
||||
|
||||
def run(self):
|
||||
idx, signode = super().run()
|
||||
|
||||
record_type = self.env.ref_context["zeek:type"]
|
||||
|
||||
fields = self.env.domaindata["zeek"].setdefault("fields", {})
|
||||
rfields = fields.setdefault(record_type, collections.OrderedDict())
|
||||
rfields[signode[0]["fullname"]] = {
|
||||
"idx": idx,
|
||||
"signode": signode,
|
||||
}
|
||||
|
||||
return []
|
||||
|
||||
|
||||
class ZeekNativeType(ZeekNative):
|
||||
def get_obj_name(self):
|
||||
# As opposed to using 'native-type', just imitate 'type'.
|
||||
return "type"
|
||||
|
||||
|
||||
class ZeekFieldXRefRole(XRefRole):
|
||||
def process_link(self, env, refnode, has_explicit_title, title, target):
|
||||
title, target = super().process_link(
|
||||
env, refnode, has_explicit_title, title, target
|
||||
)
|
||||
|
||||
parts = title.split("$")
|
||||
if len(parts) == 2 and parts[0] and parts[1]:
|
||||
# If a field is in Type$field, form, strip Type.
|
||||
title = parts[1]
|
||||
|
||||
return title, target
|
||||
|
||||
|
||||
class ZeekNotices(Index):
    """
    Index subclass to provide the Zeek notices index.
    """

    name = "noticeindex"
    localname = _("Zeek Notice Index")
    shortname = _("notices")

    def generate(self, docnames=None):
        """Build index entries, grouped by the notice's module prefix."""
        grouped = {}

        domaindata = self.domain.env.domaindata["zeek"]
        if "notices" not in domaindata:
            return grouped, False

        for notice in domaindata["notices"]:
            # Group under everything before the first "::".
            module = notice[0].split("::")[0]
            grouped.setdefault(module, []).append(
                [notice[0], 0, notice[1], notice[2], "", "", ""]
            )

        return sorted(grouped.items()), False
|
||||
|
||||
|
||||
class ZeekDomain(Domain):
    """Zeek domain.

    Registers the Zeek-specific directives, cross-reference roles and
    indices with Sphinx, and resolves ``:zeek:*:`` references against
    the objects collected in the domain data.
    """

    name = "zeek"
    label = "Zeek"

    object_types = {
        "type": ObjType(_("type"), "type"),
        # 'native-type' objects intentionally resolve via the 'type' role.
        "native-type": ObjType(_("type"), "type"),
        "namespace": ObjType(_("namespace"), "namespace"),
        "id": ObjType(_("id"), "id"),
        "keyword": ObjType(_("keyword"), "keyword"),
        "enum": ObjType(_("enum"), "enum"),
        "attr": ObjType(_("attr"), "attr"),
        "field": ObjType(_("field"), "field"),
    }

    directives = {
        "type": ZeekType,
        "native-type": ZeekNativeType,
        "namespace": ZeekNamespace,
        "id": ZeekIdentifier,
        "keyword": ZeekKeyword,
        "enum": ZeekEnum,
        "attr": ZeekAttribute,
        "field": ZeekField,
    }

    roles = {
        "type": XRefRole(),
        "namespace": XRefRole(),
        "id": XRefRole(),
        "keyword": XRefRole(),
        "enum": XRefRole(),
        "attr": XRefRole(),
        "see": XRefRole(),
        # Fields get a custom role that strips the record type from titles.
        "field": ZeekFieldXRefRole(),
    }

    indices = [
        ZeekNotices,
    ]

    initial_data = {
        "objects": {},  # fullname -> docname, objtype
    }

    def clear_doc(self, docname):
        """Drop all objects registered by ``docname`` (re-read support)."""
        to_delete = []

        # Collect first, then delete: can't mutate the dict while iterating.
        for (typ, name), doc in self.data["objects"].items():
            if doc == docname:
                to_delete.append((typ, name))

        for typ, name in to_delete:
            del self.data["objects"][typ, name]

    def resolve_xref(self, env, fromdocname, builder, typ, target, node, contnode):
        """Resolve a ``:zeek:<typ>:`` cross-reference to a refnode.

        ``see`` references look the target's object type up in the
        ``idtypes`` table; other roles try each object type the role may
        refer to. Unknown targets are logged and left unresolved.
        """
        objects = self.data["objects"]

        if typ == "see":
            if target not in self.data["idtypes"]:
                logger.warning(
                    '%s: unknown target for ":zeek:see:`%s`"', fromdocname, target
                )
                return []

            objtype = self.data["idtypes"][target]
            return make_refnode(
                builder,
                fromdocname,
                objects[objtype, target],
                objtype + "-" + target,
                contnode,
                target + " " + objtype,
            )
        elif typ == "field" and "$" not in target:
            # :zeek:field:`x` without a record type ends up just x, no ref.
            return []
        else:
            objtypes = self.objtypes_for_role(typ)

            for objtype in objtypes:
                if (objtype, target) in objects:
                    return make_refnode(
                        builder,
                        fromdocname,
                        objects[objtype, target],
                        objtype + "-" + target,
                        contnode,
                        target + " " + objtype,
                    )
            else:
                # for/else: runs when no objtype matched (loop returns on
                # success). Falls through returning None, which Sphinx
                # treats as an unresolved reference.
                logger.warning(
                    '%s: unknown target for ":zeek:%s:`%s`"',
                    fromdocname,
                    typ,
                    target,
                )

    def get_objects(self):
        """Yield (name, dispname, type, docname, anchor, priority) tuples."""
        for (typ, name), docname in self.data["objects"].items():
            yield name, name, typ, docname, typ + "-" + name, 1

    def merge_domaindata(self, docnames, otherdata):
        """
        Merge domaindata in multiprocess mode.

        I'm quite unclear how the objects dict works out okay in single
        process mode. For example, the file_entropy() event is defined
        in scripts/base/bif/plugins/Zeek_FileEntropy.events.bif.zeek.rst
        *and* in script-reference/autogenerated-file-analyzer-index.rst.
        The current documentation refers to the first one for :zeek:see:.
        It seems in single process mode the reading sorts filenames and
        just uses the last highest sorting one. That ends-up being the one
        in scripts/base.

        In [4]: "script-reference/autogenerated" < "scripts/base"
        Out[4]: True

        """
        for target, data in otherdata.items():
            if target == "version":
                continue
            elif hasattr(data, "items"):
                # dict-like domaindata (objects, idtypes, fields, ...).
                target_data = self.env.domaindata["zeek"].setdefault(target, {})

                # Iterate manually over the elements for debugging
                for k, v in data.items():
                    if k not in target_data:
                        target_data[k] = v
                    else:
                        # The > comparison below updates the objects domaindata
                        # to filenames that sort higher. See comment above.
                        if isinstance(v, str):
                            if v > target_data[k]:
                                target_data[k] = v
                        else:
                            # Otherwise assume it's a dict and we can merge
                            # using update()
                            target_data[k].update(v)

            elif hasattr(data, "extend"):
                # notices are a list
                target_data = self.env.domaindata["zeek"].setdefault(target, [])
                target_data.extend(data)
            else:
                raise NotImplementedError(target, type(data))
|
247
doc/ext/zeek_pygments.py
Normal file
247
doc/ext/zeek_pygments.py
Normal file
|
@ -0,0 +1,247 @@
|
|||
from pygments.lexer import RegexLexer, bygroups, include, words
|
||||
from pygments.token import (
|
||||
Comment,
|
||||
Keyword,
|
||||
Literal,
|
||||
Name,
|
||||
Number,
|
||||
Operator,
|
||||
Punctuation,
|
||||
String,
|
||||
Text,
|
||||
)
|
||||
|
||||
|
||||
def setup(Sphinx):
|
||||
return {
|
||||
"parallel_read_safe": True,
|
||||
}
|
||||
|
||||
|
||||
class ZeekLexer(RegexLexer):
    """
    For `Zeek <https://www.zeek.org/>`_ scripts.

    .. versionadded:: 2.5
    """

    name = "Zeek"
    aliases = ["zeek"]
    filenames = ["*.zeek"]

    # Reusable regex fragments: hex digit, floating-point number (with
    # optional exponent), and a hostname label.
    _hex = r"[0-9a-fA-F]"
    _float = r"((\d*\.?\d+)|(\d+\.?\d*))([eE][-+]?\d+)?"
    _h = r"[A-Za-z0-9][-A-Za-z0-9]*"

    tokens = {
        # Order matters within 'root': earlier includes win, so e.g.
        # attribute/keyword matches take precedence over bare identifiers.
        "root": [
            include("whitespace"),
            include("comments"),
            include("directives"),
            include("attributes"),
            include("types"),
            include("keywords"),
            include("literals"),
            include("operators"),
            include("punctuation"),
            # A (possibly ::-qualified) identifier followed by '(' is
            # highlighted as a function name (lookahead, '(' not consumed).
            (
                r"\b((?:[A-Za-z_][A-Za-z_0-9]*)(?:::(?:[A-Za-z_][A-Za-z_0-9]*))*)(?=\s*\()",
                Name.Function,
            ),
            include("identifiers"),
        ],
        "whitespace": [
            (r"\n", Text),
            (r"\s+", Text),
            (r"\\\n", Text),  # line continuation
        ],
        "comments": [
            (r"#.*$", Comment),
        ],
        # '@'-prefixed preprocessor-style directives.
        "directives": [
            (r"(@(load-plugin|load-sigs|load|unload))\b.*$", Comment.Preproc),
            (
                r"(@(DEBUG|DIR|FILENAME|deprecated|if|ifdef|ifndef|else|endif))\b",
                Comment.Preproc,
            ),
            (r"(@prefixes)\s*(\+?=).*$", Comment.Preproc),
        ],
        # '&'-prefixed attributes, e.g. &redef, &log, &optional.
        "attributes": [
            (
                words(
                    (
                        "redef",
                        "priority",
                        "log",
                        "optional",
                        "default",
                        "add_func",
                        "delete_func",
                        "expire_func",
                        "read_expire",
                        "write_expire",
                        "create_expire",
                        "synchronized",
                        "persistent",
                        "rotate_interval",
                        "rotate_size",
                        "encrypt",
                        "raw_output",
                        "mergeable",
                        "error_handler",
                        "broker_allow_complex_type",
                        "is_assigned",
                        "is_used",
                        "type_column",
                        "deprecated",
                        "on_change",
                        "backend",
                        "broker_store",
                    ),
                    prefix=r"&",
                    suffix=r"\b",
                ),
                Keyword.Pseudo,
            ),
        ],
        "types": [
            # Built-in type names.
            (
                words(
                    (
                        "any",
                        "enum",
                        "record",
                        "set",
                        "table",
                        "vector",
                        "function",
                        "hook",
                        "event",
                        "addr",
                        "bool",
                        "count",
                        "double",
                        "file",
                        "int",
                        "interval",
                        "pattern",
                        "port",
                        "string",
                        "subnet",
                        "time",
                    ),
                    prefix=r"\b",
                    suffix=r"\b",
                ),
                Keyword.Type,
            ),
            # 'opaque of SomeType' declarations.
            (
                r"\b(opaque)(\s+)(of)(\s+)((?:[A-Za-z_][A-Za-z_0-9]*)(?:::(?:[A-Za-z_][A-Za-z_0-9]*))*)\b",
                bygroups(Keyword.Type, Text, Operator.Word, Text, Keyword.Type),
            ),
            # 'type Name: record' / 'type Name: enum' — name is a class.
            (
                r"\b(type)(\s+)((?:[A-Za-z_][A-Za-z_0-9]*)(?:::(?:[A-Za-z_][A-Za-z_0-9]*))*)(\s*)(:)(\s*)\b(record|enum)\b",
                bygroups(Keyword, Text, Name.Class, Text, Operator, Text, Keyword.Type),
            ),
            # Other 'type Name:' declarations.
            (
                r"\b(type)(\s+)((?:[A-Za-z_][A-Za-z_0-9]*)(?:::(?:[A-Za-z_][A-Za-z_0-9]*))*)(\s*)(:)",
                bygroups(Keyword, Text, Name, Text, Operator),
            ),
            # 'redef record Name' / 'redef enum Name' extensions.
            (
                r"\b(redef)(\s+)(record|enum)(\s+)((?:[A-Za-z_][A-Za-z_0-9]*)(?:::(?:[A-Za-z_][A-Za-z_0-9]*))*)\b",
                bygroups(Keyword, Text, Keyword.Type, Text, Name.Class),
            ),
        ],
        "keywords": [
            (
                words(
                    (
                        "redef",
                        "export",
                        "if",
                        "else",
                        "for",
                        "while",
                        "return",
                        "break",
                        "next",
                        "continue",
                        "fallthrough",
                        "switch",
                        "default",
                        "case",
                        "add",
                        "delete",
                        "copy",
                        "when",
                        "timeout",
                        "schedule",
                    ),
                    prefix=r"\b",
                    suffix=r"\b",
                ),
                Keyword,
            ),
            (r"\b(print)\b", Keyword),
            (r"\b(global|local|const|option)\b", Keyword.Declaration),
            # 'module Name::Sub' statements.
            (
                r"\b(module)(\s+)(([A-Za-z_][A-Za-z_0-9]*)(?:::([A-Za-z_][A-Za-z_0-9]*))*)\b",
                bygroups(Keyword.Namespace, Text, Name.Namespace),
            ),
        ],
        "literals": [
            (r'"', String, "string"),
            # Not the greatest match for patterns, but generally helps
            # disambiguate between start of a pattern and just a division
            # operator.
            (r"/(?=.*/)", String.Regex, "regex"),
            (r"\b(T|F)\b", Keyword.Constant),
            # Port
            (r"\b\d{1,5}/(udp|tcp|icmp|unknown)\b", Number),
            # IPv4 Address
            (
                r"\b(25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[0-9]{1,2})\.(25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[0-9]{1,2})\.(25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[0-9]{1,2})\.(25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[0-9]{1,2})\b",
                Number,
            ),
            # IPv6 Address (not 100% correct: that takes more effort)
            (
                r"\[([0-9a-fA-F]{0,4}:){2,7}([0-9a-fA-F]{0,4})?((25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[0-9]{1,2})\.(25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[0-9]{1,2})\.(25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[0-9]{1,2})\.(25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[0-9]{1,2}))?\]",
                Number,
            ),
            # Numeric
            (r"\b0[xX]" + _hex + r"+\b", Number.Hex),
            (r"\b" + _float + r"\s*(day|hr|min|sec|msec|usec)s?\b", Literal.Date),
            (r"\b" + _float + r"\b", Number.Float),
            (r"\b(\d+)\b", Number.Integer),
            # Hostnames
            (_h + r"(\." + _h + r")+", String),
        ],
        "operators": [
            (r"[!%*/+<=>~|&^-]", Operator),
            (r"([-+=&|]{2}|[+=!><-]=)", Operator),
            (r"\b(in|as|is|of)\b", Operator.Word),
            (r"\??\$", Operator),
            # Technically, colons are often used for punctuation/separation.
            # E.g. field name/type separation.
            (r"[?:]", Operator),
        ],
        "punctuation": [
            (r"\?\$", Punctuation),
            (r"[{}()\[\],;:.]", Punctuation),
        ],
        "identifiers": [
            # Namespace-qualified prefix, e.g. 'Module::'.
            (r"([a-zA-Z_]\w*)(::)", bygroups(Name, Punctuation)),
            (r"[a-zA-Z_]\w*", Name),
        ],
        # Inside a double-quoted string (entered from 'literals').
        "string": [
            (r"\\.", String.Escape),
            (r"%-?[0-9]*(\.[0-9]+)?[DTdxsefg]", String.Escape),  # fmt specifiers
            (r'"', String, "#pop"),
            (r".", String),
        ],
        # Inside a /.../ pattern literal (entered from 'literals').
        "regex": [
            (r"\\.", String.Escape),
            (r"/", String.Regex, "#pop"),
            (r".", String.Regex),
        ],
    }
|
644
doc/frameworks/broker.rst
Normal file
644
doc/frameworks/broker.rst
Normal file
|
@ -0,0 +1,644 @@
|
|||
.. _CAF: https://github.com/actor-framework/actor-framework
|
||||
|
||||
.. _broker-framework:
|
||||
|
||||
==============================
|
||||
Broker Communication Framework
|
||||
==============================
|
||||
|
||||
.. rst-class:: opening
|
||||
|
||||
Zeek uses the `Broker Library
|
||||
<https://docs.zeek.org/projects/broker>`_ to exchange information with
|
||||
other Zeek processes. Broker itself uses CAF_ (C++ Actor Framework)
|
||||
internally for connecting nodes and exchanging arbitrary data over
|
||||
networks. Broker then introduces, on top of CAF, a topic-based
|
||||
publish/subscribe communication pattern using a data model that is
|
||||
compatible to Zeek's. Broker itself can be utilized outside the
|
||||
context of Zeek, with Zeek itself making use of only a few predefined
|
||||
Broker message formats that represent Zeek events, log entries, etc.
|
||||
|
||||
In summary, the Zeek's Broker framework provides basic facilities for
|
||||
connecting broker-enabled peers (e.g. Zeek instances) to each other
|
||||
and exchanging messages (e.g. events and logs).
|
||||
|
||||
Cluster Layout / API
|
||||
====================
|
||||
|
||||
Layout / Topology
|
||||
-----------------
|
||||
|
||||
In a Zeek cluster setup, every Zeek process is assigned a cluster role.
|
||||
Such a process is then called a Zeek node, a cluster node, or just named
|
||||
after the role of the process (the manager, the loggers, ...). A basic Zeek
|
||||
cluster uses four different node types, enumerated in the script-level
|
||||
variable :zeek:see:`Cluster::NodeType`.
|
||||
|
||||
- Manager
|
||||
- Logger
|
||||
- Worker
|
||||
- Proxy
|
||||
|
||||
In small Zeek deployments, all nodes may run on a single host. In large
|
||||
Zeek deployments, nodes may be distributed across multiple physical
|
||||
systems for scaling.
|
||||
|
||||
Currently, a single Manager node in a Zeek cluster exists. Further, connectivity
|
||||
between nodes is determined statically based on their type:
|
||||
|
||||
- Every node connects to all loggers and the manager.
|
||||
|
||||
- Each worker connects to all proxies.
|
||||
|
||||
|
||||
.. figure:: broker/cluster-layout.png
|
||||
|
||||
Some general suggestions as to the purpose/utilization of each node type:
|
||||
|
||||
- Workers: are a good first choice for doing the brunt of any work you need
|
||||
done. They should be spending a lot of time performing the actual job
|
||||
of parsing/analyzing incoming data from packets, so you might choose
|
||||
to look at them as doing a "first pass" analysis and then deciding how
|
||||
the results should be shared with other nodes in the cluster.
|
||||
|
||||
- Proxies: serve as intermediaries for data storage and work/calculation
|
||||
offloading. Good for helping offload work or data in a scalable and
|
||||
distributed way. Since any given worker is connected to all
|
||||
proxies and can agree on an "arbitrary key -> proxy node" mapping
|
||||
(more on that later), you can partition work or data amongst them in a
|
||||
uniform manner. e.g. you might choose to use proxies as a method of
|
||||
sharing non-persistent state or as a "second pass" analysis for any
|
||||
work that you don't want interfering with the workers' capacity to
|
||||
keep up with capturing and parsing packets. Note that the default scripts
|
||||
that come with Zeek make minimal use of proxies, so if you are coming
|
||||
from a previous ZeekControl deployment, you may want to try reducing down
|
||||
to a single proxy node. If you come to have custom/community scripts
|
||||
that utilize proxies, that would be the time to start considering scaling
|
||||
up the number of proxies to meet demands.
|
||||
|
||||
- Manager: this node will be good at performing decisions that require a
|
||||
global view of things since it is in a centralized location, connected
|
||||
to everything. However, that also makes it easy to overload, so try
|
||||
to use it sparingly and only for tasks that must be done in a
|
||||
centralized or authoritative location. Optionally, for some
|
||||
deployments, the Manager can also serve as the sole Logger.
|
||||
|
||||
- Loggers: these nodes should simply be spending their time writing out
|
||||
logs to disk and not used for much else. In the default cluster
|
||||
configuration, logs get distributed among available loggers in a
|
||||
round-robin fashion, providing failover capability should any given
|
||||
logger temporarily go offline.
|
||||
|
||||
Data Management/Sharing Strategies
|
||||
==================================
|
||||
|
||||
There is perhaps no single best approach or pattern to use when you need a
|
||||
Zeek script to store or share long-term state and data. The two
|
||||
approaches that were previously used were either using the ``&synchronized``
|
||||
attribute on tables/sets or by explicitly sending events to specific
|
||||
nodes on which you wanted data to be stored. The former is no longer
|
||||
possible, though there are several new possibilities that the new
|
||||
Broker/Cluster framework offer, namely distributed data store and data
|
||||
partitioning APIs.
|
||||
|
||||
Data Stores
|
||||
-----------
|
||||
|
||||
Broker provides a distributed key-value store interface with optional
|
||||
choice of using a persistent backend. For more detail, see
|
||||
:ref:`this example <data_store_example>`.
|
||||
|
||||
Some ideas/considerations/scenarios when deciding whether to use
|
||||
a data store for your use-case:
|
||||
|
||||
* If you need the full data set locally in order to achieve low-latency
|
||||
queries using data store "clones" can provide that.
|
||||
|
||||
* If you need data that persists across restarts of Zeek processes, then
|
||||
data stores can also provide that.
|
||||
|
||||
* If the data you want to store is complex (tables, sets, records) or
|
||||
you expect to read, modify, and store back, then data stores may not
|
||||
be able to provide simple, race-free methods of performing the pattern
|
||||
of logic that you want.
|
||||
|
||||
* If the data set you want to store is excessively large, that's still
|
||||
problematic even for stores that use a persistent backend as they are
|
||||
implemented in a way that requires a full snapshot of the store's
|
||||
contents to fit in memory (this limitation may change in the future).
|
||||
|
||||
Data Partitioning
|
||||
-----------------
|
||||
|
||||
New data partitioning strategies are available using the API in
|
||||
:doc:`/scripts/base/frameworks/cluster/pools.zeek`. Using that API, developers
|
||||
of custom Zeek scripts can define a custom pool of nodes that best fits the
|
||||
needs of their script.
|
||||
|
||||
One example strategy is to use Highest Random Weight (HRW) hashing to
|
||||
partition data tables amongst the pool of all proxy nodes. e.g. using
|
||||
:zeek:see:`Cluster::publish_hrw`. This could allow clusters to
|
||||
be scaled more easily than the approach of "the entire data set gets
|
||||
synchronized to all nodes" as the solution to memory limitations becomes
|
||||
"just add another proxy node". It may also take away some of the
|
||||
messaging load that used to be required to synchronize data sets across
|
||||
all nodes.
|
||||
|
||||
The tradeoff of this approach, is that nodes that leave the pool (due to
|
||||
crashing, etc.) cause a temporary gap in the total data set until
|
||||
workers start hashing keys to a new proxy node that is still alive,
|
||||
causing data to now be located and updated there.
|
||||
|
||||
If the developer of a script expects its workload to be particularly
|
||||
intensive, wants to ensure that their operations get exclusive
|
||||
access to nodes, or otherwise set constraints on the number of nodes within
|
||||
a pool utilized by their script, then the :zeek:see:`Cluster::PoolSpec`
|
||||
structure will allow them to do that while still allowing users of that script
|
||||
to override the default suggestions made by the original developer.
|
||||
|
||||
Broker Framework Examples
|
||||
=========================
|
||||
|
||||
The broker framework provides basic facilities for connecting Zeek instances
|
||||
to each other and exchanging messages, like events or logs.
|
||||
|
||||
See :doc:`/scripts/base/frameworks/broker/main.zeek` for an overview
|
||||
of the main Broker API.
|
||||
|
||||
.. _broker_topic_naming:
|
||||
|
||||
Topic Naming Conventions
|
||||
------------------------
|
||||
|
||||
All Broker-based messaging involves two components: the information you
|
||||
want to send (e.g. an event w/ its arguments) along with an associated
|
||||
topic name string. The topic strings are used as a filtering mechanism:
|
||||
Broker uses a publish/subscribe communication pattern where peers
|
||||
advertise interest in topic **prefixes** and only receive messages which
|
||||
match one of their prefix subscriptions.
|
||||
|
||||
Broker itself supports arbitrary topic strings, however Zeek generally
|
||||
follows certain conventions in choosing these topics to help avoid
|
||||
conflicts and generally make them easier to remember.
|
||||
|
||||
As a reminder of how topic subscriptions work, subscribers advertise
|
||||
interest in a topic **prefix** and then receive any messages published by a
|
||||
peer to a topic name that starts with that prefix. E.g. Alice
|
||||
subscribes to the "alice/dogs" prefix, then would receive the following
|
||||
message topics published by Bob:
|
||||
|
||||
- topic "alice/dogs/corgi"
|
||||
- topic "alice/dogs"
|
||||
- topic "alice/dogsarecool/oratleastilikethem"
|
||||
|
||||
Alice would **not** receive the following message topics published by Bob:
|
||||
|
||||
- topic "alice/cats/siamese"
|
||||
- topic "alice/cats"
|
||||
- topic "alice/dog"
|
||||
- topic "alice"
|
||||
|
||||
Note that the topics aren't required to form a slash-delimited hierarchy;
the subscription matching is purely a byte-by-byte prefix comparison.
|
||||
|
||||
However, Zeek scripts generally will follow a topic naming hierarchy and
|
||||
any given script will make the topic names it uses apparent via some
|
||||
redef'able constant in its export section. Generally topics that Zeek
|
||||
scripts use will be along the lines of :samp:`zeek/{<namespace>}/{<specifics>}`
|
||||
with :samp:`{<namespace>}` being the script's module name (in all-undercase).
|
||||
For example, you might expect an imaginary ``Pretend`` framework to
|
||||
publish/subscribe using topic names like ``zeek/pretend/my_cool_event``.
|
||||
For scripts that use Broker as a means of cluster-aware analysis,
|
||||
it's usually sufficient for them to make use of the topics declared
|
||||
by the cluster framework. For scripts that are meant to establish
|
||||
communication flows unrelated to Zeek cluster, new topics are declared
|
||||
(examples being the NetControl and Control frameworks).
|
||||
|
||||
For cluster operation, see :doc:`/scripts/base/frameworks/cluster/main.zeek`
|
||||
for a list of topics that are useful for steering published events to
|
||||
the various node classes. E.g. you have the ability to broadcast
|
||||
to all nodes of a given class (e.g. just workers) or just send to a
|
||||
specific node within a class.
|
||||
|
||||
The topic names that logs get published under are a bit nuanced. In the
|
||||
default cluster configuration, they are round-robin published to
|
||||
explicit topic names that identify a single logger. In standalone Zeek
|
||||
processes, logs get published to the topic indicated by
|
||||
:zeek:see:`Broker::default_log_topic_prefix`.
|
||||
|
||||
For those writing their own scripts which need new topic names, a
|
||||
suggestion would be to avoid prefixing any new topics/prefixes with
|
||||
``zeek/`` as any changes in scripts shipping with Zeek will use that prefix
|
||||
and it's better to not risk unintended conflicts. Again, it's
|
||||
often less confusing to just re-use existing topic names instead
|
||||
of introducing new topic names. The typical use case is writing
|
||||
a cluster-enabled script, which usually just needs to route events
|
||||
based upon node classes, and that already has usable topics in the
|
||||
cluster framework.
|
||||
|
||||
Connecting to Peers
|
||||
-------------------
|
||||
|
||||
Zeek can accept incoming connections by calling :zeek:see:`Broker::listen`.
|
||||
|
||||
.. literalinclude:: broker/connecting-listener.zeek
|
||||
:caption: connecting-listener.zeek
|
||||
:language: zeek
|
||||
:linenos:
|
||||
:tab-width: 4
|
||||
|
||||
Zeek can initiate outgoing connections by calling :zeek:see:`Broker::peer`.
|
||||
|
||||
.. literalinclude:: broker/connecting-connector.zeek
|
||||
:caption: connecting-connector.zeek
|
||||
:language: zeek
|
||||
:linenos:
|
||||
:tab-width: 4
|
||||
|
||||
In either case, connection status updates are monitored via the
|
||||
:zeek:see:`Broker::peer_added` and :zeek:see:`Broker::peer_lost` events.
|
||||
|
||||
Remote Events
|
||||
-------------
|
||||
|
||||
To receive remote events, you need to first subscribe to a "topic" to which
|
||||
the events are being sent. A topic is just a string chosen by the sender,
|
||||
and named in a way that helps organize events into various categories.
|
||||
See the :ref:`topic naming conventions section <broker_topic_naming>` for
|
||||
more on how topics work and are chosen.
|
||||
|
||||
Use the :zeek:see:`Broker::subscribe` function to subscribe to topics and
|
||||
define any event handlers for events that peers will send.
|
||||
|
||||
.. literalinclude:: broker/events-listener.zeek
|
||||
:caption: events-listener.zeek
|
||||
:language: zeek
|
||||
:linenos:
|
||||
:tab-width: 4
|
||||
|
||||
To send an event, call the :zeek:see:`Broker::publish` function which you can
|
||||
supply directly with the event and its arguments or give it the return value of
|
||||
:zeek:see:`Broker::make_event` in case you need to send the same event/args
|
||||
multiple times. When publishing events like this, local event handlers for
|
||||
the event are not called, even if a matching subscription exists.
|
||||
|
||||
.. literalinclude:: broker/events-connector.zeek
|
||||
:caption: events-connector.zeek
|
||||
:language: zeek
|
||||
:linenos:
|
||||
:tab-width: 4
|
||||
|
||||
Note that the subscription model is prefix-based, meaning that if you subscribe
|
||||
to the ``zeek/events`` topic prefix you would receive events that are published
|
||||
to topic names ``zeek/events/foo`` and ``zeek/events/bar`` but not
|
||||
``zeek/misc``.
|
||||
|
||||
.. note::
|
||||
|
||||
In prior Zeek versions, ``Broker::auto_publish`` was available to automatically
|
||||
send events to peers whenever the events were called locally via the normal
|
||||
event invocation syntax. When auto-publishing events, local event handlers for
|
||||
the event were called in addition to sending the event to any subscribed peers.
|
||||
|
||||
``Broker::auto_publish`` was removed due to its
|
||||
`implicit nature <https://github.com/zeek/zeek/discussions/3637>`_.
|
||||
|
||||
|
||||
Remote Logging
|
||||
--------------
|
||||
|
||||
.. literalinclude:: broker/testlog.zeek
|
||||
:caption: testlog.zeek
|
||||
:language: zeek
|
||||
:linenos:
|
||||
:tab-width: 4
|
||||
|
||||
To toggle remote logs, redef :zeek:see:`Log::enable_remote_logging`.
|
||||
Use the :zeek:see:`Broker::subscribe` function to advertise interest
|
||||
in logs written by peers. The topic names that Zeek uses are determined by
|
||||
:zeek:see:`Broker::log_topic`.
|
||||
|
||||
.. literalinclude:: broker/logs-listener.zeek
|
||||
:caption: logs-listener.zeek
|
||||
:language: zeek
|
||||
:linenos:
|
||||
:tab-width: 4
|
||||
|
||||
.. literalinclude:: broker/logs-connector.zeek
|
||||
:caption: logs-connector.zeek
|
||||
:language: zeek
|
||||
:linenos:
|
||||
:tab-width: 4
|
||||
|
||||
Note that logging events are only raised locally on the node that performs
|
||||
the :zeek:see:`Log::write` and not automatically published to peers.
|
||||
|
||||
.. _data_store_example:
|
||||
|
||||
Distributed Data Stores
|
||||
-----------------------
|
||||
|
||||
See :doc:`/scripts/base/frameworks/broker/store.zeek` for an overview
|
||||
of the Broker data store API.
|
||||
|
||||
There are two flavors of key-value data store interfaces: master and clone.
|
||||
|
||||
A master data store can be cloned from remote peers which may then
|
||||
perform lightweight, local queries against the clone, which
|
||||
automatically stays synchronized with the master store. Clones cannot
|
||||
modify their content directly, instead they send modifications to the
|
||||
centralized master store which applies them and then broadcasts them to
|
||||
all clones.
|
||||
|
||||
Master stores get to choose what type of storage backend to
|
||||
use. E.g. In-memory versus SQLite for persistence.
|
||||
|
||||
Data stores also support expiration on a per-key basis using an amount of
|
||||
time relative to the entry's last modification time.
|
||||
|
||||
.. literalinclude:: broker/stores-listener.zeek
|
||||
:caption: stores-listener.zeek
|
||||
:language: zeek
|
||||
:linenos:
|
||||
:tab-width: 4
|
||||
|
||||
.. literalinclude:: broker/stores-connector.zeek
|
||||
:caption: stores-connector.zeek
|
||||
:language: zeek
|
||||
:linenos:
|
||||
:tab-width: 4
|
||||
|
||||
Note that all data store queries must be made within Zeek's asynchronous
|
||||
``when`` statements and must specify a timeout block.
|
||||
|
||||
|
||||
SQLite Data Store Tuning
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
When leveraging the SQLite backend for persistence, SQLite's default journaling
|
||||
and consistency settings are used. Concretely, ``journal_mode`` is set to
|
||||
``DELETE`` and ``synchronous`` to ``FULL``. This in turn is not optimal for
|
||||
`high INSERT or UPDATE rates <https://www.sqlite.org/faq.html#q19>`_
|
||||
due to SQLite waiting for the required IO to complete until data is safely
|
||||
on disk. This can also have a non-negligible system effect when the
|
||||
SQLite database is located on the same device as other IO critical processes.
|
||||
|
||||
Starting with Zeek 5.2, it is possible to tune and relax these settings by
|
||||
providing an appropriate :zeek:see:`Broker::BackendOptions` and
|
||||
:zeek:see:`Broker::SQLiteOptions` instance to
|
||||
:zeek:see:`Broker::create_master`. The following example changes the
|
||||
data store to use `Write-Ahead Logging <https://www.sqlite.org/wal.html>`_
|
||||
which should perform significantly faster than the default.
|
||||
|
||||
|
||||
.. literalinclude:: broker/store-sqlite-tuning.zeek
|
||||
:caption: store-sqlite-tuning.zeek
|
||||
:language: zeek
|
||||
:linenos:
|
||||
:tab-width: 4
|
||||
|
||||
If your use-case turns out to require more and lower-level tuning around
|
||||
SQLite options, please get in contact or open a feature request on GitHub.
|
||||
|
||||
|
||||
Cluster Framework Examples
|
||||
==========================
|
||||
|
||||
This section contains a few brief examples of how various communication
|
||||
patterns one might use when developing Zeek scripts that are to operate in
|
||||
the context of a cluster.
|
||||
|
||||
.. _event-namespacing-pitfall:
|
||||
|
||||
A Reminder About Events and Module Namespaces
|
||||
---------------------------------------------
|
||||
|
||||
For simplicity, the following examples do not use any modules/namespaces.
|
||||
If you choose to use them within your own code, it's important to
|
||||
remember that the ``event`` and ``schedule`` dispatching statements
|
||||
should always use the fully-qualified event name.
|
||||
|
||||
For example, this will likely not work as expected:
|
||||
|
||||
.. code-block:: zeek
|
||||
|
||||
module MyModule;
|
||||
|
||||
export {
|
||||
global my_event: event();
|
||||
}
|
||||
|
||||
event my_event()
|
||||
{
|
||||
print "got my event";
|
||||
}
|
||||
|
||||
event zeek_init()
|
||||
{
|
||||
event my_event();
|
||||
schedule 10sec { my_event() };
|
||||
}
|
||||
|
||||
This code runs without errors, however, the local ``my_event`` handler
|
||||
will never be called, nor will any remote handlers. Instead, at a
minimum you would need to change the ``zeek_init()`` handler:
|
||||
|
||||
.. code-block:: zeek
|
||||
|
||||
event zeek_init()
|
||||
{
|
||||
event MyModule::my_event();
|
||||
schedule 10sec { MyModule::my_event() };
|
||||
}
|
||||
|
||||
As an easy rule of thumb, always use the
explicit module namespace scoping and you can't go wrong:
|
||||
|
||||
.. code-block:: zeek
|
||||
|
||||
module MyModule;
|
||||
|
||||
export {
|
||||
global MyModule::my_event: event();
|
||||
}
|
||||
|
||||
event MyModule::my_event()
|
||||
{
|
||||
print "got my event";
|
||||
}
|
||||
|
||||
event zeek_init()
|
||||
{
|
||||
event MyModule::my_event();
|
||||
schedule 10sec { MyModule::my_event() };
|
||||
}
|
||||
|
||||
Event types that reside in the default namespace (such as :zeek:id:`zeek_init` or
|
||||
:zeek:id:`connection_established`) require no qualification, even when scheduled from
|
||||
inside a module. Don't force qualification of such events by prefixing with
|
||||
``GLOBAL::``.
|
||||
|
||||
Note that other identifiers in Zeek do not have this inconsistency
|
||||
related to module namespacing, it's just events that require
|
||||
explicitness.
|
||||
|
||||
Manager Sending Events To Workers
|
||||
---------------------------------
|
||||
|
||||
This is fairly straightforward, we just need a topic name which we know
|
||||
all workers are subscribed, combined with the event we want to send them.
|
||||
|
||||
.. code-block:: zeek
|
||||
|
||||
event manager_to_workers(s: string)
|
||||
{
|
||||
print "got event from manager", s;
|
||||
}
|
||||
|
||||
event some_event_handled_on_manager()
|
||||
{
|
||||
Broker::publish(Cluster::worker_topic, manager_to_workers,
|
||||
"hello v0");
|
||||
|
||||
# If you know this event is only handled on the manager, you don't
|
||||
# need any of the following conditions, they're just here as an
|
||||
# example of how you can further discriminate based on node identity.
|
||||
|
||||
# Can check based on the name of the node.
|
||||
if ( Cluster::node == "manager" )
|
||||
Broker::publish(Cluster::worker_topic, manager_to_workers,
|
||||
"hello v1");
|
||||
|
||||
# Can check based on the type of the node.
|
||||
if ( Cluster::local_node_type() == Cluster::MANAGER )
|
||||
Broker::publish(Cluster::worker_topic, manager_to_workers,
|
||||
"hello v2");
|
||||
|
||||
# The run-time overhead of the above conditions can even be
|
||||
# eliminated by using the following conditional directives.
|
||||
# It's evaluated once per node at parse-time and, if false,
|
||||
# any code within is just ignored / treated as not existing at all.
|
||||
@if ( Cluster::local_node_type() == Cluster::MANAGER )
|
||||
Broker::publish(Cluster::worker_topic, manager_to_workers,
|
||||
"hello v3");
|
||||
@endif
|
||||
}
|
||||
|
||||
Worker Sending Events To Manager
|
||||
--------------------------------
|
||||
|
||||
This should look almost identical to the previous case of sending an event
|
||||
from the manager to workers, except it simply changes the topic name to
|
||||
one to which the manager is subscribed.
|
||||
|
||||
.. code-block:: zeek
|
||||
|
||||
event worker_to_manager(worker_name: string)
|
||||
{
|
||||
print "got event from worker", worker_name;
|
||||
}
|
||||
|
||||
event some_event_handled_on_worker()
|
||||
{
|
||||
Broker::publish(Cluster::manager_topic, worker_to_manager,
|
||||
Cluster::node);
|
||||
}
|
||||
|
||||
Worker Sending Events To All Workers
|
||||
------------------------------------
|
||||
|
||||
Since workers are not directly connected to each other in the cluster
|
||||
topology, this type of communication is a bit different than what we
|
||||
did before since we have to manually relay the event via some node that *is*
|
||||
connected to all workers. The manager or a proxy satisfies that requirement:
|
||||
|
||||
.. code-block:: zeek
|
||||
|
||||
event worker_to_workers(worker_name: string)
|
||||
{
|
||||
@if ( Cluster::local_node_type() == Cluster::MANAGER ||
|
||||
Cluster::local_node_type() == Cluster::PROXY )
|
||||
Broker::publish(Cluster::worker_topic, worker_to_workers,
|
||||
worker_name);
|
||||
@else
|
||||
print "got event from worker", worker_name;
|
||||
@endif
|
||||
}
|
||||
|
||||
event some_event_handled_on_worker()
|
||||
{
|
||||
# We know the manager is connected to all workers, so we could
|
||||
# choose to relay the event across it.
|
||||
Broker::publish(Cluster::manager_topic, worker_to_workers,
|
||||
Cluster::node + " (via manager)");
|
||||
|
||||
# We also know that any given proxy is connected to all workers,
|
||||
# though now we have a choice of which proxy to use. If we
|
||||
# want to distribute the work associated with relaying uniformly,
|
||||
# we can use a round-robin strategy. The key used here is simply
|
||||
# used by the cluster framework internally to keep track of
|
||||
# which node is up next in the round-robin.
|
||||
local pt = Cluster::rr_topic(Cluster::proxy_pool, "example_key");
|
||||
Broker::publish(pt, worker_to_workers,
|
||||
Cluster::node + " (via a proxy)");
|
||||
}
|
||||
|
||||
Worker Distributing Events Uniformly Across Proxies
|
||||
---------------------------------------------------
|
||||
|
||||
If you want to offload some data/work from a worker to your proxies,
|
||||
we can make use of a `Highest Random Weight (HRW) hashing
|
||||
<https://en.wikipedia.org/wiki/Rendezvous_hashing>`_ distribution strategy
|
||||
to uniformly map an arbitrary key space across all available proxies.
|
||||
|
||||
.. code-block:: zeek
|
||||
|
||||
event worker_to_proxies(worker_name: string)
|
||||
{
|
||||
print "got event from worker", worker_name;
|
||||
}
|
||||
|
||||
global my_counter = 0;
|
||||
|
||||
event some_event_handled_on_worker()
|
||||
{
|
||||
# The key here is used to choose which proxy shall receive
|
||||
# the event. Different keys may map to different nodes, but
|
||||
# any given key always maps to the same node provided the
|
||||
# pool of nodes remains consistent. If a proxy goes offline,
|
||||
# that key maps to a different node until the original comes
|
||||
# back up.
|
||||
Cluster::publish_hrw(Cluster::proxy_pool,
|
||||
cat("example_key", ++my_counter),
|
||||
worker_to_proxies, Cluster::node);
|
||||
}
|
||||
|
||||
Broker-backed Zeek Tables for Data Synchronization and Persistence
|
||||
==================================================================
|
||||
|
||||
Starting with Zeek 3.2, it is possible to "bind" a Zeek table to a backing
|
||||
Broker store. Changes to the Zeek table are sent to the Broker store. Similarly,
|
||||
changes of the Broker store are applied to the Zeek table.
|
||||
|
||||
This feature allows easy distribution of table contents across a cluster.
|
||||
It also offers persistence for tables (when using a persistent Broker store
|
||||
backend like SQLite).
|
||||
|
||||
To give a short example, to distribute a table over a cluster you can use
|
||||
the :zeek:attr:`&backend` attribute.
|
||||
|
||||
.. code-block:: zeek
|
||||
|
||||
global t: table[string] of count &backend=Broker::MEMORY;
|
||||
|
||||
The :zeek:attr:`&backend` attribute creates a master data store on the
|
||||
manager and a clone data store on all other nodes in the cluster. This
|
||||
in essence means that the table exists twice in each Zeek process. One
|
||||
copy of the table is contained in a Broker data store (either a master
|
||||
or a clone depending on the node), which data store distributes the
|
||||
data across the cluster---and, depending on the backend, might also
|
||||
make the data persistent. Since Broker data stores are only accessible
|
||||
via asynchronous operations, and accessing them might not always be
|
||||
immediate, a second copy of the table, which is immediately
|
||||
accessible, is held inside the Zeek core. This is the copy that you
|
||||
see and interact with on the Zeek side.
|
BIN
doc/frameworks/broker/cluster-layout.png
Normal file
BIN
doc/frameworks/broker/cluster-layout.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 55 KiB |
2
doc/frameworks/broker/cluster-layout.xml
Normal file
2
doc/frameworks/broker/cluster-layout.xml
Normal file
|
@ -0,0 +1,2 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<mxfile userAgent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.84 Safari/537.36" version="9.0.3-1" editor="www.draw.io" type="device"><diagram name="Page-1" id="42789a77-a242-8287-6e28-9cd8cfd52e62">7VxLc6M4EP41Po4LSUjAcZJJZg+7VVOVrd3ZowwKZgYjFyaxvb9+hZEwEhA/eITs2JdYrZYE0ve1ultyZuh+tfua0vXyDx6weAatYDdDX2ZQfDxL/Mkl+0Li2G4hCNMoKETgKHiK/mVSKNuFL1HANppixnmcRWtd6PMkYX6myWia8q2u9sxjfdQ1DVlN8OTTuC79OwqypZQCyzpW/MaicCmHdrGsWFD/Z5jyl0SON4Po+fApqldU9SX1N0sa8G1FhB5m6D7lPCu+rXb3LM7nVk1b0e6xpbZ87pQl2VkNGEXAcTwrgD71XPQJoKKLVxq/MPUOhyfN9mp2WCAmSxYTnog/d4dXZnmnQJSW2SqWX2O6YPFdOSv3PObpsdkmo2n2OV8wQ/YYxXkPlipLiGBRZkmgWvgx3Wwi/89llBQVshkoSpVGP1iW7WWZvmRciHiaLXnIExr/zvlattpkKf/J1FOK1fOIgz6TskahIdd95kn2SFdRnIP8L5YGNKFSLEcCUJYrHVqHj5An/KEyj4dH3kXZ9/yt51iW/pFzcHxWqVpfZ7n0G/6S+qxtcSWVaBqyrEXHK3TyNa50LNHzlfEVy9K9UEhZTLPoVScMlbwLSz3ZVKwY3VcU1jxKsk2l52+5QCgoE6KYJg0IcqCOY1MfQTFp1Ra2bVdbiC/FM6hS5WWOogM7zmWKfWPKL80UNAmmAHIZUwB0RmdKjSgrAYCQpTNIYjG7d4v8W5iVq1VlUByLTT9H+3YZZexpTQ9rthV+h06fI68OVFD7al7l81Xky4pTLGsANW0BtWBQRZOBADOnO9hpHIVJzliBVzFDbwD4laUZ270JPVWL9BVHyrpuj86NctmWFbdGQasJrBW4XIYG2GA2Cxhs1jRRQFinfLf/BJsQUkjEuFX9qQEncLyFZZ0DnOdnRnx/msCBUAcOwHXgOLgOHDwEcJo80zpwYh4Ky5LbnI+BE7Ig+CwDI4IIOFWcePoeZJN3hIlXg4mERIN7dlv7HjYXD7/b4t+CVQ1QXxzvrm3T6Qac0uGuuNvFg4sV+14tKEe87rT35JrDM1xzMIhrXvOlbYLmnmMjDCxgI9dBjsYED84RJB7GmCAHQ7334h1lh29Fth6YE9GLhRwxAlbOerkj2/Y8790Rr01sYBFjmGKaasO0Rxka9S5z/JsC4jbPDtw8u3e12kbUOKZjh29Ge1yjrTImNaNtDWW07enYaAKN/Agi1xlirLwN1RExOhrT1JIbh0biUMmVObIcjS9zizRSpjk52UimQ2ffWBqJpc8t+2Eqe2PYMKn8GjGQbTAM4OsYZqO+GdaSArWMiAVoh2SdE6DOjZwjRyU1plVoS1yDtfC8je56bl4VsgxzmlAnK3J1jlnOdWRFjt4RgEZHPZEV4mHJqixphZx63FBhYnu0ezU57lxs42ZyNMXcBdL7ctO8Oi7tcWBonu/a5lFDCwwvNvVQD5e9nsFDLrPsJp5OOeij4GxANE30dgFCRvRrGbdkTuirMHgo/d5tnNsNpsfNFI9q+Grb+phQhSNZQqLvo90tYYewEoFuQGmwZxXM1C3avJNjORGbNo17IMjM6J2yaeRCG9VR37BpdR4YUSS2+7WB9WPBpuT0lqc/i6PCD5qdXrRzwsxOuz6bana6TBKPcYAMG5BxC9ff4xDRmtumfzFooH5OEu0WlzeP4wwblt/uoU/nlGhOiJn5nmYObaSEN1Fucpnwdq/jKnb1jgA09rO+c
mjuwDk0XOPmFTHCIKHk4EedgJwBzJFiSdfMqoGB8KTOUAfKqjmXmfoWhNXzpu8UabaA8PBI/WFsJOMHjPN0bOYrumLsVPx6Kv48s/5UPFp7z57jURWQdgX5W0dfo2TrhjSkw5xGDJM6s/pZT2M1p2WyejVYI0VW4NRRU0+b4qVnChem0zqp9x6dNd0/as2mfdy7nv+PbNqYv8ZoSrP+wmH7G1Z4oGTamCcfI13hhMZ9rauvcNrmTTUT8n1dMDN+EmPsBd19x/ovam8sGzU5dpELNUc5UrT8WemZX5ccO8e9Gomc5W1P5Wp4BqfOJWf5EwTl4pgd9UVO0is5RfH471oK9eP/xEEP/wE=</diagram></mxfile>
|
12
doc/frameworks/broker/connecting-connector.zeek
Normal file
12
doc/frameworks/broker/connecting-connector.zeek
Normal file
|
@ -0,0 +1,12 @@
|
|||
redef exit_only_after_terminate = T;
|
||||
|
||||
event zeek_init()
|
||||
{
|
||||
Broker::peer("127.0.0.1");
|
||||
}
|
||||
|
||||
event Broker::peer_added(endpoint: Broker::EndpointInfo, msg: string)
|
||||
{
|
||||
print "peer added", endpoint;
|
||||
terminate();
|
||||
}
|
17
doc/frameworks/broker/connecting-listener.zeek
Normal file
17
doc/frameworks/broker/connecting-listener.zeek
Normal file
|
@ -0,0 +1,17 @@
|
|||
redef exit_only_after_terminate = T;
|
||||
|
||||
event zeek_init()
|
||||
{
|
||||
Broker::listen("127.0.0.1");
|
||||
}
|
||||
|
||||
event Broker::peer_added(endpoint: Broker::EndpointInfo, msg: string)
|
||||
{
|
||||
print "peer added", endpoint;
|
||||
}
|
||||
|
||||
event Broker::peer_lost(endpoint: Broker::EndpointInfo, msg: string)
|
||||
{
|
||||
print "peer lost", endpoint;
|
||||
terminate();
|
||||
}
|
26
doc/frameworks/broker/events-connector.zeek
Normal file
26
doc/frameworks/broker/events-connector.zeek
Normal file
|
@ -0,0 +1,26 @@
|
|||
redef exit_only_after_terminate = T;
|
||||
global my_event: event(msg: string, c: count);
|
||||
|
||||
event zeek_init()
|
||||
{
|
||||
Broker::peer("127.0.0.1");
|
||||
}
|
||||
|
||||
event Broker::peer_added(endpoint: Broker::EndpointInfo, msg: string)
|
||||
{
|
||||
print "peer added", endpoint;
|
||||
Broker::publish("zeek/event/my_event", my_event, "hi", 0);
|
||||
Broker::publish("zeek/event/my_event", my_event, "...", 1);
|
||||
local e = Broker::make_event(my_event, "bye", 2);
|
||||
Broker::publish("zeek/event/my_event", e);
|
||||
}
|
||||
|
||||
event Broker::peer_lost(endpoint: Broker::EndpointInfo, msg: string)
|
||||
{
|
||||
terminate();
|
||||
}
|
||||
|
||||
event my_event(msg: string, c: count)
|
||||
{
|
||||
print "got my_event", msg, c;
|
||||
}
|
24
doc/frameworks/broker/events-listener.zeek
Normal file
24
doc/frameworks/broker/events-listener.zeek
Normal file
|
@ -0,0 +1,24 @@
|
|||
redef exit_only_after_terminate = T;
|
||||
global msg_count = 0;
|
||||
global my_event: event(msg: string, c: count);
|
||||
global my_auto_event: event(msg: string, c: count);
|
||||
|
||||
event zeek_init()
|
||||
{
|
||||
Broker::subscribe("zeek/event/");
|
||||
Broker::listen("127.0.0.1");
|
||||
}
|
||||
|
||||
event Broker::peer_added(endpoint: Broker::EndpointInfo, msg: string)
|
||||
{
|
||||
print "peer added", endpoint;
|
||||
}
|
||||
|
||||
event my_event(msg: string, c: count)
|
||||
{
|
||||
++msg_count;
|
||||
print "got my_event", msg, c;
|
||||
|
||||
if ( msg_count == 5 )
|
||||
terminate();
|
||||
}
|
36
doc/frameworks/broker/logs-connector.zeek
Normal file
36
doc/frameworks/broker/logs-connector.zeek
Normal file
|
@ -0,0 +1,36 @@
|
|||
@load ./testlog
|
||||
|
||||
redef exit_only_after_terminate = T;
|
||||
global n = 0;
|
||||
|
||||
event zeek_init()
|
||||
{
|
||||
Broker::peer("127.0.0.1");
|
||||
}
|
||||
|
||||
event do_write()
|
||||
{
|
||||
if ( n == 6 )
|
||||
return;
|
||||
|
||||
Log::write(Test::LOG, [$msg = "ping", $num = n]);
|
||||
++n;
|
||||
event do_write();
|
||||
}
|
||||
|
||||
event Broker::peer_added(endpoint: Broker::EndpointInfo, msg: string)
|
||||
{
|
||||
print "peer added", endpoint;
|
||||
event do_write();
|
||||
}
|
||||
|
||||
event Broker::peer_lost(endpoint: Broker::EndpointInfo, msg: string)
|
||||
{
|
||||
terminate();
|
||||
}
|
||||
|
||||
event Test::log_test(rec: Test::Info)
|
||||
{
|
||||
print "wrote log", rec;
|
||||
Broker::publish("zeek/logs/forward/test", Test::log_test, rec);
|
||||
}
|
22
doc/frameworks/broker/logs-listener.zeek
Normal file
22
doc/frameworks/broker/logs-listener.zeek
Normal file
|
@ -0,0 +1,22 @@
|
|||
@load ./testlog
|
||||
|
||||
redef exit_only_after_terminate = T;
|
||||
|
||||
event zeek_init()
|
||||
{
|
||||
Broker::subscribe("zeek/logs");
|
||||
Broker::listen("127.0.0.1");
|
||||
}
|
||||
|
||||
event Broker::peer_added(endpoint: Broker::EndpointInfo, msg: string)
|
||||
{
|
||||
print "peer added", endpoint;
|
||||
}
|
||||
|
||||
event Test::log_test(rec: Test::Info)
|
||||
{
|
||||
print "got log event", rec;
|
||||
|
||||
if ( rec$num == 5 )
|
||||
terminate();
|
||||
}
|
19
doc/frameworks/broker/store-sqlite-tuning.zeek
Normal file
19
doc/frameworks/broker/store-sqlite-tuning.zeek
Normal file
|
@ -0,0 +1,19 @@
|
|||
global h: opaque of Broker::Store;
|
||||
|
||||
event zeek_init()
|
||||
{
|
||||
# Use WAL mode.
|
||||
local sqlite_options=Broker::SQLiteOptions(
|
||||
$synchronous=Broker::SQLITE_SYNCHRONOUS_NORMAL,
|
||||
$journal_mode=Broker::SQLITE_JOURNAL_MODE_WAL,
|
||||
);
|
||||
local options = Broker::BackendOptions($sqlite=sqlite_options);
|
||||
h = Broker::create_master("persistent-store", Broker::SQLITE, options);
|
||||
|
||||
local c = 1000;
|
||||
while (c > 0)
|
||||
{
|
||||
Broker::put(h, cat(c), rand(10000));
|
||||
--c;
|
||||
}
|
||||
}
|
29
doc/frameworks/broker/stores-connector.zeek
Normal file
29
doc/frameworks/broker/stores-connector.zeek
Normal file
|
@ -0,0 +1,29 @@
|
|||
redef exit_only_after_terminate = T;
|
||||
|
||||
global h: opaque of Broker::Store;
|
||||
|
||||
global ready: event();
|
||||
|
||||
event Broker::peer_lost(endpoint: Broker::EndpointInfo, msg: string)
|
||||
{
|
||||
terminate();
|
||||
}
|
||||
|
||||
event zeek_init()
|
||||
{
|
||||
h = Broker::create_master("mystore");
|
||||
|
||||
local myset: set[string] = {"a", "b", "c"};
|
||||
local myvec: vector of string = {"alpha", "beta", "gamma"};
|
||||
Broker::put(h, "one", 110);
|
||||
Broker::put(h, "two", 223);
|
||||
Broker::put(h, "myset", myset);
|
||||
Broker::put(h, "myvec", myvec);
|
||||
Broker::increment(h, "one");
|
||||
Broker::decrement(h, "two");
|
||||
Broker::insert_into_set(h, "myset", "d");
|
||||
Broker::remove_from(h, "myset", "b");
|
||||
Broker::push(h, "myvec", "delta");
|
||||
|
||||
Broker::peer("127.0.0.1");
|
||||
}
|
79
doc/frameworks/broker/stores-listener.zeek
Normal file
79
doc/frameworks/broker/stores-listener.zeek
Normal file
|
@ -0,0 +1,79 @@
|
|||
redef exit_only_after_terminate = T;
|
||||
|
||||
global h: opaque of Broker::Store;
|
||||
global expected_key_count = 4;
|
||||
global key_count = 0;
|
||||
|
||||
# Lookup a value in the store based on an arbitrary key string.
|
||||
function do_lookup(key: string)
|
||||
{
|
||||
when ( local res = Broker::get(h, key) )
|
||||
{
|
||||
++key_count;
|
||||
print "lookup", key, res;
|
||||
|
||||
# End after we iterated over looking up each key in the store twice.
|
||||
if ( key_count == expected_key_count * 2 )
|
||||
terminate();
|
||||
}
|
||||
# All data store queries must specify a timeout
|
||||
timeout 3sec
|
||||
{ print "timeout", key; }
|
||||
}
|
||||
|
||||
event check_keys()
|
||||
{
|
||||
# Here we just query for the list of keys in the store, and show how to
|
||||
# look up each one's value.
|
||||
when ( local res = Broker::keys(h) )
|
||||
{
|
||||
print "clone keys", res;
|
||||
|
||||
if ( res?$result )
|
||||
{
|
||||
# Since we know that the keys we are storing are all strings,
|
||||
# we can conveniently cast the result of Broker::keys to
|
||||
# a native Zeek type, namely 'set[string]'.
|
||||
for ( k in res$result as string_set )
|
||||
do_lookup(k);
|
||||
|
||||
# Alternatively, we can use a generic iterator to iterate
|
||||
# over the results (which we know is of the 'set' type because
|
||||
# that's what Broker::keys() always returns). If the keys
|
||||
# we stored were not all of the same type, then you would
|
||||
# likely want to use this method of inspecting the store's keys.
|
||||
local i = Broker::set_iterator(res$result);
|
||||
|
||||
while ( ! Broker::set_iterator_last(i) )
|
||||
{
|
||||
do_lookup(Broker::set_iterator_value(i) as string);
|
||||
Broker::set_iterator_next(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
# All data store queries must specify a timeout.
|
||||
# You also might see timeouts on connecting/initializing a clone since
|
||||
# it hasn't had time to get fully set up yet.
|
||||
timeout 1sec
|
||||
{
|
||||
print "timeout";
|
||||
schedule 1sec { check_keys() };
|
||||
}
|
||||
}
|
||||
|
||||
event Broker::peer_added(endpoint: Broker::EndpointInfo, msg: string)
|
||||
{
|
||||
print "peer added";
|
||||
# We could create a clone early, like in zeek_init and it will periodically
|
||||
# try to synchronize with its master once it connects, however, we just
|
||||
# create it now since we know the peer w/ the master store has just
|
||||
# connected.
|
||||
h = Broker::create_clone("mystore");
|
||||
|
||||
event check_keys();
|
||||
}
|
||||
|
||||
event zeek_init()
|
||||
{
|
||||
Broker::listen("127.0.0.1");
|
||||
}
|
17
doc/frameworks/broker/testlog.zeek
Normal file
17
doc/frameworks/broker/testlog.zeek
Normal file
|
@ -0,0 +1,17 @@
|
|||
module Test;
|
||||
|
||||
export {
|
||||
redef enum Log::ID += { LOG };
|
||||
|
||||
type Info: record {
|
||||
msg: string &log;
|
||||
num: count &log;
|
||||
};
|
||||
|
||||
global log_test: event(rec: Test::Info);
|
||||
}
|
||||
|
||||
event zeek_init() &priority=5
|
||||
{
|
||||
Log::create_stream(Test::LOG, [$columns=Test::Info, $ev=log_test, $path="test"]);
|
||||
}
|
630
doc/frameworks/cluster.rst
Normal file
630
doc/frameworks/cluster.rst
Normal file
|
@ -0,0 +1,630 @@
|
|||
|
||||
.. _cluster-framework:
|
||||
|
||||
=================
|
||||
Cluster Framework
|
||||
=================
|
||||
|
||||
The basic premise of Zeek clusterization is to break down network traffic into
|
||||
smaller pieces, while preserving the affinity of individual network sessions to
|
||||
a single analysis process. Cluster architecture thus allows Zeek to distribute
|
||||
that analysis across many dozens or hundreds of worker processes, allowing the
|
||||
monitoring system to scale up to line speeds of 100G or more.
|
||||
|
||||
.. figure:: /images/cluster-diagram.png
|
||||
|
||||
Figure 1: Block diagram of cluster setup showing multiple network feeds to a
|
||||
traffic aggregator. This device sends traffic to workers after symmetric
|
||||
hashing/load-balancing. Traffic is then fed to the Zeek cluster using
|
||||
load-balancing network cards.
|
||||
|
||||
Zeek's Cluster Components
|
||||
=========================
|
||||
|
||||
By distributing network traffic across hosts and processes, overall traffic
|
||||
finally reaches a volume that can be effectively analyzed by a single worker
|
||||
process. Zeek then acts as a distributed network security monitor to perform
|
||||
analysis across many dozens or hundreds of workers, all acting on a small
|
||||
fraction of the overall traffic volume. The analysis of the worker process is
|
||||
further facilitated by nodes such as manager and proxies, ultimately logging
|
||||
the alerts and or relevant network logs. A Zeek cluster therefore consists of
|
||||
four main components: a manager, workers, proxies, and a logger.
|
||||
|
||||
Manager
|
||||
-------
|
||||
|
||||
The manager is a Zeek process that has two primary jobs. It normally receives
|
||||
log messages and notices from the rest of the nodes in the cluster using the
|
||||
Zeek communications protocol. It combines the individual logs that each worker
|
||||
produces, so that the result is a set of joint logs instead of many discrete
|
||||
logs that you would have to combine in some manner with post-processing. (Note
|
||||
that if you use a separate logger node, then the logger receives all logs
|
||||
instead of the manager.) The manager also supports other functionality and
|
||||
analysis which requires a centralized, global view of events or data.
|
||||
|
||||
Worker
|
||||
------
|
||||
|
||||
The worker is the Zeek process that sniffs network traffic and does protocol
|
||||
analysis on the reassembled traffic streams. Most of the work of an active
|
||||
cluster takes place on the workers. Workers typically represent the bulk of the
|
||||
Zeek processes that are running in a cluster. The fastest memory and CPU core
|
||||
speed you can afford is recommended since all of the protocol parsing and most
|
||||
analysis will take place here. There are no particular requirements for the
|
||||
disks in workers since almost all logging is done remotely to the manager (or
|
||||
dedicated logger). Normally, very little is written to disk.
|
||||
|
||||
Proxy
|
||||
-----
|
||||
|
||||
A proxy is a Zeek process that may be used to offload data storage or any
|
||||
arbitrary workload. A cluster may contain multiple proxy nodes.
|
||||
Zeek's default scripts make only minimal use of proxies.
|
||||
Custom scripts or third-party packages may exercise proxies more heavily
|
||||
to partition data or workloads, providing greater cluster scalability potential.
|
||||
The number of required proxy nodes in a cluster depends on the deployed scripts,
|
||||
cluster size and traffic characteristics. For small clusters with four or fewer workers,
|
||||
a single proxy node is usually sufficient. For larger clusters, you may want to
|
||||
closely monitor :ref:`CPU and memory usage <framework-telemetry>` of proxy
|
||||
nodes and increase their number as needed.
|
||||
|
||||
Zeek processes acting as proxies don’t tend to be extremely hard on CPU or
|
||||
memory, and users frequently run proxy processes on the same physical host as
|
||||
the manager.
|
||||
|
||||
Logger
|
||||
------
|
||||
|
||||
A logger is an optional Zeek process that receives log messages from the rest
|
||||
of the nodes in the cluster using the Zeek communications protocol. The purpose
|
||||
of having a logger to receive logs instead of the manager is to reduce the load
|
||||
on the manager. If no logger is needed, then the manager will receive logs
|
||||
instead.
|
||||
|
||||
Running a Zeek Cluster
|
||||
======================
|
||||
|
||||
Zeek Cluster Setup
|
||||
------------------
|
||||
|
||||
This :ref:`link <cluster-configuration>` describes the cluster setup in great
|
||||
detail.
|
||||
|
||||
General Usage and Deployment
|
||||
----------------------------
|
||||
|
||||
The biggest advantage to using a Zeek cluster is that most of its inner
|
||||
workings are transparent to the user. Clusterization is a clever trick to
|
||||
divide-and-conquer ever increasing network traffic volume.
|
||||
|
||||
As a practitioner one must know how to set up a cluster by defining components
|
||||
such as the manager, proxies, loggers and workers in the
|
||||
:samp:`{<prefix>}/etc/node.cfg` file on the manager.
|
||||
|
||||
Edit the ZeekControl node configuration file, :samp:`{<prefix>}/etc/node.cfg`,
|
||||
to define where the logger, manager, proxies, and workers will run. For a
|
||||
cluster configuration, comment-out (or remove) the standalone node in that
|
||||
file, and either uncomment or add node entries for each node in your cluster
|
||||
(logger, manager, proxy, and workers).
|
||||
|
||||
For example, to run five Zeek nodes (two workers, one proxy, a logger, and a
|
||||
manager) on a cluster consisting of three machines, the cluster configuration
|
||||
would look like this::
|
||||
|
||||
[logger]
|
||||
type=logger
|
||||
host=10.0.0.10
|
||||
|
||||
[manager]
|
||||
type=manager
|
||||
host=10.0.0.10
|
||||
|
||||
[proxy-1]
|
||||
type=proxy
|
||||
host=10.0.0.10
|
||||
|
||||
[worker-1]
|
||||
type=worker
|
||||
host=10.0.0.11
|
||||
interface=eth0
|
||||
|
||||
[worker-2]
|
||||
type=worker
|
||||
host=10.0.0.12
|
||||
interface=eth0
|
||||
|
||||
|
||||
To set up a cluster we need a network-aggregator/load balancing device which
|
||||
can aggregate inputs from network sources, such as taps or span ports. This
|
||||
device also performs the critical function of ensuring each TCP session is
|
||||
distributed to a single link. This function is provided through symmetric
|
||||
hashing.
|
||||
|
||||
Once the tap aggregator is set, output from each port is sent to a “Zeek node”
|
||||
which is typically built on commodity hardware. Zeek clusters have evolved from
|
||||
running the manager, workers and proxies on individual servers, to most often
|
||||
now running a “cluster-in-a-box” setup, where a powerful multi-core box with
|
||||
dedicated cores hosts the workers, proxies, logger and manager. We’ve seen
|
||||
instances of 90 workers running on a single physical server.
|
||||
|
||||
At present the preferred way to run a cluster is to use a load-balancing
|
||||
network card such as Myricom NICs or Intel cards with PF_RING or AF_PACKET
|
||||
support. The NIC (and associated software) further divides the traffic to
|
||||
multiple Zeek worker processes running on the ‘Zeek node’.
|
||||
|
||||
While the Zeek cluster allows us to monitor traffic at scale, an optional
|
||||
add-on technology called “shunting” is helpful to reduce the volume that needs
|
||||
be processed. Shunting can detect specific large data flows based on
|
||||
predetermined characteristics and communicate with the network tap via an API
|
||||
to stop sending those flows to Zeek for analysis. This allows Zeek to maintain
|
||||
awareness and logs of these shunted large flows while dramatically reducing the
|
||||
analysis load necessary to process traffic.
|
||||
|
||||
The following links gives more specific information on how to set up
|
||||
clusterization using one of the above approaches: :ref:`cluster-configuration`.
|
||||
|
||||
Developing Scripts/Heuristics
|
||||
=============================
|
||||
|
||||
This section is for developers who are interested in writing
|
||||
packages/scripts/heuristics and want to take advantage of clusterization.
|
||||
|
||||
In order to make your scripts/packages “clusterized,” one must understand the
|
||||
purpose of each of the cluster components (manager, workers, proxies and
|
||||
logger) and how/where the data is generated and how to move data/information
|
||||
across the different nodes in the cluster.
|
||||
|
||||
* **Workers**: Workers are a good first choice for doing the brunt of any work.
|
||||
They should be spending a lot of time parsing or analyzing incoming data from
|
||||
packets. You might choose them to do a “first pass” analysis and then decide
|
||||
how the results should be shared with other nodes in the cluster.
|
||||
|
||||
* **Proxies**: Proxies serve as intermediaries for data storage and computation
|
||||
offloading. Proxies help offload work or data in a scalable and distributed
|
||||
way. Since any given worker is connected to all proxies and can agree on an
|
||||
“arbitrary key -> proxy node” mapping (discussed later), you can partition
|
||||
work or data amongst them in a uniform manner. You might choose to use
|
||||
proxies as a method to share non-persistent state or as a “second pass”
|
||||
analysis for any work that you don’t want interfering with the workers’
|
||||
capacity to keep up with capturing and parsing packets. The default scripts
|
||||
that come with Zeek make minimal use of proxies. If you are migrating from a
|
||||
previous ZeekControl deployment, you may want to implement a single proxy
|
||||
node. If you have custom or community scripts that utilize proxies,
|
||||
consider scaling up the number of proxies to meet demand.
|
||||
|
||||
* **Manager**: A manager will make decisions that require a global view, as it
|
||||
is in a centralized location and connected to everything. However, that
|
||||
connectivity also makes it easy to overload it. Try to use a manager
|
||||
sparingly and only for tasks that must be done in a centralized or
|
||||
authoritative location. Optionally, for some deployments, the manager can
|
||||
also serve as the sole logger.
|
||||
|
||||
* **Loggers**: Loggers should simply write logs to disk. In the default cluster
|
||||
configuration, log content gets distributed among available loggers in a
|
||||
round-robin fashion, providing failover capability should any given logger
|
||||
temporarily go offline.
|
||||
|
||||
The Need to Move Data and Events Across Different Nodes
|
||||
-------------------------------------------------------
|
||||
|
||||
Imagine you have a list of IP addresses that you want to distribute across all
|
||||
workers to keep in a watch list, such as the Intel framework. You may also want
|
||||
to aggregate results across workers to see if that count crosses a threshold,
|
||||
such as using scan detection. Finally, you might want to extract URLs from
|
||||
emails and then redistribute the extracted URLs to all workers to be able to
|
||||
find which of these extracted URLs got clicked on. All these examples tend to
|
||||
introduce challenges in a Zeek cluster setup due to data centrality issues. In
|
||||
other words, the very advantageous divide-and-conquer approach of
|
||||
clusterization also introduces complexity in Zeek scripts. However, with the
|
||||
introduction of the Broker communication framework and additional helper
|
||||
functions, data centrality complexities can be addressed efficiently. One must
|
||||
rely on clusterization techniques provided by Zeek scripting, the Broker API,
|
||||
and clusterization components.
|
||||
|
||||
When clustering your scripts, the fundamental work to move data or events in
|
||||
the context of a cluster falls primarily on a few high-level abstractions of
|
||||
communication patterns:
|
||||
|
||||
1. Manager-to-worker
|
||||
2. Worker-to-manager
|
||||
3. Worker-to-proxy
|
||||
4. Worker-to-manager-to-worker
|
||||
5. Manager-to-worker-to-manager
|
||||
|
||||
All the communication between workers, proxies and manager is established by
|
||||
Zeek via the Broker framework. The Broker framework provides basic facilities
|
||||
for connecting Zeek instances to each other and exchanging messages, events or
|
||||
data.
|
||||
|
||||
Cluster Topics
|
||||
--------------
|
||||
|
||||
All Broker-based messaging involves two components: the information you want to
|
||||
send, such as an event with its arguments, along with an associated topic name
|
||||
string. The topic strings are used as a filtering mechanism: Broker uses a
|
||||
publish-subscribe communication pattern where peers advertise interest in topic
|
||||
prefixes and only receive messages which match one of their prefix
|
||||
subscriptions. Broker itself supports arbitrary topic strings. However, Zeek
|
||||
generally follows certain conventions in choosing these topics to help avoid
|
||||
conflicts and generally make them easier to remember.
|
||||
|
||||
To communicate between workers, proxies and manager one needs to know the topic
|
||||
name to which all workers, proxies and manager are subscribed. These are:
|
||||
|
||||
1. :zeek:see:`Cluster::worker_topic` - to which all workers are subscribed
|
||||
2. :zeek:see:`Cluster::proxy_topic` - to which all proxies are subscribed
|
||||
3. :zeek:see:`Cluster::manager_topic` - to which manager is subscribed
|
||||
|
||||
|
||||
The following table illustrates all the topics and communication events for
|
||||
clusterization, along with potential use cases:
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
|
||||
* - Event
|
||||
- Topic
|
||||
- Use cases
|
||||
|
||||
* - Manager to worker
|
||||
- :zeek:see:`Cluster::worker_topic`
|
||||
- * Read input file on manager
|
||||
* Distribute data and events from manager to workers
|
||||
|
||||
* - Worker to manager
|
||||
- :zeek:see:`Cluster::manager_topic`
|
||||
- * Find characteristics of a “scan” eg. SYN-only pkts
|
||||
* Send data to manager for aggregation
|
||||
|
||||
* - Worker or manager to proxy
|
||||
- :zeek:see:`Cluster::proxy_topic`
|
||||
- * Run operation on all proxies
|
||||
* Disseminate notice suppression
|
||||
|
||||
* - Worker to manager to worker
|
||||
- :zeek:see:`Cluster::manager_topic` + :zeek:see:`Cluster::worker_topic`
|
||||
- * Find URLs in emails
|
||||
* Send to manager
|
||||
* Distribute to workers to check against HTTP GET requests
|
||||
|
||||
* - Manager to worker to manager
|
||||
- :zeek:see:`Cluster::worker_topic` + :zeek:see:`Cluster::manager_topic`
|
||||
- * Read input file on manager
|
||||
* Distribute data to workers
|
||||
* Workers to report counts of connections to manager
|
||||
* Aggregate the counts on manager
|
||||
|
||||
Cluster Pools
|
||||
-------------
|
||||
|
||||
In addition to topics, Zeek nodes can join a :zeek:see:`Cluster::Pool`.
|
||||
Using :zeek:see:`Cluster::publish_hrw` and :zeek:see:`Cluster::publish_rr`,
|
||||
pools allow publishing events to individual proxies without prior knowledge
|
||||
of a cluster's shape and size.
|
||||
|
||||
A popular pool is the :zeek:see:`Cluster::proxy_pool`. It comprises all
|
||||
the proxies of a cluster. Examples of its use are listed in the following table.
|
||||
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
|
||||
* - Event
|
||||
- Pool
|
||||
- Use cases
|
||||
|
||||
* - Workers to individual proxy processes
|
||||
- :zeek:see:`Cluster::proxy_pool`
|
||||
- * Aggregation based on Highest Random Weight (eg. DNS query types, see the :ref:`section below <cluster-framework-proxies-uniform>` for details.)
|
||||
* Aggregation of Software versions for a given host
|
||||
* Offloading tasks in round-robin fashion across proxies
|
||||
|
||||
|
||||
Publishing Events Across the Cluster
|
||||
------------------------------------
|
||||
|
||||
Broker, as well as Zeek’s higher-level cluster framework, provide a set of
|
||||
functions to publish events, including:
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
|
||||
* - Function
|
||||
- Description
|
||||
- Use
|
||||
|
||||
* - :zeek:see:`Cluster::publish`
|
||||
- Publishes an event at a given topic
|
||||
- Standard function to send an event to all nodes subscribed to a given
|
||||
topic.
|
||||
|
||||
* - :zeek:see:`Cluster::publish_hrw`
|
||||
- Publishes an event to a node within a pool according to
|
||||
Highest Random Weight (HRW) hashing strategy; see details below
|
||||
- Use this in cases of any aggregation needs - eg. scan detection or
|
||||
anything that needs a counter going.
|
||||
|
||||
* - :zeek:see:`Cluster::publish_rr`
|
||||
- Publishes an event to a node within a pool according to Round-Robin
|
||||
distribution strategy.
|
||||
- Generally used inside Zeek for multiple logger nodes.
|
||||
|
||||
* - :zeek:see:`Broker::publish`
|
||||
- Publishes an event at a given topic
|
||||
- Standard function to send an event to all nodes subscribed to a given
|
||||
topic.
|
||||
|
||||
Starting with Zeek 7.1, this function should only be used in
|
||||
Broker-specific scripts. Use :zeek:see:`Cluster::publish` otherwise.
|
||||
|
||||
|
||||
.. note::
|
||||
|
||||
The ``Cluster::publish`` function was added in Zeek 7.1. In contrast to
|
||||
``Broker::publish``, it publishes events even when a non-Broker cluster
|
||||
backend is in use. Going forward, ``Cluster::publish`` should be preferred
|
||||
over ``Broker::publish``, unless the script is specific to the Broker backend,
|
||||
e.g. when interacting with an external application using native Python
|
||||
bindings for Broker.
|
||||
|
||||
|
||||
An example sending an event from worker to manager:
|
||||
|
||||
.. code-block:: zeek
|
||||
|
||||
event worker_to_manager(worker_name: string)
|
||||
{
|
||||
print "got event from worker", worker_name;
|
||||
}
|
||||
|
||||
event some_event_handled_on_worker()
|
||||
{
|
||||
Broker::publish(Cluster::manager_topic, worker_to_manager,
|
||||
Cluster::node);
|
||||
}
|
||||
|
||||
More details and code snippets and documentation on Broker communication
|
||||
frameworks are available at :ref:`broker-framework`.
|
||||
|
||||
|
||||
.. _cluster-framework-proxies-uniform:
|
||||
|
||||
Distributing Events Uniformly Across Proxies
|
||||
--------------------------------------------
|
||||
|
||||
If you want to offload some data/work from a worker to your proxies, we can
|
||||
make use of a `Highest Random Weight (HRW) hashing
|
||||
<https://en.wikipedia.org/wiki/Rendezvous_hashing>`_ distribution strategy to
|
||||
uniformly map an arbitrary key space across all available proxies through
|
||||
:zeek:see:`Cluster::publish_hrw`. This function publishes an event to one node
|
||||
within a pool according to a Highest Random Weight hashing strategy. By
|
||||
assigning :zeek:see:`Cluster::proxy_pool` to this event, one can utilize
|
||||
proxies to handle it. Note that :zeek:see:`Cluster::publish_hrw` requires a
|
||||
unique key as an input to the hashing function to uniformly distribute keys
|
||||
among available nodes. Often this key is a source or destination IP address. If
|
||||
you are using :zeek:see:`Cluster::publish_hrw` for an aggregate function, such
|
||||
as counts unique across the workers, make sure to appropriately select the
|
||||
hashing key.
|
||||
|
||||
The following example illustrates this issue. Assume that we are counting the
|
||||
number of scanner IPs from each ``/24`` subnet. If the key were the source IP,
|
||||
then depending on the hashing, different IP addresses from the same ``/24``
|
||||
might end up on different proxies for the aggregation function. In this case
|
||||
one might instead want to use a more inclusive hashing key, such as the subnet
|
||||
(``/24``) itself. To illustrate the issue, in the notice log below, you see
|
||||
that 3 scanners each from ``52.100.165.0/24`` went to ``proxy-1`` and
|
||||
``proxy-2``. Ideally we want a single count of 6 scanners instead.
|
||||
|
||||
::
|
||||
|
||||
1600212249.061779 Scan::Subnet 52.100.165.0/24 has 3 spf IPs originating from it 52.100.165.249 52.100.165.237 52.100.165.246 - 52.100.165.246 - - proxy-2 Notice::ACTION_LOG 3600.000000 F
|
||||
|
||||
1600212293.581745 Scan::Subnet 52.100.165.0/24 has 3 spf IPs originating from it 52.100.165.247 52.100.165.244 52.100.165.205 - 52.100.165.205 - - proxy-1 Notice::ACTION_LOG 3600.000000
|
||||
|
||||
Instead, we can ensure the hash key is ``52.100.165.0/24`` instead of the
|
||||
original IP, as the hash for ``52.100.165.0/24`` will be the same for all
|
||||
addresses belonging to this subnet. Then the data will reach only one proxy.
|
||||
To that end, we can use the ``mask_address`` function to extract subnet
|
||||
information for a given IP address to use as a key in the hash function:
|
||||
|
||||
.. code-block:: zeek
|
||||
|
||||
local spf = mask_address(orig);
|
||||
|
||||
@if ( Cluster::is_enabled())
|
||||
Cluster::publish_hrw(Cluster::proxy_pool, spf, smtpsink::aggregate_stats, c) ;
|
||||
@else
|
||||
event smtpsink::aggregate_stats(c);
|
||||
@endif
|
||||
|
||||
Carefully select the key for :zeek:see:`Cluster::publish_hrw`. If done right,
|
||||
this feature will bring tremendous benefits in code scalability, especially
|
||||
when working with aggregate and threshold functions.
|
||||
|
||||
.. note::
|
||||
|
||||
In scripting for clusterization, using the correct module names and
|
||||
namespaces is crucial as both events and data are transmitted to different
|
||||
systems. In order to make sure the contexts are correct, all functions,
|
||||
events and datasets should be scoped within their respective namespaces and
|
||||
modules. An easy rule of thumb is to always use the explicit module namespace
|
||||
scoping. See :ref:`event-namespacing-pitfall` for further explanation and
|
||||
examples.
|
||||
|
||||
Clusterization of Zeek scripts can be an intimidating task for beginners.
|
||||
However, with reliance on the new Broker framework, clusterization has become
|
||||
simpler and straightforward. Consider the following:
|
||||
|
||||
1. Communication overhead: Be sure not to generate unnecessary communication
|
||||
overhead. For example, scan detection is one of the worst cases for
|
||||
distributed analysis. One needs to count connections from a given IP address
|
||||
across all workers and then aggregate them on a proxy or manager. All the
|
||||
connections have to reach an aggregate function before Zeek can determine if
|
||||
a given source is a scanner or not. This happens because each worker only
|
||||
has a limited picture of the activity generated by a given remote IP.
|
||||
|
||||
2. Communication optimizations: Once a given remote IP is identified as
|
||||
desired, make sure a manager reports that to the worker, and workers stop
|
||||
sending any further data for that IP to the manager. This is especially
|
||||
useful in scan detection where it takes only a few connections to identify
|
||||
scans, while a given scanner might send millions of probes eventually. If
|
||||
done right, workers will only send the first N connections, and stop after
|
||||
that, thus saving a lot of communication overhead. However, it makes sense
|
||||
to stop workers from sending any further connection information.
|
||||
|
||||
3. Clusterization also requires timely state synchronization across the
|
||||
workers, to make sure that all workers have a common view of a particular
|
||||
heuristic.
|
||||
|
||||
4. When writing scripts for clusterization make sure your detection runs in
|
||||
both cluster and standalone setup.
|
||||
|
||||
A Cluster Script Walkthrough
|
||||
----------------------------
|
||||
|
||||
Let's say we want to count how many connections a remote IP is making to a host
|
||||
in our network on port 3389 UDP. Due to the distributed nature of Zeek
|
||||
clusters, connections are distributed across the workers based on a 5-tuple
|
||||
hash (source IP, source port, destination IP, destination port, and protocol).
|
||||
To get a central view of a connection between a given IP pair, one must deploy
|
||||
a clusterized scripting approach. The following example highlights how to go
|
||||
about doing so.
|
||||
|
||||
In this use case, we intend to create an aggregation function.
|
||||
:zeek:see:`Cluster::publish_hrw` appears to be the appropriate function, since
|
||||
it allows offloading a lot of work to proxies, thus leaving workers and manager
|
||||
to process traffic.
|
||||
|
||||
In order to make sure all the connections between two hosts go to a single
|
||||
specific proxy, we need to make sure the key for the hashing function
|
||||
accommodates this constraint. We will use ``orig_h+resp_h`` as the key. We
|
||||
create a new data-type called ``pair`` as seen in code below. This allows us
|
||||
to use the ``orig+resp`` as a unique key across the code, including in the
|
||||
candidate table. Further, we create a new data type called ``stats`` to keep
|
||||
track of additional data associated with a connection pair.
|
||||
|
||||
.. code-block:: zeek
|
||||
|
||||
module DoS;
|
||||
|
||||
export {
|
||||
|
||||
redef enum Notice::Type += {
|
||||
Threshold,
|
||||
Victim_3389UDP,
|
||||
};
|
||||
|
||||
type pair: record {
|
||||
orig: addr;
|
||||
resp: addr;
|
||||
};
|
||||
|
||||
type stats: record {
|
||||
orig: addr;
|
||||
resp: addr ;
|
||||
orig_bytes: count &default=0;
|
||||
resp_bytes: count &default=0;
|
||||
conns: count &default=0;
|
||||
};
|
||||
|
||||
global dos_candidates: table [pair] of stats &create_expire=1 day;
|
||||
|
||||
    global DoS::aggregate_stats: event(s: stats);
|
||||
}
|
||||
|
||||
We choose the :zeek:see:`connection_state_remove` event as the primary event to
|
||||
tap into. :zeek:see:`connection_state_remove` is generated when a connection’s
|
||||
internal state is about to be removed from memory. It's appropriate for this
|
||||
case, as all the information about the connection is now included in the
|
||||
:zeek:see:`connection` record ``c``. One disadvantage of using
|
||||
:zeek:see:`connection_state_remove` is that the event is fired at the very end
|
||||
of the connection, after the expiration timeouts are over. Thus, there are
|
||||
delays, and any operation which happens on the data is “after-the-fact” that
|
||||
connection is over. While this could be a problem in approaches such as
|
||||
proactive blocking and early detection heuristics, in this case of aggregation
|
||||
it is not an issue.
|
||||
|
||||
The thing to pay attention to in the code snippet below is the
|
||||
:zeek:see:`@if`-:zeek:see:`@else`-:zeek:see:`@endif` directives which
|
||||
differentiate between clusterized and standalone operation of the script. With
|
||||
the :zeek:see:`@if` construct, the specified expression must evaluate to type
|
||||
bool. If the value is true, then the following script lines (up to the next
|
||||
:zeek:see:`@else` or :zeek:see:`@endif`) are available to be executed. In this
|
||||
case we check if :zeek:see:`Cluster::is_enabled`. If so, we call
|
||||
:zeek:see:`Cluster::publish_hrw` along with the key (``hash_pair``) and the
|
||||
aggregate function followed by parameters, which is the stats record in this
|
||||
case. If the cluster isn’t running, that aggregate function is directly
|
||||
called.
|
||||
|
||||
.. code-block:: zeek
|
||||
|
||||
event connection_state_remove(c: connection)
|
||||
{
|
||||
local service = c$id$resp_p;
|
||||
local resp = c$id$resp_h;
|
||||
|
||||
if ( service != 3389/udp )
|
||||
return;
|
||||
|
||||
if ( resp !in Site::local_nets )
|
||||
return;
|
||||
|
||||
local s: stats;
|
||||
s$orig = c$id$orig_h;
|
||||
s$resp = c$id$resp_h;
|
||||
s$orig_bytes = c$conn$orig_ip_bytes;
|
||||
s$resp_bytes = c$conn$resp_ip_bytes;
|
||||
|
||||
local hash_pair: pair;
|
||||
hash_pair$orig = c$id$orig_h;
|
||||
hash_pair$resp = resp;
|
||||
|
||||
@if ( Cluster::is_enabled() )
|
||||
Cluster::publish_hrw(Cluster::proxy_pool, hash_pair, DoS::aggregate_stats, s);
|
||||
@else
|
||||
event DoS::aggregate_stats(s);
|
||||
@endif
|
||||
}
|
||||
|
||||
Since ``hash_pair`` makes the key unique, irrespective of what worker this
|
||||
specific connection has gone to, it will end up on one specific proxy only.
|
||||
|
||||
.. code-block:: zeek
|
||||
|
||||
event DoS::aggregate_stats(s: stats)
|
||||
{
|
||||
local p: pair ;
|
||||
p$orig = s$orig;
|
||||
p$resp = s$resp ;
|
||||
|
||||
if ( p !in dos_candidates )
|
||||
{
|
||||
local tmp_s: stats;
|
||||
tmp_s$orig = s$orig;
|
||||
tmp_s$resp = s$resp;
|
||||
tmp_s$orig_bytes = 0;
|
||||
            tmp_s$resp_bytes = 0;
|
||||
tmp_s$conns = 0;
|
||||
|
||||
dos_candidates[p] = tmp_s;
|
||||
}
|
||||
|
||||
dos_candidates[p]$conns += 1;
|
||||
dos_candidates[p]$orig_bytes += s$orig_bytes;
|
||||
dos_candidates[p]$resp_bytes += s$resp_bytes;
|
||||
|
||||
local n = dos_candidates[p]$conns;
|
||||
|
||||
local thresh = check_ip_threshold(dos_threshold, ip_pair_threshold_idx, p, n);
|
||||
|
||||
if ( thresh )
|
||||
{
|
||||
local msg = fmt("%s pair has reached %s threshold %s",
|
||||
p, n, dos_candidates[p]);
|
||||
NOTICE([$note=DoS::Threshold, $src=p$orig, $msg=msg]);
|
||||
|
||||
if ( dos_candidates[p]$resp_bytes > 0 )
|
||||
            NOTICE([$note=DoS::Victim_3389UDP, $src=p$orig, $msg=msg,
|
||||
$identifier=cat(p$resp), $suppress_for=1 hrs]);
|
||||
}
|
||||
}
|
356
doc/frameworks/configuration.rst
Normal file
356
doc/frameworks/configuration.rst
Normal file
|
@ -0,0 +1,356 @@
|
|||
|
||||
.. _framework-configuration:
|
||||
|
||||
=======================
|
||||
Configuration Framework
|
||||
=======================
|
||||
|
||||
Zeek includes a configuration framework that allows updating script options at
|
||||
runtime. This functionality consists of an :zeek:see:`option` declaration in
|
||||
the Zeek language, configuration files that enable changing the value of
|
||||
options at runtime, option-change callbacks to process updates in your Zeek
|
||||
scripts, a couple of script-level functions to manage config settings directly,
|
||||
and a log file (:file:`config.log`) that contains information about every
|
||||
option value change according to :zeek:see:`Config::Info`.
|
||||
|
||||
Introduction
|
||||
============
|
||||
|
||||
The configuration framework provides an alternative to using Zeek script
|
||||
constants to store various Zeek settings.
|
||||
|
||||
While traditional constants work well when a value is not expected to change at
|
||||
runtime, they cannot be used for values that need to be modified occasionally.
|
||||
While a :zeek:see:`redef` allows a re-definition of an already defined constant
|
||||
in Zeek, these redefinitions can only be performed when Zeek first starts.
|
||||
Afterwards, constants can no longer be modified.
|
||||
|
||||
However, it is clearly desirable to be able to change at runtime many of the
|
||||
configuration options that Zeek offers. Restarting Zeek can be time-consuming
|
||||
and causes it to lose all connection state and knowledge that it accumulated.
|
||||
Zeek’s configuration framework solves this problem.
|
||||
|
||||
Declaring Options
|
||||
=================
|
||||
|
||||
The :zeek:see:`option` keyword allows variables to be declared as configuration
|
||||
options:
|
||||
|
||||
.. code-block:: zeek
|
||||
|
||||
module Test;
|
||||
|
||||
export {
|
||||
option my_networks: set[subnet] = {};
|
||||
option enable_feature = F;
|
||||
option hostname = "testsystem";
|
||||
option timeout_after = 1min;
|
||||
option my_ports: vector of port = {};
|
||||
}
|
||||
|
||||
Options combine aspects of global variables and constants. Like global
|
||||
variables, options cannot be declared inside a function, hook, or event
|
||||
handler. Like constants, options must be initialized when declared (the type
|
||||
can often be inferred from the initializer but may need to be specified when
|
||||
ambiguous). The value of an option can change at runtime, but options cannot be
|
||||
assigned a new value using normal assignments.
|
||||
|
||||
The initial value of an option can be redefined with a :zeek:see:`redef`
|
||||
declaration just like for global variables and constants. However, there is no
|
||||
need to specify the :zeek:see:`&redef` attribute in the declaration of an
|
||||
option. For example, given the above option declarations, here are possible
|
||||
redefs that work anyway:
|
||||
|
||||
.. code-block:: zeek
|
||||
|
||||
redef Test::enable_feature = T;
|
||||
redef Test::my_networks += { 10.1.0.0/16, 10.2.0.0/16 };
|
||||
|
||||
Changing Options
|
||||
================
|
||||
|
||||
The configuration framework facilitates reading in new option values from
|
||||
external files at runtime. Configuration files contain a mapping between option
|
||||
names and their values. Each line contains one option assignment, formatted as
|
||||
follows::
|
||||
|
||||
[option name][tab/spaces][new value]
|
||||
|
||||
Lines starting with ``#`` are comments and ignored.
|
||||
|
||||
You register configuration files by adding them to
|
||||
:zeek:see:`Config::config_files`, a set of filenames. Simply say something like
|
||||
the following in :file:`local.zeek`:
|
||||
|
||||
.. code-block:: zeek
|
||||
|
||||
redef Config::config_files += { "/path/to/config.dat" };
|
||||
|
||||
Zeek will then monitor the specified file continuously for changes. For
|
||||
example, editing a line containing::
|
||||
|
||||
Test::enable_feature T
|
||||
|
||||
to the config file while Zeek is running will cause it to automatically update
|
||||
the option’s value in the scripting layer. The next time your code accesses the
|
||||
option, it will see the new value.
|
||||
|
||||
.. note::
|
||||
|
||||
The config framework is clusterized. In a cluster configuration, only the
|
||||
manager node watches the specified configuration files, and relays option
|
||||
updates across the cluster.
|
||||
|
||||
Config File Formatting
|
||||
----------------------
|
||||
|
||||
The formatting of config option values in the config file is not the same as in
|
||||
Zeek’s scripting language. Keep an eye on the :file:`reporter.log` for warnings
|
||||
from the config reader in case of incorrectly formatted values, which it’ll
|
||||
generally ignore when encountered. The following table summarizes supported
|
||||
types and their value representations:
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
|
||||
* - Data Type
|
||||
- Sample Config File Entry
|
||||
- Comments
|
||||
|
||||
* - :zeek:see:`addr`
|
||||
- ``1.2.3.4``
|
||||
- Plain IPv4 or IPv6 address, as in Zeek. No ``/32`` or similar netmasks.
|
||||
|
||||
* - :zeek:see:`bool`
|
||||
- ``T``
|
||||
- ``T`` or ``1`` for true, ``F`` or ``0`` for false
|
||||
|
||||
* - :zeek:see:`count`
|
||||
- ``42``
|
||||
- Plain, nonnegative integer.
|
||||
|
||||
* - :zeek:see:`double`
|
||||
- ``-42.5``
|
||||
- Plain double number.
|
||||
|
||||
* - :zeek:see:`enum`
|
||||
- ``Enum::FOO_A``
|
||||
- Plain enum string.
|
||||
|
||||
* - :zeek:see:`int`
|
||||
- ``-1``
|
||||
- Plain integer.
|
||||
|
||||
* - :zeek:see:`interval`
|
||||
- ``3600.0``
|
||||
- Always in epoch seconds, with optional fraction of seconds. Never
|
||||
includes a time unit.
|
||||
|
||||
* - :zeek:see:`pattern`
|
||||
- ``/(foo|bar)/``
|
||||
- The regex pattern, within forward-slash characters.
|
||||
|
||||
* - :zeek:see:`port`
|
||||
- ``42/tcp``
|
||||
- Port number with protocol, as in Zeek. When the protocol part is missing,
|
||||
Zeek interprets it as ``/unknown``.
|
||||
|
||||
* - :zeek:see:`set`
|
||||
- ``80/tcp,53/udp``
|
||||
- The set members, formatted as per their own type, separated by commas.
|
||||
For an empty set, use an empty string: just follow the option name with
|
||||
whitespace.
|
||||
|
||||
Sets with multiple index types (e.g. ``set[addr,string]``) are currently
|
||||
not supported in config files.
|
||||
|
||||
* - :zeek:see:`string`
|
||||
- ``Don’t bite, Zeek``
|
||||
- Plain string, no quotation marks. Given quotation marks become part of
|
||||
the string. Everything after the whitespace separator delineating the
|
||||
      option name becomes the string. Spaces and special characters are fine.
|
||||
Backslash characters (e.g. ``\n``) have no special meaning.
|
||||
|
||||
* - :zeek:see:`subnet`
|
||||
- ``1.2.3.4/16``
|
||||
- Plain subnet, as in Zeek.
|
||||
|
||||
* - :zeek:see:`time`
|
||||
- ``1608164505.5``
|
||||
- Always in epoch seconds, with optional fraction of seconds. Never
|
||||
includes a time unit.
|
||||
|
||||
* - :zeek:see:`vector`
|
||||
- ``1,2,3,4``
|
||||
- The set members, formatted as per their own type, separated by commas.
|
||||
For an empty vector, use an empty string: just follow the option name
|
||||
with whitespace.
|
||||
|
||||
This leaves a few data types unsupported, notably tables and records. If you
|
||||
require these, build up an instance of the corresponding type manually (perhaps
|
||||
from a separate input framework file) and then call
|
||||
:zeek:see:`Config::set_value` to update the option:
|
||||
|
||||
.. code-block:: zeek
|
||||
|
||||
module Test;
|
||||
|
||||
export {
|
||||
option host_port: table[addr] of port = {};
|
||||
}
|
||||
|
||||
event zeek_init() {
|
||||
local t: table[addr] of port = { [10.0.0.2] = 123/tcp };
|
||||
Config::set_value("Test::host_port", t);
|
||||
}
|
||||
|
||||
|
||||
Regardless of whether an option change is triggered by a config file or via
|
||||
explicit :zeek:see:`Config::set_value` calls, Zeek always logs the change to
|
||||
:file:`config.log`. A sample entry::
|
||||
|
||||
#fields ts id old_value new_value location
|
||||
#types time string string string string
|
||||
1608167352.498872 Test::a_count 42 3 config.txt
|
||||
|
||||
Mentioning options repeatedly in the config files leads to multiple update
|
||||
events; the last entry “wins”. Mentioning options that do not correspond to
|
||||
existing options in the script layer is safe, but triggers warnings in
|
||||
:file:`reporter.log`::
|
||||
|
||||
warning: config.txt/Input::READER_CONFIG: Option 'an_unknown' does not exist. Ignoring line.
|
||||
|
||||
Internally, the framework uses the Zeek input framework to learn about config
|
||||
changes. If you inspect the configuration framework scripts, you will notice
|
||||
that the scripts simply catch input framework events and call
|
||||
:zeek:see:`Config::set_value` to set the relevant option to the new value. If
|
||||
you want to change an option in your scripts at runtime, you can likewise call
|
||||
:zeek:see:`Config::set_value` directly from a script (in a cluster
|
||||
configuration, this only needs to happen on the manager, as the change will be
|
||||
automatically sent to all other nodes in the cluster).
|
||||
|
||||
.. note::
|
||||
|
||||
The input framework is usually very strict about the syntax of input files, but
|
||||
that is not the case for configuration files. These require no header lines,
|
||||
and both tabs and spaces are accepted as separators. A custom input reader,
|
||||
specifically for reading config files, facilitates this.
|
||||
|
||||
.. tip::
|
||||
|
||||
The gory details of option-parsing reside in ``Ascii::ParseValue()`` in
|
||||
:file:`src/threading/formatters/Ascii.cc` and ``Value::ValueToVal`` in
|
||||
:file:`src/threading/SerialTypes.cc` in the Zeek core.
|
||||
|
||||
Change Handlers
|
||||
===============
|
||||
|
||||
A change handler is a user-defined function that Zeek calls each time an option
|
||||
value changes. This allows you to react programmatically to option changes. The
|
||||
following example shows how to register a change handler for an option that has
|
||||
a data type of :zeek:see:`addr` (for other data types, the return type and
|
||||
second parameter data type must be adjusted accordingly):
|
||||
|
||||
.. code-block:: zeek
|
||||
|
||||
module Test;
|
||||
|
||||
export {
|
||||
option testaddr = 127.0.0.1;
|
||||
}
|
||||
|
||||
# Note: the data type of 2nd parameter and return type must match
|
||||
function change_addr(id: string, new_value: addr): addr
|
||||
{
|
||||
print fmt("Value of %s changed from %s to %s", id, testaddr, new_value);
|
||||
return new_value;
|
||||
}
|
||||
|
||||
event zeek_init()
|
||||
{
|
||||
Option::set_change_handler("Test::testaddr", change_addr);
|
||||
}
|
||||
|
||||
Immediately before Zeek changes the specified option value, it invokes any
|
||||
registered change handlers. The value returned by the change handler is the
|
||||
value Zeek assigns to the option. This allows, for example, checking of values
|
||||
to reject invalid input (the original value can be returned to override the
|
||||
change).
|
||||
|
||||
.. note::
|
||||
|
||||
:zeek:see:`Option::set_change_handler` expects the name of the option to
|
||||
invoke the change handler for, not the option itself. Also, that name
|
||||
includes the module name, even when registering from within the module.
|
||||
|
||||
It is possible to define multiple change handlers for a single option. In this
|
||||
case, the change handlers are chained together: the value returned by the first
|
||||
change handler is the “new value” seen by the next change handler, and so on.
|
||||
The built-in function :zeek:see:`Option::set_change_handler` takes an optional
|
||||
third argument that can specify a priority for the handlers.
|
||||
|
||||
A change handler function can optionally have a third argument of type string.
|
||||
When a config file triggers a change, then the third argument is the pathname
|
||||
of the config file. When the :zeek:see:`Config::set_value` function triggers a
|
||||
change, then the third argument of the change handler is the value passed to
|
||||
the optional third argument of the :zeek:see:`Config::set_value` function.
|
||||
|
||||
.. tip::
|
||||
|
||||
Change handlers are also used internally by the configuration framework. If
|
||||
you look at the script-level source code of the config framework, you can see
|
||||
that change handlers log the option changes to :file:`config.log`.
|
||||
|
||||
When Change Handlers Trigger
|
||||
----------------------------
|
||||
|
||||
Change handlers often implement logic that manages additional internal state.
|
||||
For example, depending on a performance toggle option, you might initialize or
|
||||
clean up a caching structure. In such scenarios you need to know exactly when
|
||||
and whether a handler gets invoked. The following hold:
|
||||
|
||||
* When no config files get registered in :zeek:see:`Config::config_files`,
|
||||
change handlers do not run.
|
||||
* When none of any registered config files exist on disk, change handlers do
|
||||
not run.
|
||||
|
||||
That is, change handlers are tied to config files, and don’t automatically run
|
||||
with the option’s default values.
|
||||
|
||||
* When a config file exists on disk at Zeek startup, change handlers run with
|
||||
the file’s config values.
|
||||
* When the config file contains the same value the option already defaults to,
|
||||
its change handlers are invoked anyway.
|
||||
* :zeek:see:`zeek_init` handlers run before any change handlers — i.e., they
|
||||
run with the options’ default values.
|
||||
* Since the config framework relies on the input framework, the input
|
||||
framework’s inherent asynchrony applies: you can’t assume when exactly an
|
||||
option change manifests in the code.
|
||||
|
||||
If your change handler needs to run consistently at startup and when options
|
||||
change, you can call the handler manually from :zeek:see:`zeek_init` when you
|
||||
register it. That way, initialization code always runs for the option’s default
|
||||
value, and also for any new values.
|
||||
|
||||
.. code-block:: zeek
|
||||
|
||||
module Test;
|
||||
|
||||
export {
|
||||
option use_cache = T;
|
||||
}
|
||||
|
||||
function use_cache_hdlr(id: string, new_value: bool): bool
|
||||
{
|
||||
if ( new_value ) {
|
||||
# Ensure caching structures are set up properly
|
||||
}
|
||||
|
||||
return new_value;
|
||||
}
|
||||
|
||||
event zeek_init()
|
||||
{
|
||||
use_cache_hdlr("Test::use_cache", use_cache);
|
||||
Option::set_change_handler("Test::use_cache", use_cache_hdlr);
|
||||
}
|
3
doc/frameworks/denylist.jsonl
Normal file
3
doc/frameworks/denylist.jsonl
Normal file
|
@ -0,0 +1,3 @@
|
|||
{"ip": "192.168.17.1", "timestamp": 1333252748, "reason": "Malware host"}
|
||||
{"ip": "192.168.27.2", "timestamp": 1330235733, "reason": "Botnet server"}
|
||||
{"ip": "192.168.250.3", "timestamp": 1333145108, "reason": "Virus detected"}
|
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Add a link
Reference in a new issue