Merge remote-tracking branch 'origin/master' into topic/johanna/tls12-decryption

Johanna Amann 2022-01-05 10:27:55 +00:00
commit d1e7134156
640 changed files with 14727 additions and 14980 deletions


@ -10,10 +10,11 @@ btest_jobs: &BTEST_JOBS 4
btest_retries: &BTEST_RETRIES 2
memory: &MEMORY 4GB
config: &CONFIG --build-type=release --enable-cpp-tests --disable-broker-tests --prefix=$CIRRUS_WORKING_DIR/install
static_config: &STATIC_CONFIG --build-type=release --enable-cpp-tests --disable-broker-tests --enable-static-broker --enable-static-binpac --prefix=$CIRRUS_WORKING_DIR/install
sanitizer_config: &SANITIZER_CONFIG --build-type=debug --enable-cpp-tests --disable-broker-tests --sanitizers=address,undefined --enable-fuzzers --enable-coverage
mobile_ipv6_config: &MOBILE_IPV6_CONFIG --build-type=release --enable-cpp-tests --enable-mobile-ipv6 --disable-broker-tests --prefix=$CIRRUS_WORKING_DIR/install
config: &CONFIG --build-type=release --disable-broker-tests --prefix=$CIRRUS_WORKING_DIR/install
static_config: &STATIC_CONFIG --build-type=release --disable-broker-tests --enable-static-broker --enable-static-binpac --prefix=$CIRRUS_WORKING_DIR/install
sanitizer_config: &SANITIZER_CONFIG --build-type=debug --disable-broker-tests --sanitizers=address,undefined --enable-fuzzers --enable-coverage
mobile_ipv6_config: &MOBILE_IPV6_CONFIG --build-type=release --enable-mobile-ipv6 --disable-broker-tests --prefix=$CIRRUS_WORKING_DIR/install
openssl30_config: &OPENSSL30_CONFIG --build-type=release --disable-broker-tests --with-openssl=/opt/openssl --prefix=$CIRRUS_WORKING_DIR/install
resources_template: &RESOURCES_TEMPLATE
cpu: *CPUS
@ -93,6 +94,13 @@ env:
# Linux EOL timelines: https://linuxlifecycle.com/
# Fedora (~13 months): https://fedoraproject.org/wiki/Fedora_Release_Life_Cycle
fedora35_task:
container:
# Fedora 35 EOL: Around Dec 2022
dockerfile: ci/fedora-35/Dockerfile
<< : *RESOURCES_TEMPLATE
<< : *CI_TEMPLATE
fedora34_task:
container:
# Fedora 34 EOL: Around May 2022
@ -212,16 +220,16 @@ alpine_task:
# Apple doesn't publish official long-term support timelines.
# We aim to support both the current and previous macOS release.
macos_big_sur_task:
macos_monterey_task:
macos_instance:
image: big-sur-xcode-12.5
image: monterey-xcode-13.1
prepare_script: ./ci/macos/prepare.sh
<< : *CI_TEMPLATE
<< : *MACOS_RESOURCES_TEMPLATE
macos_catalina_task:
macos_big_sur_task:
macos_instance:
image: catalina-xcode
image: big-sur-xcode-12.5
prepare_script: ./ci/macos/prepare.sh
<< : *CI_TEMPLATE
<< : *MACOS_RESOURCES_TEMPLATE
@ -261,6 +269,17 @@ freebsd12_task:
prepare_script: ./ci/freebsd/prepare.sh
<< : *CI_TEMPLATE
# This can be removed as soon as the first distribution that we use ships
# OpenSSL 3.0
openssl30_task:
container:
# Tweaked Ubuntu 20.04 EOL: April 2025
dockerfile: ci/openssl-3.0/Dockerfile
<< : *RESOURCES_TEMPLATE
<< : *CI_TEMPLATE
env:
ZEEK_CI_CONFIGURE_FLAGS: *OPENSSL30_CONFIG
sanitizer_task:
container:
# Just uses a recent/common distro to run memory error/leak checks.


@ -1,10 +1,5 @@
# Clang-format configuration for Zeek. This configuration requires
# at least clang-format 12.0.1 to format correctly.
#
# The easiest way to run this from the command-line is using the
# python script in auxil/run-clang-format:
#
# python3 auxil/run-clang-format/run-clang-format.py --clang-format-executable /path/to/clang-format -r src -i
Language: Cpp
Standard: c++17


@ -1,17 +0,0 @@
# Ignore everything 3rdparty
src/3rdparty/*
# These are files that are technically sourced from other places but aren't in 3rdparty
# and shouldn't be reformatted.
src/ConvertUTF.*
src/bro_inet_ntop.*
src/bsd-getopt-long.*
src/in_cksum.*
src/nb_dns.*
src/modp_numtoa.*
src/patricia.*
src/strsep.c
src/setsignal.c
# These files are generated code
src/DebugCmdInfoConstants.*

.git-blame-ignore-revs Normal file

@ -0,0 +1,26 @@
# Reformat the world (initial clang-format formatting)
b2f171ec69eae3a833a9db1b16e5234bd3eaf0b6
# clang-format: Force zeek-config.h to be earlier in the config ordering
9cb54f5d449b63006cc9a1f451a47732c92fef2d
# clang-format: A few minor comment-spacing fixes
07e276ab2e351ce71b709139f1933b9ead40d094
# clang-format: Enforce ordering of includes in ZBody
cb99ae2b7c9988656b097ad2789dffd2c0c37939
# clang-format: Other include ordering changes
e97c14add5b04aedc7f3f9dba59f665cbad793af
# clang-format: Other minor formatting changes
02206f3215f977ba7752476ba89ca06abe93375c
# clang-format: Set IndentCaseBlocks to false
4423574d265749da8e707ab0fbcffcbfaed26614
# clang-format: Set penalty for breaking after assignment operator
9af6b2f48d11b4e287d0f18034a486f76f9f2d61
# Remove trailing whitespace from script files
a6378531dbc5c357926d98fe785bb719cc70e1b4


@ -13,12 +13,17 @@ defaults:
run:
shell: bash
env:
IMAGE_NAME: zeek-image.tar
IMAGE_FILE: /tmp/zeek-image.tar
IMAGE_PATH: /tmp
jobs:
build:
docker-build:
runs-on: ubuntu-latest
env:
TEST_TAG: zeek:latest
CONFFLAGS: --generator=Ninja --build-type=Release
CONFFLAGS: --generator=Ninja --build-type=Release --enable-zeek-client
steps:
- uses: actions/checkout@v2
with:
@ -27,7 +32,8 @@ jobs:
# Create and boot a loader. This will e.g., provide caching
# so we avoid rebuilds of the same image after this step.
- uses: docker/setup-buildx-action@v1
- name: Build
- name: Build image
uses: docker/build-push-action@v2
with:
context: ./
@ -40,9 +46,13 @@ jobs:
- name: Run btests
run: make -C docker/btest
- name: Save image tarball
run: docker save -o ${{ env.IMAGE_FILE }} ${{ env.TEST_TAG }}
- name: Get version
id: version
run: echo "::set-output name=RELEASE_VERSION::$(cat VERSION)"
- name: Compute target tag
id: target
env:
@ -59,21 +69,22 @@ jobs:
echo "::set-output name=tag::zeek:latest"
elif [ "${GITHUB_REF}" = "refs/heads/master" ]; then
echo "::set-output name=tag::zeek-dev:latest"
elif [[ "${GITHUB_REF}" = refs/heads/v* ]] && [[ "${GITHUB_REF}" != refs/heads/v*-dev ]]; then
elif [[ "${GITHUB_REF}" = refs/tags/v* ]] && [[ "${GITHUB_REF}" != refs/tags/v*-dev ]]; then
echo "::set-output name=tag::zeek:${RELEASE_VERSION}"
fi
- name: Login to DockerHub
uses: docker/login-action@v1
# Secrets for the login are not available for pull requests.
if: github.event_name == 'push'
# Don't publish on forks. Also note that secrets for the login are not
# available for pull requests, so trigger on pushes only.
if: github.repository == 'zeek/zeek' && github.event_name == 'push'
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}
- name: Push
- name: Push image
# Only publish if we did compute a tag.
if: github.event_name == 'push' && steps.target.outputs.tag != ''
if: github.repository == 'zeek/zeek' && github.event_name == 'push' && steps.target.outputs.tag != ''
uses: docker/build-push-action@v2
with:
context: ./
@ -84,10 +95,65 @@ jobs:
tags: |
zeekurity/${{ steps.target.outputs.tag }}
- name: Preserve artifacts
- name: Preserve image artifact
uses: actions/upload-artifact@v2
with:
name: ${{ env.IMAGE_NAME }}
path: ${{ env.IMAGE_FILE }}
retention-days: 1
- name: Preserve btest artifacts
uses: actions/upload-artifact@v2
if: failure()
with:
name: docker-btest
path: docker/btest/.tmp
if-no-files-found: ignore
cluster-testing:
# We need the Zeek Docker image build job to complete first, since we need
# the resulting image for our docker-compose setup.
needs: docker-build
runs-on: ubuntu-latest
steps:
# Grab the sources so we have access to btest. Could also use pip, but it
# seems appealing to be using the in-tree version of btest. btest is in a
# submodule; we check it out selectively to save time.
- uses: actions/checkout@v2
- name: Check out btest
run: git submodule update --init ./auxil/btest
- name: Download Docker image artifact
uses: actions/download-artifact@v2
with:
name: ${{ env.IMAGE_NAME }}
path: ${{ env.IMAGE_PATH }}
- name: Load Docker image
run: |
docker load --input ${{ env.IMAGE_FILE }}
docker tag zeek:latest zeektest:latest
# The testsuite ref to use for this version of Zeek is stored in a file in
# the Zeek source tree.
- name: Get testsuite version
run: |
echo "TESTSUITE_COMMIT=$(cat ./testing/external/commit-hash.zeek-testing-cluster)" >> $GITHUB_ENV
- name: Retrieve cluster testsuite
uses: actions/checkout@v2
with:
repository: zeek/zeek-testing-cluster
path: testing/external/zeek-testing-cluster
ref: ${{ ENV.TESTSUITE_COMMIT }}
- name: Run testsuite
run: make -C testing/external/zeek-testing-cluster
- name: Preserve btest artifacts
uses: actions/upload-artifact@v2
if: failure()
with:
name: cluster-btest
path: testing/external/zeek-testing-cluster/.tmp
if-no-files-found: ignore

.github/workflows/pre-commit.yml vendored Normal file

@ -0,0 +1,14 @@
name: pre-commit
on:
pull_request:
push:
branches: [master]
jobs:
pre-commit:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: actions/setup-python@v2
- uses: pre-commit/action@v2.0.3

.gitmodules vendored

@ -49,6 +49,3 @@
[submodule "auxil/zeek-client"]
path = auxil/zeek-client
url = https://github.com/zeek/zeek-client
[submodule "auxil/run-clang-format"]
path = auxil/run-clang-format
url = https://github.com/Sarcasm/run-clang-format

.pre-commit-config.yaml Normal file

@ -0,0 +1,19 @@
# See https://pre-commit.com for more information
# See https://pre-commit.com/hooks.html for more hooks
#
repos:
- repo: https://github.com/pre-commit/mirrors-clang-format
rev: 'v13.0.0'
hooks:
- id: clang-format
- repo: https://github.com/maxwinterstein/shfmt-py
rev: 3.3.1.8
hooks:
- id: shfmt
args: ["-w", "-i", "4", "-ci"]
- repo: https://github.com/pre-commit/mirrors-yapf
rev: v0.31.0
hooks:
- id: yapf

.style.yapf Normal file

@ -0,0 +1,2 @@
[style]
column_limit=100

CHANGES

@ -1,3 +1,480 @@
4.2.0-dev.514 | 2022-01-03 13:56:12 -0700
* deprecation warning on use of out-of-scope local (Vern Paxson, Corelight)
4.2.0-dev.510 | 2022-01-03 13:54:52 -0700
* Switch BitTorrent analyzer to Zeek's regex engine (Avinal Kumar)
- Removes dependency on <regex.h>
- Replaces regex function with Zeek's standard regex functions
- Some replacements are workarounds and may be improved later via an
appropriate API
- Update test baseline to fix what seems to be capturing a bug in the
existing code.
Edit pass by Robin Sommer. Note that our test doesn't cover all the code
paths, but it does go through the one with the most substantial change.
* Adding test for BitTorrent tracker. (Robin Sommer, Corelight)
Our test trace is extracted from https://www.cloudshark.org/captures/b9089aac6eee.
There actually seems to be a bug in the existing code: the URI passed to
bt_tracker_request() includes a partial HTTP version. This commit
includes the baseline as the current code produces it; we'll fix that in
a subsequent commit.
4.2.0-dev.506 | 2022-01-03 09:33:43 -0800
* Expansion of the emerging cluster controller framework (Christian Kreibich, Corelight)
- Controller/agent connectivity is now controlled by pushed configurations
- The Request module now supports state timeouts
- Use Result records consistently for responses to the client
- Track successful config deployment in cluster controller
- Add ClusterController::API::notify_agents_ready event
- Make all globals start with a "g_" prefix
- Add missing debug() log function to log module's API
- Add separate utility module for controller and agent
- Additional infrastructure for printing types
- Bump zeek-client to v0.2.0
- Add Github action job for cluster tests
- Tweak Docker image configure invocation to include zeek-client
- Zeekygen documentation pass
4.2.0-dev.477 | 2021-12-14 16:53:57 -0700
* fixes for double-delete and reducing '?' operator with constant alternatives (Vern Paxson, Corelight)
* correct usage info for -u flag; -uu no longer supported (Vern Paxson, Corelight)
4.2.0-dev.468 | 2021-12-14 11:34:47 -0700
* factoring of generating C++ initializations, no semantic changes (Vern Paxson, Corelight)
* restored support for incremental compilation of scripts to C++ (Vern Paxson, Corelight)
* fixes for -O gen-standalone-C++ (Vern Paxson, Corelight)
* new ZEEK_FILE_ONLY and ZEEK_FUNC_ONLY environment variables for debugging script optimization - replaces ZEEK_ONLY (Vern Paxson, Corelight)
* fix for compiling record constructors to C++ (Vern Paxson, Corelight)
* fixes for compiling vector operations to C++ (Vern Paxson, Corelight)
* fix for profiling missing some profile elements (Vern Paxson, Corelight)
* minor efficiency tweak for ZAM record construction (Vern Paxson, Corelight)
4.2.0-dev.456 | 2021-12-14 09:23:47 -0700
* GH-1860: Add double_to_int() bif (Tim Wojtulewicz, Corelight)
4.2.0-dev.454 | 2021-12-13 09:41:32 -0700
* Check for sets before attempting to check for same Yield types (Tim Wojtulewicz)
* Add early bail-outs to same_type() (Tim Wojtulewicz)
* Fix types for Analyzer::register_for_port(s) to be the same (Tim Wojtulewicz)
* Update cmake submodule across all other submodules (Tim Wojtulewicz, Corelight)
4.2.0-dev.448 | 2021-12-10 15:35:34 -0700
* update btest to no longer use (unsupported) %S formatting, no longer needed (Vern Paxson, Corelight)
* replace --optimize-only with --optimize-funcs and --optimize-files (Vern Paxson, Corelight)
4.2.0-dev.444 | 2021-12-10 13:13:13 -0700
* reintroduction of "-O add-C++" option (Vern Paxson, Corelight)
4.2.0-dev.442 | 2021-12-10 13:12:43 -0700
* fixes for vector operations (Vern Paxson, Corelight)
* flag globals initialized to opaque values as non-compilable (Vern Paxson, Corelight)
* skip type signatures for lambdas (Vern Paxson, Corelight)
* fix for translating filenames beginning with numbers to C++ variable names (Vern Paxson, Corelight)
4.2.0-dev.436 | 2021-12-10 13:11:36 -0700
* update script-to-C++ compilation for new record constructor internals (Vern Paxson, Corelight)
4.2.0-dev.434 | 2021-12-10 13:11:10 -0700
* updates to ZAM to track recent changes in script semantics (Vern Paxson, Corelight)
4.2.0-dev.432 | 2021-12-10 09:28:23 -0700
* GH-1741: Print error if calling a non-hook with hook keyword (Tim Wojtulewicz, Corelight)
* GH-1740: Report a better error message if table key is not a list (Tim Wojtulewicz, Corelight)
4.2.0-dev.428 | 2021-12-09 14:58:53 -0700
* GH-1125: Support GRE ARUBA headers (Tim Wojtulewicz, Corelight)
* Fix ethertype for ARP in Geneve forwarding rules (Tim Wojtulewicz, Corelight)
4.2.0-dev.425 | 2021-12-09 13:45:17 -0800
* Add LogAscii::json_include_unset_fields flag to control unset field rendering (Christian Kreibich, Corelight)
4.2.0-dev.423 | 2021-12-09 19:56:43 +0000
* Improve error message for clash between variable and function name (Johanna Amann, Corelight)
Fixes GH-1832
* Restore --disable-zeekctl configure argument (Tim Wojtulewicz, Corelight)
* Update plugin.hooks baseline for recent Geneve change (Tim Wojtulewicz, Corelight)
4.2.0-dev.419 | 2021-12-07 09:34:45 -0700
* GH-1764: Update mappings for Geneve analyzer to IP4/IP6/ARP (Tim Wojtulewicz, Corelight)
4.2.0-dev.417 | 2021-12-06 17:00:16 -0800
* Flip C++ unit tests to being enabled by default (Christian Kreibich, Corelight)
To disable them, configure with --disable-cpp-tests.
* Support for unit tests in plugins (Christian Kreibich, Corelight)
4.2.0-dev.410 | 2021-12-06 11:29:32 -0700
* Remove separate Tag types, note breaking change in NEWS (Tim Wojtulewicz, Corelight)
4.2.0-dev.408 | 2021-12-06 09:15:24 -0700
* GH-1768: Properly clean up existing log stream when recreated with the same ID (Tim Wojtulewicz, Corelight)
4.2.0-dev.406 | 2021-12-01 10:32:34 -0700
* Format Python scripts with yapf. (Benjamin Bannier, Corelight)
We also add a very basic yapf configuration file. Most of the changes in
this patch were performed automatically, but we broke one overly long
string into multiple components in `src/make_dbg_constants.py`.
* Format shell scripts with shfmt. (Benjamin Bannier, Corelight)
All changes in this patch were performed automatically with `shfmt` with
configuration flags specified in `.pre-commit-config.yaml`.
4.2.0-dev.403 | 2021-12-01 10:25:32 -0700
* fix btest comment to more accurately describe the test (Vern Paxson, Corelight)
* btests for erroneous script conditionals (Vern Paxson, Corelight)
* avoid compiling-to-C++ for functions potentially influenced by conditionals (Vern Paxson, Corelight)
* track the use of conditionals in functions and files (Vern Paxson, Corelight)
* AST profiles track the associated function/body/expression (Vern Paxson, Corelight)
4.2.0-dev.396 | 2021-12-01 09:44:03 -0700
* GH-1873: Deprecate the tag types differently to avoid type clashes (Tim Wojtulewicz, Corelight)
4.2.0-dev.394 | 2021-11-30 11:53:35 -0700
* Fix for the recent patch that allows segment offloaded packets. (Johanna Amann, Corelight)
We recently added support for segment offloaded packets. It turns out
that this can lead to problems in UDP/ICMP based parsers since I missed
correctly also updating the payloadlength there, and using the capture
length instead when segment offloading is enabled.
Credit to OSS-Fuzz for discovery
https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=41391
https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=41394
https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=41395
(Link to details becomes public 30 days after patch release)
4.2.0-dev.393 | 2021-11-29 13:46:59 -0700
* Fix a number of Coverity findings (Tim Wojtulewicz, Corelight)
1466460: Uninitialized field in gtp-analyzer.pac
1462465: Null pointer dereference in CompositeHash::SingleValHash
1462463: Copy/paste error in TCPSessionAdapter::build_syn_packet_val
1462067: Uninitialized fields in Zinst
4.2.0-dev.391 | 2021-11-29 13:44:11 -0700
* suppress unneeded initializations (Vern Paxson, Corelight)
4.2.0-dev.387 | 2021-11-24 13:32:33 -0700
* fixes for constructing and assigning records with fields that are empty vectors (Vern Paxson, Corelight)
4.2.0-dev.385 | 2021-11-23 19:43:48 -0700
* Changes to speed up compilation of Compiled-to-C++ Zeek Scripts (Vern Paxson, Corelight)
* removing unused SubNetType class (Vern Paxson, Corelight)
4.2.0-dev.371 | 2021-11-23 19:41:10 -0700
* Add new tunnel packet analyzers, remove old ones (Tim Wojtulewicz, Corelight)
* Add PacketAnalyzer::register_for_port(s) functions (Tim Wojtulewicz, Corelight)
These allow packet analyzers to register ports as identifiers to forward from
parent analyzers, while also adding those ports to the now-global
Analyzer::ports table at the same time.
* Add analyzer_confirmation and analyzer_violation events (Tim Wojtulewicz, Corelight)
* Add utility function for tunnel analyzers to setup encapsulation (Tim Wojtulewicz, Corelight)
* Store some additional information in the packet during processing (Tim Wojtulewicz, Corelight)
- Session related to the packet
- is_orig information if a UDP header was found
* Minor fix in UDP to avoid duplicating tunnels (Tim Wojtulewicz, Corelight)
* Fix error text in IPTunnel analyzer (Tim Wojtulewicz, Corelight)
* Change Packet::ip_hdr to be a shared_ptr so it can be copied into EncapsulatingConn (Tim Wojtulewicz, Corelight)
* Add method for packet analyzers to register for protocol detection (Tim Wojtulewicz, Corelight)
* Add concept of "parent" tag namespaces (Tim Wojtulewicz, Corelight)
This allows us to create an EnumType that groups all of the analyzer
tag values into a single type, while still having the existing types
that split them up. We can then use this for certain events that benefit
from taking all of the tag types at once.
* Unify plugin::Component and plugin::TaggedComponent into a single class (Tim Wojtulewicz, Corelight)
These two are almost always used in conjunction with each other, and
TaggedComponent is never used by itself. Combining them together into
a single class will help simplify some of the code around managing
the mapping between Tags and Components.
* Remove uses of deprecated Tag types (Tim Wojtulewicz, Corelight)
* Unify all of the Tag types into one type (Tim Wojtulewicz, Corelight)
- Remove tag types for each component type (analyzer, etc)
- Add deprecated versions of the old types
- Remove unnecessary tag element from templates for TaggedComponent and ComponentManager
- Enable TaggedComponent to pass an EnumType when initializing Tag objects
- Update some tests that are affected by the tag enum values changing order
4.2.0-dev.350 | 2021-11-23 15:35:06 +0000
* Add testcase for TCP segment offloading (GH-1829). (Johanna Amann, Corelight)
4.2.0-dev.348 | 2021-11-23 13:45:39 +0000
* OpenSSL 3 compatibility (Johanna Amann, Corelight)
Zeek is now compatible with OpenSSL 3.0, our test baselines pass cleanly, and
we have a CI run for OpenSSL 3.0. This has a certain amount of new code for
X.509 certificate parsing. Apart from that, the main change is that we
use an older, legacy API for OpaqueVal hashing, since the newer API
does not allow us to serialize data anymore. For details see ticket 1379.
4.2.0-dev.340 | 2021-11-23 10:10:13 +0000
* Accept packets that use TCP segment offloading. (Johanna Amann, Corelight)
When checksum offloading is enabled, we now forward packets that
have 0 header lengths set - and assume that they have TSO enabled.
If checksum offloading is not enabled, we drop the packets (GH-1829)
* Updates to NEWS to cover recent additions. [nomail] [skip ci] (Christian Kreibich, Corelight)
* Update doc and auxil/zeek-aux submodules [nomail] [skip ci] (Christian Kreibich, Corelight)
* Update cmake and aux/zeek-aux submodules [nomail] [skip ci] (Christian Kreibich, Corelight)
4.2.0-dev.333 | 2021-11-17 11:57:04 -0800
* Clean up fully after successful Docker btests (Christian Kreibich, Corelight)
4.2.0-dev.331 | 2021-11-15 10:10:52 -0800
* Fix ref-naming typo in the Github Docker workflow (Christian Kreibich, Corelight)
4.2.0-dev.328 | 2021-11-12 13:46:32 -0700
* Update libkqueue submodule (Tim Wojtulewicz, Corelight)
4.2.0-dev.326 | 2021-11-12 09:30:54 -0700
* Added plugin.unprocessed_packet_hook btest (Tim Wojtulewicz, Corelight)
* Fix whitespace in help output (Tim Wojtulewicz, Corelight)
* Add command-line option to write unprocessed packets to a file (Tim Wojtulewicz, Corelight)
This commit also changes the PcapDumper to automatically flush after
every call to Dump(). This is because pcap_dump has an internal buffer
of some sort that only writes to the file after a set amount of bytes.
When using the new option on a low-traffic network, it might be a while
before you see any packets written since it has to overcome that buffer
limit first.
* GH-1620: Add event and plugin hook to track packets not processed (Tim Wojtulewicz, Corelight)
4.2.0-dev.319 | 2021-11-10 10:20:01 -0700
* Install include headers from `src/3rdparty/`. (Benjamin Bannier, Corelight)
This is a fixup commit for 72cbc7cd13b7c1bda98658104431c3b530ff68d6
where we move some header files from `src/` to `src/3rdparty/` but
missed adding install rules for these header. Since some of these
headers are exposed in installed headers they need to be installed as
well.
4.2.0-dev.317 | 2021-11-10 11:33:29 +0000
* Add case-insensitive search for find_str and rfind_str (Abdel)
4.2.0-dev.314 | 2021-11-10 11:16:28 +0100
* GH-1757: Add new hook `HookLoadFileExtended` that allows plugins
to supply Zeek script and signature code to parse. (Robin Sommer)
The new hook works similar to the existing `HookLoadFile` but,
additionally, allows the plugin to return a string that contains
the code to be used for the file being loaded. If the plugin does
so, the content of any actual file on disk will be ignored. This
works for both Zeek scripts and signatures.
* Fix an issue where signature files supplied on the command line
wouldn't pass through the file loading hooks. (Robin Sommer,
Corelight)
4.2.0-dev.310 | 2021-11-09 10:29:59 -0700
* Add Github action exercising pre-commit (Benjamin Bannier, Corelight)
This patch adds a Github action which exercises pre-commit linters for
commits to the `master` branch or for pull requests. We add this task
as a Github action since we expect it to finish quickly; running outside
of Cirrus makes it possible to provide feedback quickly.
* Add pre-commit config. (Benjamin Bannier, Corelight)
This patch adds `clang-format` as the only linter for now. This replaces the
previously used script from `auxil/run-clang-format` which we remove.
This requires the Python program `pre-commit`
(https://pypi.org/project/pre-commit/). With that one can then run
`clang-format` on the whole codebase with
$ pre-commit run -a clang-format
or on just the staged files
# Explicitly selecting linter.
$ pre-commit run clang-format
# Run all linters (currently just `clang-format`).
$ pre-commit
`pre-commit` supports managing Git commit hooks so that linters are run
on commit. Linters can be installed with
$ pre-commit install
The documentation at https://pre-commit.com/ covers these topics in
addition to more information.
* Format code with `clang-format` (Benjamin Bannier, Corelight)
This patch formats files not conforming to the C++ formatting with
`clang-format`.
* Remove stale files `src/DebugCmdInfoConstants.*` (Benjamin Bannier, Corelight)
The files generated from `src/DebugCmdInfoConstants.in` are placed in
`build/src/` by the build setup, and the generated files in `src/` removed
here were unused and possibly out-of-date.
* Disable formatting for files in `testing/btest/plugins` (Benjamin Bannier, Corelight)
Files in that folder were previously not formatted. With this patch we
now disable formatting in that folder explicitly by adding a dedicated
`clang-format` config which deactivates any formatting changes.
* Move 3rdparty source files to `3rdparty/` (Benjamin Bannier, Corelight)
This patch moves in-tree 3rdparty source files to `3rdparty/`. With that
we can remove special treatment of these files for `run-clang-format`.
4.2.0-dev.303 | 2021-11-09 09:45:57 -0700
* GH-1819: Handle recursive types when describing type in binary mode (Tim Wojtulewicz, Corelight)
4.2.0-dev.301 | 2021-11-09 09:28:18 -0700
* Remove no-op false-teredo test (Tim Wojtulewicz, Corelight)
4.2.0-dev.297 | 2021-11-05 12:49:55 -0700
* Only push CI's Docker images when we're on the main repo (Christian Kreibich, Corelight)
* Add macOS Monterey and drop Catalina in CI (Christian Kreibich, Corelight)
* Add Fedora 35 to CI (Christian Kreibich, Corelight)
4.2.0-dev.292 | 2021-11-04 14:28:35 -0700
* Fix C++ set intersection code (Yacin Nadji, Corelight)
4.2.0-dev.286 | 2021-11-03 09:36:41 -0700
* GH-693: use pcap_dump_open_append where supported (Tim Wojtulewicz, Corelight)
4.2.0-dev.284 | 2021-11-03 09:35:10 -0700
* GH-1781: Add .git-blame-ignore-revs file (Tim Wojtulewicz, Corelight)
4.2.0-dev.280 | 2021-11-01 09:20:16 -0700
* Fix issue with broken libpcaps that return repeat packets (Tim Wojtulewicz, Corelight)
This is apparently a problem with the Myricom version of libpcap, where
instead of returning a null or a zero if no packets are available, it
returns the previous packet. This causes Zeek to improperly parse the
packet and crash. We thought we had fixed this previously with a check
for a null packet but that fix was not enough.
4.2.0-dev.277 | 2021-10-21 17:23:46 -0700
* Apply some missing clang-format changes (Tim Wojtulewicz, Corelight)
4.2.0-dev.274 | 2021-10-20 11:13:16 -0700
* Remove trailing whitespace from script files (Tim Wojtulewicz, Corelight)
4.2.0-dev.271 | 2021-10-19 14:54:56 +0200
* Add parsing of DNS SVCB/HTTPS records (FlyingWithJerome)
4.2.0-dev.260 | 2021-10-15 09:45:45 +0100
* logging/writers/ascii: shadow files: Add fsync() before rename(). This
prevents potential problems with leftover files after unclean shutdowns.
(Arne Welzel, Corelight)
* Fix typo in typedef changes that broke tests on 32-bit Debian 9 (Tim Wojtulewicz, Corelight)
4.2.0-dev.255 | 2021-10-12 09:22:37 -0700
* Replace most uses of typedef with using for type aliasing (Tim Wojtulewicz, Corelight)


@ -490,6 +490,9 @@ include(FindKqueue)
if ( (OPENSSL_VERSION VERSION_EQUAL "1.1.0") OR (OPENSSL_VERSION VERSION_GREATER "1.1.0") )
set(ZEEK_HAVE_OPENSSL_1_1 true CACHE INTERNAL "" FORCE)
endif()
if ( (OPENSSL_VERSION VERSION_EQUAL "3.0.0") OR (OPENSSL_VERSION VERSION_GREATER "3.0.0") )
set(ZEEK_HAVE_OPENSSL_3_0 true CACHE INTERNAL "" FORCE)
endif()
# Tell the plugin code that we're building as part of the main tree.
set(ZEEK_PLUGIN_INTERNAL_BUILD true CACHE INTERNAL "" FORCE)


@ -250,7 +250,7 @@ PROJECT (https://github.com/zeek) UNDER BSD LICENCE.
==============================================================================
%%% in_cksum.cc
%%% 3rdparty/in_cksum.cc
==============================================================================
@ -283,7 +283,7 @@ SUCH DAMAGE.
==============================================================================
%%% Patricia.c
%%% 3rdparty/patricia.c
==============================================================================
@ -328,7 +328,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
==============================================================================
%%% strsep.c
%%% 3rdparty/strsep.c
==============================================================================
@ -365,7 +365,7 @@ SUCH DAMAGE.
==============================================================================
%%% ConvertUTF.c
%%% 3rdparty/ConvertUTF.c
==============================================================================
@ -479,7 +479,7 @@ SUCH DAMAGE.
==============================================================================
%%% bsd-getopt-long.c
%%% 3rdparty/bsd-getopt-long.c
==============================================================================
@ -555,7 +555,7 @@ limitations under the License.
==============================================================================
%%% bro_inet_ntop.c
%%% 3rdparty/bro_inet_ntop.c
==============================================================================
@ -578,7 +578,7 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
==============================================================================
%%% modp_numtoa.h
%%% 3rdparty/modp_numtoa.h
==============================================================================

NEWS

@ -6,6 +6,17 @@ release. For an exhaustive list of changes, see the ``CHANGES`` file
Zeek 4.2.0
==========
Breaking Changes
----------------
- The existing ``Tag`` types in C++ (``zeek::Analyzer::Tag``, etc) have been
merged into a single type called ``zeek::Tag``. This is a breaking change, and
may result in plugins failing to build where they were relying on those types
being different for function overloading and such. We attempted to include
deprecated versions of the old types, but were unable to do so because of
changes to return types from a number of methods. With this change, any uses
of the ``zeek::*::Tag`` types will need to be replaced by ``zeek::Tag``.
New Functionality
-----------------
@ -22,19 +33,89 @@ New Functionality
example to build a Zeek plugin. You can add any required system packages in a
derived image, or install them directly in the running container.
- Zeek now supports formatting the C++ code using clang-format. It requires at
least clang-format 12.0.1 due to some additions that were made in that version
to better support the Whitesmiths style. Zeek also includes a set of python
scripts to more easily reformat in the auxil/run-clang-format directory. An
example command to reformat the code:
`python3 auxil/run-clang-format/run-clang-format.py --clang-format-executable `which clang-format-12` -r src -i`
- Zeek now supports formatting the C++ code using clang-format. Also provided is
a configuration for ``pre-commit`` to run clang-format when adding new commits via
``git``. More details can be found at https://github.com/zeek/zeek/wiki/Coding-Style-and-Conventions#clang-format.
- Experimental support for speeding up Zeek script execution by compiling
scripts to a low-level form called "ZAM". You activate this feature by
specifying ``-O ZAM`` on the command line. See
``src/script_opt/ZAM/README.md`` for more information.
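For example, a minimal invocation (the script name here is illustrative):
$ zeek -O ZAM myscript.zeek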
- Improvements for compiling scripts to C++ (an experimental optimization
feature introduced in 4.1). The generated C++ now compiles much faster than
previously, though it can still take quite a while when using C++ optimization
on large sets of scripts. You can incrementally compile additional scripts
using ``-O add-C++``. See ``src/script_opt/CPP/README.md`` for details.
- The new flags --optimize-files=/pat/ and --optimize-funcs=/pat/ apply
to both ZAM and compile-to-C++ script optimization. The first instructs
Zeek to optimize any functions/hooks/event handlers residing in files
matching the given pattern (unanchored). The second does the same but
based on the function name, and with the pattern anchored (so for example
--optimize-funcs=foo will optimize any functions named "foo" but not
those named "foobar", or "MYSCOPE::foo"). The flags can be combined
and can also be used multiple times to specify a set of patterns.
If neither flag is used then optimization is applied to all loaded
scripts; if used, then only to those that match.
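As a sketch combining the flags with ZAM (pattern and script name are
illustrative only):
# Optimize only functions named exactly "foo".
$ zeek -O ZAM --optimize-funcs=foo myscript.zeek
# Optimize only functions residing in files whose path matches "policy/".
$ zeek -O ZAM --optimize-files=policy/ myscript.zeek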
- The ``-uu`` flag for analyzing potentially unused record fields has been
removed because, due to other changes in script optimization, keeping it
would now require about 1,800 lines of code not otherwise needed.
- The DNS analyzer has initial support for the SVCB and HTTPS types. The new
events are ``dns_SVCB`` and ``dns_HTTPS``.
- The ``find_str`` and ``rfind_str`` bifs now support case-insensitive searches.
- Added a new plugin hook, ``Plugin::HookUnprocessedPacket``, for capturing
packets that made it through analysis without being processed. Currently
ARP packets or packets with a valid IP-based transport header are marked as
processed. This also adds an event called ``packet_not_processed`` that
reports the same packets.
- A new command-line option ``-c`` or ``--capture-unprocessed`` will dump any
packets not marked as being processed, similar to the new hook and event
above.
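A minimal sketch, assuming the option takes the output pcap path as its
argument:
$ zeek -r trace.pcap --capture-unprocessed unprocessed.pcap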
- In Zeek plugins, the new cmake function ``zeek_plugin_scripts()`` should be
used alongside ``zeek_plugin_cc()`` and related functions to establish
dependency tracking between Zeek scripts shipped with the plugin and plugin
rebuilds. Previously, updates to included Zeek scripts didn't reliably
trigger a rebuild.
- Added PacketAnalyzer::register_for_port(s) functions to the packet analyzer
framework in script-land. This allows a packet analyzer to register a port
mapping with a parent analyzer just like any other numeric identifier, while
also adding that port to the now-global Analyzer::ports table used by BPF
filtering.
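As a sketch, the session-analyzer counterpart (whose signature appears in
this commit's analyzer/main.zeek hunk) is called the same way; the analyzer
tag and port here are illustrative:
$ zeek -e 'event zeek_init() { Analyzer::register_for_port(Analyzer::ANALYZER_HTTP, 8080/tcp); }'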
- Added AllAnalyzers::Tag enum type that combines the existing Analyzer::Tag,
PacketAnalyzer::Tag, and Files::Tag into a single enum. The existing types
still exist, but the new type can be used as an argument for
functions/hooks/events that need to handle any of the analyzer types.
- Added protocol detection functionality to the packet analyzer framework.
Packet analyzers can register for protocol detection using the
``PacketAnalyzer::register_protocol_detection`` script function and implement
the ``PacketAnalyzer::DetectProtocol`` method in C++. This allows packet
analyzer plugins to detect a protocol via byte matching or other heuristics
instead of relying solely on a numeric identifier for forwarding.
- The JSON logger's new LogAscii::json_include_unset_fields flag provides
control over how to handle unset "&optional" fields. By default it continues
to skip such fields entirely. When redef'ing the flag to T it includes such
fields, with a "null" value. This simplifies data import use cases that
require fields to be present at all times, regardless of their value.
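A minimal sketch enabling this together with JSON output (the trace name is
illustrative; ``LogAscii::use_json`` is the pre-existing JSON switch):
$ zeek -r trace.pcap -e 'redef LogAscii::use_json = T; redef LogAscii::json_include_unset_fields = T;'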
- A new external testsuite, https://github.com/zeek/zeek-testing-cluster,
focuses on testing the emerging controller framework. It leverages the new
official Zeek Docker image for building docker-compose test setups, driven via
btest. The Github CI setup now includes a workflow that deploys and runs this
testsuite.
- The GRE analyzer now supports the Aruba WLAN protocol type.
Changed Functionality
---------------------
@ -43,19 +124,75 @@ Changed Functionality
to serialize, meaning that you can now also index with sets, vectors,
patterns, and even tables.
- The traditional TSV Zeek logs are now valid UTF8 by default. It's possible
to revert to the previous behavior by setting ``LogAscii::enable_utf_8`` to
- The traditional TSV Zeek logs are now valid UTF8 by default. It's possible to
revert to the previous behavior by setting ``LogAscii::enable_utf_8`` to
false.
- The ``SYN_packet`` record now records TCP timestamps (TSval/TSecr)
when available.
- The ``SYN_packet`` record now records TCP timestamps (TSval/TSecr) when
available.
Removed Functionality
---------------------
- The ``init-plugin`` script now focuses purely on dynamic Zeek plugins. It no
longer generates Zeek packages. To instantiate new Zeek packages, use the
``zkg create`` command instead.
- The ``ignore_checksums`` option and the ``-C`` command-line option now
additionally cause Zeek to accept IPv4 packets that provide a length of zero
in the total-length IPv4 header field. When the length is set to zero, the
capture length of the packet is used instead. This can be used to replay
traces, or analyze traffic when TCP segment offloading is enabled on the
local NIC - which typically causes the total-length of affected packets to be
set to zero.
- The existing tunnel analyzers for AYIYA, Geneve, GTPv1, Teredo, and VXLAN are
now packet analyzers.
- C++ unit tests are now compiled in by default and can be disabled by
configuring the build with --disable-cpp-tests. We removed the former
--enable-cpp-tests configure flag. Unit tests now also work in (static and
dynamic) Zeek plugins.
- This release expands the emerging cluster controller framework. Most changes
concern internals of the framework. Agent/controller connectivity management
has become more flexible: configuration updates pushed by the client can now
convey the agent topology, removing the need to hardwire/redef settings
in the controller. The new ClusterController::API::notify_agents_ready event
declares the management infrastructure ready for use. zeek-client's CLI has
expanded to support the new functionality.
The framework is still experimental and provides only a small subset of
ZeekControl's functionality. ZeekControl remains the recommended tool for
maintaining your cluster.
Deprecated Functionality
------------------------
- The ``protocol_confirmation`` and ``protocol_violation`` events along with the
corresponding ``Analyzer::ProtocolConfirmation`` and
``Analyzer::ProtocolViolation`` C++ methods are marked as deprecated. They are
replaced by ``analyzer_confirmation`` and ``analyzer_violation`` which can
also now be implemented in packet analyzers.
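A minimal sketch of migrating a handler to the new event, using the
signature as it appears in this commit's dpd/main.zeek change:
$ zeek -r trace.pcap -e '
event analyzer_confirmation(c: connection, atype: AllAnalyzers::Tag, aid: count)
    {
    # Print the confirmed analyzer and the connection UID.
    print fmt("%s confirmed on %s", Analyzer::name(atype), c$uid);
    }'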
- Declaring a local variable in an inner scope and then accessing it in an
outer scope is now deprecated. For example,
if ( foo() )
{
local a = 5;
...
}
print a;
is deprecated. You can address the issue by hoisting the declaration
to the outer scope, such as:
local a: count;
if ( foo() )
{
a = 5;
...
}
print a;
Zeek 4.1.0
==========


@ -1 +1 @@
4.2.0-dev.255
5.0.0-dev.2

@ -1 +1 @@
Subproject commit 6bd264351813eedb92753d2d4ed76ac6ddc076b3
Subproject commit a7d9233b37daac558314625566bb8c8a993f2904

@ -1 +1 @@
Subproject commit 8169f1630993b34189b2c221d0e5ab8ba9777967
Subproject commit 8b1322d3060a1fecdc586693e6215ad7ef8ab0e9

@ -1 +1 @@
Subproject commit 47cac80cbe1e1bde8e3b425903e50d62715972a2
Subproject commit d9e84400534b968e33ab01cfadfb569c0d7b2929

@ -1 +1 @@
Subproject commit 0a37819d484358999a47e76ac473da74799ab08d
Subproject commit 5f954ec65cb78b17f7156455c8c3c905a816ae96

@ -1 +1 @@
Subproject commit 6c1717dea2dc34a91d32e07d2cae34b1afa0a84e
Subproject commit aeaeed21198d6f41d0cf70bda63fe0f424922ac5

@ -1 +1 @@
Subproject commit dfbe1db9da455552f7a9ad5d2aea17dd9d832ac1
Subproject commit fd3dc29a5c2852df569e1ea81dbde2c412ac5051

@ -1 +0,0 @@
Subproject commit 39081c9c42768ab5e8321127a7494ad1647c6a2f

@ -1 +1 @@
Subproject commit f3a1e8fe464c0425688eff67e30f35c678914ad2
Subproject commit 479e8a85fd58936c16d361dbf3de4e7268d751f8

@ -1 +1 @@
Subproject commit 296383d577a3f089c4f491061a985293cf6736e6
Subproject commit 12be5e3e51a4a97ab3aa0fa4a02da194a83c7f24

@ -1 +1 @@
Subproject commit afe253c77591e87b2a6cf6d5682cd02caa78e9d2
Subproject commit 553d897734b6d9abbc2e4467fae89f68a2c7315d

@ -1 +1 @@
Subproject commit d31885671d74932d951778c029fa74d44cf3e542
Subproject commit 95b048298a77bb14d2c54dcca8bb549c86eb96b9


@ -10,6 +10,11 @@ set -x
# some problems with Catalina specifically, but it doesn't break anything on Big Sur either.
if [[ "${CIRRUS_OS}" == "darwin" ]]; then
export ZEEK_CI_CONFIGURE_FLAGS="${ZEEK_CI_CONFIGURE_FLAGS} --osx-sysroot=$(xcrun --show-sdk-path)"
# Starting with Monterey & Xcode 13.1 we need to help it find OpenSSL
if [ -d /usr/local/opt/openssl@1.1/lib/pkgconfig ]; then
export PKG_CONFIG_PATH=$PKG_CONFIG_PATH:/usr/local/opt/openssl@1.1/lib/pkgconfig
fi
fi
if [[ "${ZEEK_CI_CREATE_ARTIFACT}" != "1" ]]; then

ci/fedora-35/Dockerfile Normal file

@ -0,0 +1,23 @@
FROM fedora:35
RUN dnf -y install \
bison \
cmake \
diffutils \
findutils \
flex \
git \
gcc \
gcc-c++ \
libpcap-devel \
make \
openssl-devel \
python3-devel \
python3-pip\
sqlite \
swig \
which \
zlib-devel \
&& dnf clean all && rm -rf /var/cache/dnf
RUN pip3 install junit2html


@ -8,6 +8,6 @@ set -x
env ASSUME_ALWAYS_YES=YES pkg bootstrap
pkg install -y bash git cmake swig bison python3 base64
pkg upgrade -y curl
pyver=`python3 -c 'import sys; print(f"py{sys.version_info[0]}{sys.version_info[1]}")'`
pyver=$(python3 -c 'import sys; print(f"py{sys.version_info[0]}{sys.version_info[1]}")')
pkg install -y $pyver-sqlite3 $pyver-pip
pip install junit2html


@ -1,7 +1,6 @@
#! /usr/bin/env bash
function banner
{
function banner {
local msg="${1}"
printf "+--------------------------------------------------------------+\n"
printf "| %-60s |\n" "$(date)"

ci/openssl-3.0/Dockerfile Normal file

@ -0,0 +1,36 @@
FROM ubuntu:20.04
ENV DEBIAN_FRONTEND="noninteractive" TZ="America/Los_Angeles"
RUN apt-get update && apt-get -y install \
git \
cmake \
make \
gcc \
g++ \
flex \
bison \
libpcap-dev \
libssl-dev \
python3 \
python3-dev \
python3-pip\
swig \
zlib1g-dev \
libmaxminddb-dev \
libkrb5-dev \
bsdmainutils \
sqlite3 \
curl \
wget \
unzip \
ruby \
bc \
lcov \
&& rm -rf /var/lib/apt/lists/*
# Note - the symlink is important, otherwise cmake uses the wrong .so files.
RUN wget https://www.openssl.org/source/openssl-3.0.0.tar.gz && tar xvf ./openssl-3.0.0.tar.gz && cd ./openssl-3.0.0 && ./Configure --prefix=/opt/openssl && make install && cd .. && rm -rf openssl-3.0.0 && ln -sf /opt/openssl/lib64 /opt/openssl/lib
RUN pip3 install junit2html
RUN gem install coveralls-lcov


@ -1,65 +0,0 @@
#! /bin/sh
#
# Copyright (c) 2020 by the Zeek Project. See LICENSE for details.
base=$(git rev-parse --show-toplevel)
fix=0
pre_commit_hook=0
# Directories to run on by default. When changing, adapt .pre-commit-config.yam
# as well.
files="src"
error() {
test "${pre_commit_hook}" = 0 && echo "$@" >&2 && exit 1
exit 0
}
if [ $# != 0 ]; then
case "$1" in
--fixit)
shift
fix=1
;;
--pre-commit-hook)
shift
fix=1
pre_commit_hook=1
;;
-*)
echo "usage: $(basename $0) [--fixit | --pre-commit-hook] [<files>]"
exit 1
esac
fi
test $# != 0 && files="$@"
if [ -z "${CLANG_FORMAT}" ]; then
CLANG_FORMAT=$(which clang-format 2>/dev/null)
fi
if [ -z "${CLANG_FORMAT}" -o ! -x "${CLANG_FORMAT}" ]; then
error "Cannot find clang-format. If not in PATH, set CLANG_FORMAT."
fi
if ! (cd / && ${CLANG_FORMAT} -dump-config | grep -q SpacesInConditionalStatement); then
error "${CLANG_FORMAT} does not support SpacesInConditionalStatement. Install custom version and put it into PATH, or point CLANG_FORMAT to it."
fi
if [ ! -e .clang-format ]; then
error "Must execute in top-level directory."
fi
cmd="${base}/auxil/run-clang-format/run-clang-format.py -r --clang-format-executable ${CLANG_FORMAT} --exclude '*/3rdparty/*' ${files}"
tmp=/tmp/$(basename $0).$$.tmp
trap "rm -f ${tmp}" EXIT
eval "${cmd}" >"${tmp}"
if [ "${fix}" = 1 ]; then
test -s "${tmp}" && cat "${tmp}" | git apply -p0
true
else
cat "${tmp}"
fi


@ -36,5 +36,4 @@ for fuzzer_path in ${fuzzers}; do
echo "-----------------------------------------"
done
exit ${result}


@ -19,18 +19,15 @@ fi
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"
. ${SCRIPT_DIR}/common.sh
function pushd
{
function pushd {
command pushd "$@" >/dev/null || exit 1
}
function popd
{
function popd {
command popd "$@" >/dev/null || exit 1
}
function banner
{
function banner {
local msg="${1}"
printf "+--------------------------------------------------------------+\n"
printf "| %-60s |\n" "$(date)"
@ -38,8 +35,7 @@ function banner
printf "+--------------------------------------------------------------+\n"
}
function run_unit_tests
{
function run_unit_tests {
banner "Running unit tests"
pushd build
@ -48,15 +44,13 @@ function run_unit_tests
return 0
}
function prep_artifacts
{
function prep_artifacts {
banner "Prepare artifacts"
[[ -d .tmp ]] && rm -rf .tmp/script-coverage && tar -czf tmp.tar.gz .tmp
junit2html btest-results.xml btest-results.html
}
function run_btests
{
function run_btests {
banner "Running baseline tests: zeek"
pushd testing/btest
@ -75,8 +69,7 @@ function run_btests
return 0
}
function run_external_btests
{
function run_external_btests {
# Commenting out this line in btest.cfg causes the script profiling/coverage
# to be disabled. We do this for the sanitizer build right now because of a
# fairly significant performance bug when running tests.


@ -1,12 +1,12 @@
#! /usr/bin/env bash
unset ZEEK_DISABLE_ZEEKYGEN;
unset ZEEK_DISABLE_ZEEKYGEN
# If running this from btest, unset any of the environment
# variables that alter default script values.
unset ZEEK_DEFAULT_LISTEN_ADDRESS;
unset ZEEK_DEFAULT_LISTEN_RETRY;
unset ZEEK_DEFAULT_CONNECT_RETRY;
unset ZEEK_DEFAULT_LISTEN_ADDRESS
unset ZEEK_DEFAULT_LISTEN_RETRY
unset ZEEK_DEFAULT_CONNECT_RETRY
dir="$(cd "$(dirname "$0")" && pwd)"
source_dir="$(cd $dir/.. && pwd)"
@ -21,15 +21,14 @@ fi
case $output_dir in
/*) ;;
*) output_dir=`pwd`/$output_dir ;;
*) output_dir=$(pwd)/$output_dir ;;
esac
cd $build_dir
. zeek-path-dev.sh
export ZEEK_SEED_FILE=$source_dir/testing/btest/random.seed
function run_zeek
{
function run_zeek {
ZEEK_ALLOW_INIT_ERRORS=1 zeek -X $conf_file zeekygen >/dev/null 2>$zeek_error_file
if [ $? -ne 0 ]; then
@ -48,8 +47,7 @@ run_zeek
script_ref_dir=$output_dir/script-reference
mkdir -p $script_ref_dir
function generate_index
{
function generate_index {
echo "Generating $script_ref_dir/$2"
printf "$1\t*\t$script_ref_dir/$2\n" >$conf_file
run_zeek

cmake

@ -1 +1 @@
Subproject commit 4d1990f0e4c273cf51ec52278add6ff256f9c889
Subproject commit 12fbc1a3bc206a57b079505e3df938c3a993ba58

configure vendored

@ -54,51 +54,51 @@ Usage: $0 [OPTION]... [VAR=VALUE]...
install --home [PATH/lib/python]
Optional Features:
--enable-debug compile in debugging mode (like --build-type=Debug)
--enable-coverage compile with code coverage support (implies debugging mode)
--enable-debug compile in debugging mode (like --build-type=Debug)
--enable-fuzzers build fuzzer targets
--enable-jemalloc link against jemalloc
--enable-mobile-ipv6 analyze mobile IPv6 features defined by RFC 6275
--enable-perftools enable use of Google perftools (use tcmalloc)
--enable-perftools-debug use Google's perftools for debugging
--enable-jemalloc link against jemalloc
--enable-static-broker build Broker statically (ignored if --with-broker is specified)
--enable-static-binpac build binpac statically (ignored if --with-binpac is specified)
--enable-cpp-tests build Zeek's C++ unit tests
--enable-static-broker build Broker statically (ignored if --with-broker is specified)
--enable-zeek-client install the Zeek cluster management client (experimental)
--disable-zeekctl don't install ZeekControl
--disable-auxtools don't build or install auxiliary tools
--disable-archiver don't build or install zeek-archiver tool
--disable-auxtools don't build or install auxiliary tools
--disable-broker-tests don't try to build Broker unit tests
--disable-btest don't install BTest
--disable-btest-pcaps don't install Zeek's BTest input pcaps
--disable-cpp-tests don't build Zeek's C++ unit tests
--disable-python don't try to build python bindings for Broker
--disable-broker-tests don't try to build Broker unit tests
--disable-zeekctl don't install ZeekControl
--disable-zkg don't install zkg
Required Packages in Non-Standard Locations:
--with-openssl=PATH path to OpenSSL install root
--with-bind=PATH path to BIND install root
--with-pcap=PATH path to libpcap install root
--with-binpac=PATH path to BinPAC executable
(useful for cross-compiling)
--with-bifcl=PATH path to Zeek BIF compiler executable
(useful for cross-compiling)
--with-flex=PATH path to flex executable
--with-bind=PATH path to BIND install root
--with-binpac=PATH path to BinPAC executable
(useful for cross-compiling)
--with-bison=PATH path to bison executable
--with-python=PATH path to Python executable
--with-broker=PATH path to Broker install root
(Zeek uses an embedded version by default)
--with-caf=PATH path to C++ Actor Framework install root
(a Broker dependency that is embedded by default)
--with-flex=PATH path to flex executable
--with-libkqueue=PATH path to libkqueue install root
(Zeek uses an embedded version by default)
--with-openssl=PATH path to OpenSSL install root
--with-pcap=PATH path to libpcap install root
--with-python=PATH path to Python executable
Optional Packages in Non-Standard Locations:
--with-geoip=PATH path to the libmaxminddb install root
--with-jemalloc=PATH path to jemalloc install root
--with-krb5=PATH path to krb5 install root
--with-perftools=PATH path to Google Perftools install root
--with-jemalloc=PATH path to jemalloc install root
--with-python-lib=PATH path to libpython
--with-python-inc=PATH path to Python headers
--with-python-lib=PATH path to libpython
--with-swig=PATH path to SWIG executable
Packaging Options (for developers):
@ -128,8 +128,8 @@ This typically means that you performed a non-recursive git clone of
Zeek. To check out the required subdirectories, please execute:
( cd $sourcedir && git submodule update --recursive --init )
" >&2;
exit 1;
" >&2
exit 1
fi
# Function to append a CMake cache entry definition to the
@ -163,6 +163,7 @@ append_cache_entry ZEEK_ETC_INSTALL_DIR PATH $prefix/etc
append_cache_entry ENABLE_DEBUG BOOL false
append_cache_entry ENABLE_PERFTOOLS BOOL false
append_cache_entry ENABLE_JEMALLOC BOOL false
append_cache_entry ENABLE_ZEEK_UNIT_TESTS BOOL true
append_cache_entry BUILD_SHARED_LIBS BOOL true
append_cache_entry INSTALL_AUX_TOOLS BOOL true
append_cache_entry INSTALL_BTEST BOOL true
@ -179,7 +180,7 @@ has_enable_mobile_ipv6=0
# parse arguments
while [ $# -ne 0 ]; do
case "$1" in
-*=*) optarg=`echo "$1" | sed 's/[-_a-zA-Z0-9]*=//'` ;;
-*=*) optarg=$(echo "$1" | sed 's/[-_a-zA-Z0-9]*=//') ;;
*) optarg= ;;
esac
@ -260,11 +261,14 @@ while [ $# -ne 0 ]; do
append_cache_entry ENABLE_COVERAGE BOOL true
append_cache_entry ENABLE_DEBUG BOOL true
;;
--enable-debug)
append_cache_entry ENABLE_DEBUG BOOL true
;;
--enable-fuzzers)
append_cache_entry ZEEK_ENABLE_FUZZERS BOOL true
;;
--enable-debug)
append_cache_entry ENABLE_DEBUG BOOL true
--enable-jemalloc)
append_cache_entry ENABLE_JEMALLOC BOOL true
;;
--enable-mobile-ipv6)
has_enable_mobile_ipv6=1
@ -276,32 +280,24 @@ while [ $# -ne 0 ]; do
append_cache_entry ENABLE_PERFTOOLS BOOL true
append_cache_entry ENABLE_PERFTOOLS_DEBUG BOOL true
;;
--sanitizers=*)
append_cache_entry ZEEK_SANITIZERS STRING $optarg
;;
--enable-jemalloc)
append_cache_entry ENABLE_JEMALLOC BOOL true
--enable-static-binpac)
append_cache_entry BUILD_STATIC_BINPAC BOOL true
;;
--enable-static-broker)
append_cache_entry BUILD_STATIC_BROKER BOOL true
;;
--enable-static-binpac)
append_cache_entry BUILD_STATIC_BINPAC BOOL true
;;
--enable-cpp-tests)
append_cache_entry ENABLE_ZEEK_UNIT_TESTS BOOL true
;;
--enable-zeek-client)
append_cache_entry INSTALL_ZEEK_CLIENT BOOL true
;;
--disable-zeekctl)
append_cache_entry INSTALL_ZEEKCTL BOOL false
--disable-archiver)
append_cache_entry INSTALL_ZEEK_ARCHIVER BOOL false
;;
--disable-auxtools)
append_cache_entry INSTALL_AUX_TOOLS BOOL false
;;
--disable-archiver)
append_cache_entry INSTALL_ZEEK_ARCHIVER BOOL false
--disable-broker-tests)
append_cache_entry BROKER_DISABLE_TESTS BOOL true
append_cache_entry BROKER_DISABLE_DOC_EXAMPLES BOOL true
;;
--disable-btest)
append_cache_entry INSTALL_BTEST BOOL false
@ -309,72 +305,77 @@ while [ $# -ne 0 ]; do
--disable-btest-pcaps)
append_cache_entry INSTALL_BTEST_PCAPS BOOL false
;;
--disable-cpp-tests)
append_cache_entry ENABLE_ZEEK_UNIT_TESTS BOOL false
;;
--disable-python)
append_cache_entry DISABLE_PYTHON_BINDINGS BOOL true
;;
--disable-broker-tests)
append_cache_entry BROKER_DISABLE_TESTS BOOL true
append_cache_entry BROKER_DISABLE_DOC_EXAMPLES BOOL true
--disable-zeekctl)
append_cache_entry INSTALL_ZEEKCTL BOOL false
;;
--disable-zkg)
append_cache_entry INSTALL_ZKG BOOL false
;;
--with-openssl=*)
append_cache_entry OPENSSL_ROOT_DIR PATH $optarg
--with-bifcl=*)
append_cache_entry BIFCL_EXE_PATH PATH $optarg
;;
--with-bind=*)
append_cache_entry BIND_ROOT_DIR PATH $optarg
;;
--with-pcap=*)
append_cache_entry PCAP_ROOT_DIR PATH $optarg
;;
--with-binpac=*)
append_cache_entry BINPAC_EXE_PATH PATH $optarg
;;
--with-bifcl=*)
append_cache_entry BIFCL_EXE_PATH PATH $optarg
;;
--with-flex=*)
append_cache_entry FLEX_EXECUTABLE PATH $optarg
;;
--with-bison=*)
append_cache_entry BISON_EXECUTABLE PATH $optarg
;;
--with-geoip=*)
append_cache_entry LibMMDB_ROOT_DIR PATH $optarg
;;
--with-krb5=*)
append_cache_entry LibKrb5_ROOT_DIR PATH $optarg
;;
--with-perftools=*)
append_cache_entry GooglePerftools_ROOT_DIR PATH $optarg
;;
--with-jemalloc=*)
append_cache_entry JEMALLOC_ROOT_DIR PATH $optarg
append_cache_entry ENABLE_JEMALLOC BOOL true
;;
--with-python=*)
append_cache_entry PYTHON_EXECUTABLE PATH $optarg
;;
--with-python-lib=*)
append_cache_entry PYTHON_LIBRARY PATH $optarg
;;
--with-python-inc=*)
append_cache_entry PYTHON_INCLUDE_DIR PATH $optarg
append_cache_entry PYTHON_INCLUDE_PATH PATH $optarg
;;
--with-swig=*)
append_cache_entry SWIG_EXECUTABLE PATH $optarg
;;
--with-broker=*)
append_cache_entry BROKER_ROOT_DIR PATH $optarg
;;
--with-caf=*)
append_cache_entry CAF_ROOT PATH $optarg
;;
--with-flex=*)
append_cache_entry FLEX_EXECUTABLE PATH $optarg
;;
--with-geoip=*)
append_cache_entry LibMMDB_ROOT_DIR PATH $optarg
;;
--with-jemalloc=*)
append_cache_entry JEMALLOC_ROOT_DIR PATH $optarg
append_cache_entry ENABLE_JEMALLOC BOOL true
;;
--with-krb5=*)
append_cache_entry LibKrb5_ROOT_DIR PATH $optarg
;;
--with-libkqueue=*)
append_cache_entry LIBKQUEUE_ROOT_DIR PATH $optarg
;;
--with-pcap=*)
append_cache_entry PCAP_ROOT_DIR PATH $optarg
;;
--with-perftools=*)
append_cache_entry GooglePerftools_ROOT_DIR PATH $optarg
;;
--with-openssl=*)
append_cache_entry OPENSSL_ROOT_DIR PATH $optarg
;;
--with-python=*)
append_cache_entry PYTHON_EXECUTABLE PATH $optarg
;;
--with-python-inc=*)
append_cache_entry PYTHON_INCLUDE_DIR PATH $optarg
append_cache_entry PYTHON_INCLUDE_PATH PATH $optarg
;;
--with-python-lib=*)
append_cache_entry PYTHON_LIBRARY PATH $optarg
;;
--with-swig=*)
append_cache_entry SWIG_EXECUTABLE PATH $optarg
;;
--sanitizers=*)
append_cache_entry ZEEK_SANITIZERS STRING $optarg
;;
--binary-package)
append_cache_entry BINARY_PACKAGING_MODE BOOL true
;;
@ -408,7 +409,7 @@ if [ -z "$CMakeCommand" ]; then
echo "This package requires CMake, please install it first."
echo "Then you may use this script to configure the CMake build."
echo "Note: pass --cmake=PATH to use cmake in non-standard locations."
exit 1;
exit 1
fi
fi

doc

@ -1 +1 @@
Subproject commit fefd7e6ceb67dd011c268c658171967f1281b970
Subproject commit b8ae1f336272371d6c46fda133e472a075f69e3d


@ -1,11 +1,11 @@
DIAG=diag.log
BTEST=../../auxil/btest/btest
all: cleanup btest-verbose
all: btest-verbose clean
# Showing all tests.
btest-verbose:
@$(BTEST) -d -j -f $(DIAG)
cleanup:
@rm -f $(DIAG)
clean:
@rm -rf $(DIAG) .tmp .btest.failed.dat


@ -66,7 +66,7 @@ print version and exit
print contents of state file
.TP
\fB\-C\fR,\ \-\-no\-checksums
ignore checksums
When this option is set, Zeek ignores invalid packet checksums and still processes the packets. Furthermore, Zeek then also processes IP packets with a zero total-length field, which is typically caused by TSO (TCP Segment Offloading) on the NIC.
.TP
\fB\-F\fR,\ \-\-force\-dns
force DNS

View file

@ -9,6 +9,13 @@
##! These tags are defined internally by
##! the analyzers themselves, and documented in their analyzer-specific
##! description along with the events that they generate.
##!
##! Analyzer tags are also inserted into a global :zeek:type:`AllAnalyzers::Tag` enum
##! type. This type contains duplicates of all of the :zeek:type:`Analyzer::Tag`,
##! :zeek:type:`PacketAnalyzer::Tag` and :zeek:type:`Files::Tag` enum values
##! and can be used for arguments to function/hook/event definitions where they
##! need to handle any analyzer type. See :zeek:id:`Analyzer::register_for_ports`
##! for an example.
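##!
##! As a quick sketch (not part of this change) of what the unified enum buys:
##! a single handler can match confirmations from any analyzer family, because
##! the tag argument is typed as AllAnalyzers::Tag, e.g.:
##!
##! event analyzer_confirmation(c: connection, atype: AllAnalyzers::Tag, aid: count)
##! 	{
##! 	# Analyzer::name() accepts the unified tag type (see below).
##! 	print fmt("analyzer %s confirmed on %s", Analyzer::name(atype), c$uid);
##! 	}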
@load base/frameworks/packet-filter/utils
@ -66,13 +73,13 @@ export {
## tag: The tag of the analyzer.
##
## Returns: The set of ports.
global registered_ports: function(tag: Analyzer::Tag) : set[port];
global registered_ports: function(tag: AllAnalyzers::Tag) : set[port];
## Returns a table of all ports-to-analyzer mappings currently registered.
##
## Returns: A table mapping each analyzer to the set of ports
## registered for it.
global all_registered_ports: function() : table[Analyzer::Tag] of set[port];
global all_registered_ports: function() : table[AllAnalyzers::Tag] of set[port];
## Translates an analyzer type to a string with the analyzer's name.
##
@ -126,12 +133,16 @@ export {
global disabled_analyzers: set[Analyzer::Tag] = {
ANALYZER_TCPSTATS,
} &redef;
## A table of ports mapped to analyzers that handle those ports. This is
## used by BPF filtering and DPD. Session analyzers can add to this using
## Analyzer::register_for_port(s) and packet analyzers can add to this
## using PacketAnalyzer::register_for_port(s).
global ports: table[AllAnalyzers::Tag] of set[port];
}
@load base/bif/analyzer.bif
global ports: table[Analyzer::Tag] of set[port];
event zeek_init() &priority=5
{
if ( disable_all )
@ -176,22 +187,22 @@ function register_for_port(tag: Analyzer::Tag, p: port) : bool
return T;
}
function registered_ports(tag: Analyzer::Tag) : set[port]
function registered_ports(tag: AllAnalyzers::Tag) : set[port]
{
return tag in ports ? ports[tag] : set();
}
function all_registered_ports(): table[Analyzer::Tag] of set[port]
function all_registered_ports(): table[AllAnalyzers::Tag] of set[port]
{
return ports;
}
function name(atype: Analyzer::Tag) : string
function name(atype: AllAnalyzers::Tag) : string
{
return __name(atype);
}
function get_tag(name: string): Analyzer::Tag
function get_tag(name: string): AllAnalyzers::Tag
{
return __tag(name);
}
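A usage sketch for these functions (with a made-up example port, not part of this change): registering an extra well-known port for an existing analyzer and reading the set back:
event zeek_init()
	{
	# 2222/tcp is a hypothetical alternate SSH port chosen for illustration.
	Analyzer::register_for_port(Analyzer::ANALYZER_SSH, 2222/tcp);
	print Analyzer::registered_ports(Analyzer::ANALYZER_SSH);
	}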
@ -223,4 +234,3 @@ function get_bpf(): string
}
return output;
}

View file

@ -53,7 +53,7 @@ event zeek_init() &priority=5
Log::create_stream(DPD::LOG, [$columns=Info, $path="dpd", $policy=log_policy]);
}
event protocol_confirmation(c: connection, atype: Analyzer::Tag, aid: count) &priority=10
event analyzer_confirmation(c: connection, atype: AllAnalyzers::Tag, aid: count) &priority=10
{
local analyzer = Analyzer::name(atype);
@ -63,7 +63,7 @@ event protocol_confirmation(c: connection, atype: Analyzer::Tag, aid: count) &pr
add c$service[analyzer];
}
event protocol_violation(c: connection, atype: Analyzer::Tag, aid: count,
event analyzer_violation(c: connection, atype: AllAnalyzers::Tag, aid: count,
reason: string) &priority=10
{
local analyzer = Analyzer::name(atype);
@ -85,7 +85,7 @@ event protocol_violation(c: connection, atype: Analyzer::Tag, aid: count,
c$dpd = info;
}
event protocol_violation(c: connection, atype: Analyzer::Tag, aid: count, reason: string) &priority=5
event analyzer_violation(c: connection, atype: AllAnalyzers::Tag, aid: count, reason: string) &priority=5
{
if ( atype in ignore_violations )
return;
@ -114,7 +114,7 @@ event protocol_violation(c: connection, atype: Analyzer::Tag, aid: count, reason
}
}
event protocol_violation(c: connection, atype: Analyzer::Tag, aid: count,
event analyzer_violation(c: connection, atype: AllAnalyzers::Tag, aid: count,
reason: string) &priority=-5
{
if ( c?$dpd )

View file

@ -302,4 +302,3 @@ signature file-iso9660 {
file-mime "application/x-iso9660-image", 99
file-magic /CD001/
}

View file

@ -1,4 +1,3 @@
# This signature is non-specific and terrible but after
# searching for a long time there doesn't seem to be a
# better option.

View file

@ -66,6 +66,11 @@ export {
## This option is also available as a per-filter ``$config`` option.
const json_timestamps: JSON::TimestampFormat = JSON::TS_EPOCH &redef;
## Handling of optional fields when writing out JSON. By default the
## JSON formatter skips the key and value when the field is absent. Setting
## the following field to T includes the key, with a null value.
const json_include_unset_fields = F &redef;
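A minimal usage sketch, assuming the enclosing module is LogAscii (as for Zeek's ASCII writer): with the redef below, unset optional fields appear as "key": null in JSON output instead of being omitted.
redef LogAscii::json_include_unset_fields = T;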
## If true, include lines with log meta information such as column names
## with types, the values of ASCII logging options that are in use, and
## the time when the file was opened and closed (the latter at the end).

View file

@ -83,5 +83,3 @@ function create_debug(do_something: bool) : PluginState
return p;
}

View file

@ -110,4 +110,3 @@ function create_packetfilter() : PluginState
return p;
}

View file

@ -23,7 +23,6 @@ hook notice(n: Notice::Info)
if ( |Site::local_admins| > 0 &&
ACTION_EMAIL_ADMIN in n$actions )
{
local email = "";
if ( n?$src && |Site::get_emails(n$src)| > 0 )
add n$email_dest[Site::get_emails(n$src)];
if ( n?$dst && |Site::get_emails(n$dst)| > 0 )

View file

@ -311,4 +311,3 @@ event signature_match(state: signature_state, msg: string, data: string)
last_vthresh[orig] = vcount;
}
}

View file

@ -568,4 +568,3 @@ function threshold_crossed(ss: SumStat, key: Key, result: Result)
ss$threshold_crossed(key, result);
}

View file

@ -90,20 +90,9 @@ export {
global finalize_tunnel: Conn::RemovalHook;
}
const ayiya_ports = { 5072/udp };
const teredo_ports = { 3544/udp };
const gtpv1_ports = { 2152/udp, 2123/udp };
redef likely_server_ports += { ayiya_ports, teredo_ports, gtpv1_ports, vxlan_ports, geneve_ports };
event zeek_init() &priority=5
{
Log::create_stream(Tunnel::LOG, [$columns=Info, $path="tunnel", $policy=log_policy]);
Analyzer::register_for_ports(Analyzer::ANALYZER_AYIYA, ayiya_ports);
Analyzer::register_for_ports(Analyzer::ANALYZER_TEREDO, teredo_ports);
Analyzer::register_for_ports(Analyzer::ANALYZER_GTPV1, gtpv1_ports);
Analyzer::register_for_ports(Analyzer::ANALYZER_VXLAN, vxlan_ports);
Analyzer::register_for_ports(Analyzer::ANALYZER_GENEVE, geneve_ports);
}
function register_all(ecv: EncapsulatingConnVector)

View file

@ -1016,9 +1016,16 @@ const TCP_RESET = 6; ##< Endpoint has sent RST.
const UDP_INACTIVE = 0; ##< Endpoint is still inactive.
const UDP_ACTIVE = 1; ##< Endpoint has sent something.
## If true, don't verify checksums. Useful for running on altered trace
## files, and for saving a few cycles, but at the risk of analyzing invalid
## data. Note that the ``-C`` command-line option overrides the setting of this
## If true, don't verify checksums, and accept packets that give a length of
## zero in the IPv4 header. This is useful when running against traces of local
## traffic and the NIC checksum offloading feature is enabled. It can also
## be useful for running on altered trace files, and for saving a few cycles
## at the risk of analyzing invalid data.
## With this option, packets that have a value of zero in the total-length field
## of the IPv4 header are also accepted, and the capture-length is used instead.
## The total-length field is commonly set to zero when the NIC segmentation offloading
## feature is enabled.
## Note that the ``-C`` command-line option overrides the setting of this
## variable.
const ignore_checksums = F &redef;
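The script-level equivalent of the ``-C`` flag described above is a one-line redef; a sketch:
redef ignore_checksums = T;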
@ -3884,6 +3891,14 @@ type dns_loc_rr: record {
is_query: count; ##< The RR is a query/Response.
};
## DNS SVCB and HTTPS RRs
##
## .. zeek:see:: dns_SVCB dns_HTTPS
type dns_svcb_rr: record {
svc_priority: count; ##< Service priority for the current record; 0 indicates that this record is in AliasMode and cannot carry svc_params, otherwise it is in ServiceMode and may include svc_params.
target_name: string; ##< Target name, the hostname of the service endpoint.
};
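A handler sketch for the new record type, assuming the accompanying dns_SVCB event delivers the record as its final argument (per the zeek:see reference above):
event dns_SVCB(c: connection, msg: dns_msg, ans: dns_answer, svcb: dns_svcb_rr)
	{
	if ( svcb$svc_priority == 0 )
		print fmt("SVCB alias -> %s", svcb$target_name);
	else
		print fmt("SVCB service (prio %d) -> %s", svcb$svc_priority, svcb$target_name);
	}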
# DNS answer types.
#
# .. zeek:see:: dns_answerr
@ -5021,14 +5036,14 @@ export {
## With this set, the Teredo analyzer waits until it sees both sides
## of a connection using a valid Teredo encapsulation before issuing
## a :zeek:see:`protocol_confirmation`. If it's false, the first
## a :zeek:see:`analyzer_confirmation`. If it's false, the first
## occurrence of a packet with valid Teredo encapsulation causes a
## confirmation.
const delay_teredo_confirmation = T &redef;
## With this set, the GTP analyzer waits until the most-recent upflow
## and downflow packets are a valid GTPv1 encapsulation before
## issuing :zeek:see:`protocol_confirmation`. If it's false, the
## issuing :zeek:see:`analyzer_confirmation`. If it's false, the
## first occurrence of a packet with valid GTPv1 encapsulation causes
## confirmation. Since the same inner connection can be carried
## differing outer upflow/downflow connections, setting to false
@ -5045,17 +5060,6 @@ export {
## may choose whether to perform the validation.
const validate_vxlan_checksums = T &redef;
## The set of UDP ports used for VXLAN traffic. Traffic using this
## UDP destination port will attempt to be decapsulated. Note that if
## if you customize this, you may still want to manually ensure that
## :zeek:see:`likely_server_ports` also gets populated accordingly.
const vxlan_ports: set[port] = { 4789/udp } &redef;
## The set of UDP ports used for Geneve traffic. Traffic using this
## UDP destination port will attempt to be decapsulated. Note that
## if you customize this, you may still want to manually ensure that
## :zeek:see:`likely_server_ports` also gets populated accordingly.
const geneve_ports: set[port] = { 6081/udp } &redef;
} # end export
module Reporter;

View file

@ -1,3 +1,5 @@
@load ./main.zeek
@load base/packet-protocols/root
@load base/packet-protocols/ip
@load base/packet-protocols/skip
@ -12,9 +14,15 @@
@load base/packet-protocols/pppoe
@load base/packet-protocols/vlan
@load base/packet-protocols/mpls
@load base/packet-protocols/gre
@load base/packet-protocols/iptunnel
@load base/packet-protocols/vntag
@load base/packet-protocols/udp
@load base/packet-protocols/tcp
@load base/packet-protocols/icmp
@load base/packet-protocols/gre
@load base/packet-protocols/iptunnel
@load base/packet-protocols/ayiya
@load base/packet-protocols/geneve
@load base/packet-protocols/vxlan
@load base/packet-protocols/teredo
@load base/packet-protocols/gtpv1

View file

@ -0,0 +1 @@
@load ./main

View file

@ -0,0 +1,19 @@
module PacketAnalyzer::AYIYA;
# Needed for port registration for BPF
@load base/frameworks/analyzer/main
const IPPROTO_IPV4 : count = 4;
const IPPROTO_IPV6 : count = 41;
const ayiya_ports = { 5072/udp };
redef likely_server_ports += { ayiya_ports };
event zeek_init() &priority=20
{
PacketAnalyzer::register_protocol_detection(PacketAnalyzer::ANALYZER_UDP, PacketAnalyzer::ANALYZER_AYIYA);
PacketAnalyzer::register_packet_analyzer(PacketAnalyzer::ANALYZER_AYIYA, IPPROTO_IPV4, PacketAnalyzer::ANALYZER_IP);
PacketAnalyzer::register_packet_analyzer(PacketAnalyzer::ANALYZER_AYIYA, IPPROTO_IPV6, PacketAnalyzer::ANALYZER_IP);
PacketAnalyzer::register_for_ports(PacketAnalyzer::ANALYZER_UDP, PacketAnalyzer::ANALYZER_AYIYA, ayiya_ports);
}

View file

@ -0,0 +1 @@
@load ./main

View file

@ -0,0 +1,27 @@
module PacketAnalyzer::Geneve;
export {
## The set of UDP ports used for Geneve traffic. Traffic using this
## UDP destination port will attempt to be decapsulated. Note that
## if you customize this, you may still want to manually ensure that
## :zeek:see:`likely_server_ports` also gets populated accordingly.
const geneve_ports: set[port] = { 6081/udp } &redef;
}
redef likely_server_ports += { geneve_ports };
event zeek_init() &priority=20
{
PacketAnalyzer::register_for_ports(PacketAnalyzer::ANALYZER_UDP, PacketAnalyzer::ANALYZER_GENEVE, geneve_ports);
# This is defined by IANA as being "Trans Ether Bridging" but the Geneve RFC
# says to use it for Ethernet. See
# https://datatracker.ietf.org/doc/html/draft-gross-geneve-00#section-3.4
# for details.
PacketAnalyzer::register_packet_analyzer(PacketAnalyzer::ANALYZER_GENEVE, 0x6558, PacketAnalyzer::ANALYZER_ETHERNET);
# Some additional mappings for protocols that we already handle natively.
PacketAnalyzer::register_packet_analyzer(PacketAnalyzer::ANALYZER_GENEVE, 0x0800, PacketAnalyzer::ANALYZER_IP);
PacketAnalyzer::register_packet_analyzer(PacketAnalyzer::ANALYZER_GENEVE, 0x08DD, PacketAnalyzer::ANALYZER_IP);
PacketAnalyzer::register_packet_analyzer(PacketAnalyzer::ANALYZER_GENEVE, 0x0806, PacketAnalyzer::ANALYZER_ARP);
}

View file

@ -0,0 +1 @@
@load ./main

View file

@ -0,0 +1,28 @@
module PacketAnalyzer::GTPV1;
# This needs to be loaded here so the function is available. Function BIFs normally aren't
# loaded until after the packet analysis init scripts are run, and then zeek complains it
# can't find the function.
@load base/bif/plugins/Zeek_GTPv1.functions.bif
# Needed for port registration for BPF
@load base/frameworks/analyzer/main
export {
## Default analyzer
const default_analyzer: PacketAnalyzer::Tag = PacketAnalyzer::ANALYZER_IP &redef;
}
const gtpv1_ports = { 2152/udp, 2123/udp };
redef likely_server_ports += { gtpv1_ports };
event zeek_init() &priority=20
{
PacketAnalyzer::register_protocol_detection(PacketAnalyzer::ANALYZER_UDP, PacketAnalyzer::ANALYZER_GTPV1);
PacketAnalyzer::register_for_ports(PacketAnalyzer::ANALYZER_UDP, PacketAnalyzer::ANALYZER_GTPV1, gtpv1_ports);
}
event connection_state_remove(c: connection)
{
remove_gtpv1_connection(c$id);
}

View file

@ -0,0 +1,61 @@
module PacketAnalyzer;
@load base/frameworks/analyzer/main.zeek
export {
## Registers a set of well-known ports for an analyzer. If a future
## connection on one of these ports is seen, the analyzer will be
## automatically assigned to parsing it. The function *adds* to all ports
## already registered, it doesn't replace them.
##
## tag: The tag of the analyzer.
##
## ports: The set of well-known ports to associate with the analyzer.
##
## Returns: True if the ports were successfully registered.
global register_for_ports: function(parent: PacketAnalyzer::Tag,
child: PacketAnalyzer::Tag,
ports: set[port]) : bool;
## Registers an individual well-known port for an analyzer. If a future
## connection on this port is seen, the analyzer will be automatically
## assigned to parsing it. The function *adds* to all ports already
## registered, it doesn't replace them.
##
## tag: The tag of the analyzer.
##
## p: The well-known port to associate with the analyzer.
##
## Returns: True if the port was successfully registered.
global register_for_port: function(parent: PacketAnalyzer::Tag,
child: PacketAnalyzer::Tag,
p: port) : bool;
}
function register_for_ports(parent: PacketAnalyzer::Tag,
child: PacketAnalyzer::Tag,
ports: set[port]) : bool
{
local rc = T;
for ( p in ports )
{
if ( ! register_for_port(parent, child, p) )
rc = F;
}
return rc;
}
function register_for_port(parent: PacketAnalyzer::Tag,
child: PacketAnalyzer::Tag,
p: port) : bool
{
register_packet_analyzer(parent, port_to_count(p), child);
if ( child !in Analyzer::ports )
Analyzer::ports[child] = set();
add Analyzer::ports[child][p];
return T;
}
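Callers use this the same way session analyzers use Analyzer::register_for_port. A sketch that maps one additional VXLAN port; 8472/udp is assumed here to be the legacy Linux VXLAN port and is only an illustration:
event zeek_init() &priority=20
	{
	# Route UDP traffic on 8472/udp into the VXLAN packet analyzer.
	PacketAnalyzer::register_for_port(PacketAnalyzer::ANALYZER_UDP,
	    PacketAnalyzer::ANALYZER_VXLAN, 8472/udp);
	}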

View file

@ -0,0 +1 @@
@load ./main

View file

@ -0,0 +1,28 @@
module PacketAnalyzer::TEREDO;
# This needs to be loaded here so the functions are available. Function BIFs normally aren't
# loaded until after the packet analysis init scripts are run, and then zeek complains it
# can't find the function.
@load base/bif/plugins/Zeek_Teredo.functions.bif
# Needed for port registration for BPF
@load base/frameworks/analyzer/main
export {
## Default analyzer
const default_analyzer: PacketAnalyzer::Tag = PacketAnalyzer::ANALYZER_IP &redef;
}
const teredo_ports = { 3544/udp };
redef likely_server_ports += { teredo_ports };
event zeek_init() &priority=20
{
PacketAnalyzer::register_protocol_detection(PacketAnalyzer::ANALYZER_UDP, PacketAnalyzer::ANALYZER_TEREDO);
PacketAnalyzer::register_for_ports(PacketAnalyzer::ANALYZER_UDP, PacketAnalyzer::ANALYZER_TEREDO, teredo_ports);
}
event connection_state_remove(c: connection)
{
remove_teredo_connection(c$id);
}

View file

@ -0,0 +1 @@
@load ./main

View file

@ -0,0 +1,20 @@
module PacketAnalyzer::VXLAN;
export {
# There's no indicator in the VXLAN packet header format about what the next protocol
# in the chain is. All of the documentation just lists Ethernet, so default to that.
const default_analyzer: PacketAnalyzer::Tag = PacketAnalyzer::ANALYZER_ETHERNET &redef;
## The set of UDP ports used for VXLAN traffic. Traffic using this
## UDP destination port will attempt to be decapsulated. Note that
## if you customize this, you may still want to manually ensure that
## :zeek:see:`likely_server_ports` also gets populated accordingly.
const vxlan_ports: set[port] = { 4789/udp } &redef;
}
redef likely_server_ports += { vxlan_ports };
event zeek_init() &priority=20
{
PacketAnalyzer::register_for_ports(PacketAnalyzer::ANALYZER_UDP, PacketAnalyzer::ANALYZER_VXLAN, vxlan_ports);
}

View file

@ -6,7 +6,7 @@ module Conn;
export {
## Define inactivity timeouts based on the service detected
## on the connection.
option analyzer_inactivity_timeouts: table[Analyzer::Tag] of interval = {
option analyzer_inactivity_timeouts: table[AllAnalyzers::Tag] of interval = {
# For interactive services, allow longer periods of inactivity.
[[Analyzer::ANALYZER_SSH, Analyzer::ANALYZER_FTP]] = 1 hrs,
};
@ -18,7 +18,7 @@ export {
}
event protocol_confirmation(c: connection, atype: Analyzer::Tag, aid: count)
event analyzer_confirmation(c: connection, atype: AllAnalyzers::Tag, aid: count)
{
if ( atype in analyzer_inactivity_timeouts )
set_inactivity_timeout(c$id, analyzer_inactivity_timeouts[atype]);

View file

@ -172,4 +172,15 @@ export {
[4] = "SHA384",
} &default = function(n: count): string { return fmt("digest-%d", n); };
## SVCB/HTTPS SvcParam keys, as defined in
## https://www.ietf.org/archive/id/draft-ietf-dnsop-svcb-https-07.txt, sec 14.3.2
const svcparam_keys = {
[0] = "mandatory",
[1] = "alpn",
[2] = "no-default-alpn",
[3] = "port",
[4] = "ipv4hint",
[5] = "ech",
[6] = "ipv6hint",
} &default = function(n: count): string { return fmt("key-%d", n); };
}
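Lookup sketch, assuming these constants live in module DNS like the other tables in this file: known keys map to their names, unknown ones fall through to the &default function.
print DNS::svcparam_keys[1];   # "alpn"
print DNS::svcparam_keys[42];  # "key-42", produced by the &default function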

View file

@ -265,7 +265,7 @@ event file_over_new_connection(f: fa_file, c: connection, is_orig: bool) &priori
}
}
event protocol_confirmation(c: connection, atype: Analyzer::Tag, aid: count) &priority=5
event analyzer_confirmation(c: connection, atype: AllAnalyzers::Tag, aid: count) &priority=5
{
if ( atype == Analyzer::ANALYZER_RDP )
{
@ -274,7 +274,7 @@ event protocol_confirmation(c: connection, atype: Analyzer::Tag, aid: count) &pr
}
}
event protocol_violation(c: connection, atype: Analyzer::Tag, aid: count, reason: string) &priority=5
event analyzer_violation(c: connection, atype: AllAnalyzers::Tag, aid: count, reason: string) &priority=5
{
# If a protocol violation occurs, then log the record immediately.
if ( c?$rdp )

View file

@ -355,7 +355,7 @@ event ssh_server_host_key(c: connection, hash: string) &priority=5
c$ssh$host_key = hash;
}
event protocol_confirmation(c: connection, atype: Analyzer::Tag, aid: count) &priority=20
event analyzer_confirmation(c: connection, atype: AllAnalyzers::Tag, aid: count) &priority=20
{
if ( atype == Analyzer::ANALYZER_SSH )
{

View file

@ -474,7 +474,7 @@ hook finalize_ssl(c: connection)
finish(c, F);
}
event protocol_confirmation(c: connection, atype: Analyzer::Tag, aid: count) &priority=5
event analyzer_confirmation(c: connection, atype: AllAnalyzers::Tag, aid: count) &priority=5
{
if ( atype == Analyzer::ANALYZER_SSL || atype == Analyzer::ANALYZER_DTLS )
{
@ -494,7 +494,7 @@ event ssl_plaintext_data(c: connection, is_orig: bool, record_version: count, co
Weird::weird(wi);
}
event protocol_violation(c: connection, atype: Analyzer::Tag, aid: count,
event analyzer_violation(c: connection, atype: AllAnalyzers::Tag, aid: count,
reason: string) &priority=5
{
if ( c?$ssl && ( atype == Analyzer::ANALYZER_SSL || atype == Analyzer::ANALYZER_DTLS ) )

View file

@ -1,14 +1,2 @@
# Provide DPD signatures for tunneling protocols that otherwise
# wouldn't be detected at all.
signature dpd_ayiya {
ip-proto = udp
payload /^..\x11\x29/
enable "ayiya"
}
signature dpd_teredo {
ip-proto = udp
payload /^(\x00\x00)|(\x00\x01)|([\x60-\x6f].{7}((\x20\x01\x00\x00)).{28})|([\x60-\x6f].{23}((\x20\x01\x00\x00))).{12}/
enable "teredo"
}

View file

@ -1,4 +1,3 @@
## Extract an integer from a string.
##
## s: The string to search for a number.

View file

@ -1,4 +1,3 @@
@load base/utils/dir
@load base/utils/paths

View file

@ -1,5 +1,4 @@
# The entry point for the cluster agent. It only runs bootstrap logic for
# launching via the Supervisor. If we're not running the Supervisor, this does
# nothing.
##! The entry point for the cluster agent. It runs bootstrap logic for launching
##! the agent process via Zeek's Supervisor.
@load ./boot

View file

@ -1,24 +1,108 @@
##! The event API of cluster agents. Most endpoints consist of event pairs,
##! where the agent answers a request event with a corresponding response
##! event. Such event pairs share the same name prefix and end in "_request" and
##! "_response", respectively.
@load base/frameworks/supervisor/control
@load policy/frameworks/cluster/controller/types
module ClusterAgent::API;
export {
## A simple versioning scheme, used to track basic compatibility of
## controller and agent.
const version = 1;
# Agent API events
## The controller sends this event to convey a new cluster configuration
## to the agent. Once processed, the agent responds with the response
## event.
##
## reqid: a request identifier string, echoed in the response event.
##
## config: a :zeek:see:`ClusterController::Types::Configuration` record
## describing the cluster topology. Note that this contains the full
## topology, not just the part pertaining to this agent. That's because
## the cluster framework requires full cluster visibility to establish
## the needed peerings.
##
global set_configuration_request: event(reqid: string,
config: ClusterController::Types::Configuration);
## Response to a set_configuration_request event. The agent sends
## this back to the controller.
##
## reqid: the request identifier used in the request event.
##
## result: the result record.
##
global set_configuration_response: event(reqid: string,
result: ClusterController::Types::Result);
## The controller sends this event to confirm to the agent that it is
## part of the current cluster topology. The agent acknowledges with the
## corresponding response event.
##
## reqid: a request identifier string, echoed in the response event.
##
global agent_welcome_request: event(reqid: string);
## Response to an agent_welcome_request event. The agent sends this
## back to the controller.
##
## reqid: the request identifier used in the request event.
##
## result: the result record.
##
global agent_welcome_response: event(reqid: string,
result: ClusterController::Types::Result);
## The controller sends this event to convey that the agent is not
## currently required. This status may later change, depending on
## updates from the client, so the Broker-level peering can remain
## active. The agent releases any cluster-related resources (including
## shutdown of existing Zeek cluster nodes) when processing the request,
## and confirms via the response event. Shutting down an agent at this
## point has no operational impact on the running cluster.
##
## reqid: a request identifier string, echoed in the response event.
##
global agent_standby_request: event(reqid: string);
## Response to an agent_standby_request event. The agent sends this
## back to the controller.
##
## reqid: the request identifier used in the request event.
##
## result: the result record.
##
global agent_standby_response: event(reqid: string,
result: ClusterController::Types::Result);
# Notification events, agent -> controller
# Report agent being available.
## The agent sends this event upon peering as a "check-in", informing
## the controller that an agent of the given name is now available to
## communicate with. It is a controller-level equivalent of
## :zeek:see:`Broker::peer_added`.
##
## instance: an instance name, i.e. the agent's name as per :zeek:see:`ClusterAgent::name`.
##
## host: the IP address of the agent. (This may change in the future.)
##
## api_version: the API version of this agent.
##
global notify_agent_hello: event(instance: string, host: addr,
api_version: count);
# The following are not yet implemented.
# Report node state changes.
global notify_change: event(instance: string,
n: ClusterController::Types::Node,

View file

@ -1,3 +1,9 @@
##! The cluster agent boot logic runs in Zeek's supervisor and instructs it to
##! launch an agent process. The agent's main logic resides in main.zeek,
##! similarly to other frameworks. The new process will execute that script.
##!
##! If the current process is not the Zeek supervisor, this does nothing.
@load ./config
# The agent needs the supervisor to listen for node management requests. We

View file

@ -1,51 +1,83 @@
##! Configuration settings for a cluster agent.
@load policy/frameworks/cluster/controller/types
module ClusterAgent;
export {
# The name this agent uses to represent the cluster instance
# it manages. When the environment variable isn't set and there's
# no redef, this falls back to "agent-<hostname>".
## The name this agent uses to represent the cluster instance it
## manages. Defaults to the value of the ZEEK_AGENT_NAME environment
## variable. When that is unset and you don't redef the value,
## the implementation defaults to "agent-<hostname>".
const name = getenv("ZEEK_AGENT_NAME") &redef;
# Agent stdout/stderr log files to produce in Zeek's working
# directory. If empty, no such logs will result. The actual
# log files have the agent's name (as per above) dot-prefixed.
## Agent stdout log configuration. If the string is non-empty, Zeek will
## produce a free-form log (i.e., not one governed by Zeek's logging
## framework) in Zeek's working directory. The final log's name is
## "<name>.<suffix>", where the name is taken from :zeek:see:`ClusterAgent::name`,
## and the suffix is defined by the following variable. If left empty,
## no such log results.
##
## Note that the agent also establishes a "proper" Zeek log via the
## :zeek:see:`ClusterController::Log` module.
const stdout_file_suffix = "agent.stdout" &redef;
## Agent stderr log configuration. Like :zeek:see:`ClusterAgent::stdout_file_suffix`,
## but for the stderr stream.
const stderr_file_suffix = "agent.stderr" &redef;
# The address and port the agent listens on. When
# undefined, falls back to configurable default values.
## The network address the agent listens on. This only takes effect if
## the agent isn't configured to connect to the controller (see
## :zeek:see:`ClusterAgent::controller`). By default this uses the value of the
## ZEEK_AGENT_ADDR environment variable, but you may also redef to
## a specific value. When empty, the implementation falls back to
## :zeek:see:`ClusterAgent::default_address`.
const listen_address = getenv("ZEEK_AGENT_ADDR") &redef;
## The fallback listen address if :zeek:see:`ClusterAgent::listen_address`
## remains empty. Unless redefined, this uses Broker's own default listen
## address.
const default_address = Broker::default_listen_address &redef;
## The network port the agent listens on. Counterpart to
## :zeek:see:`ClusterAgent::listen_address`, defaulting to the ZEEK_AGENT_PORT
## environment variable.
const listen_port = getenv("ZEEK_AGENT_PORT") &redef;
## The fallback listen port if :zeek:see:`ClusterAgent::listen_port` remains empty.
const default_port = 2151/tcp &redef;
# The agent communicates under the following topic prefix,
# suffixed with "/<name>" (see above):
## The agent's Broker topic prefix. For its own communication, the agent
## suffixes this with "/<name>", based on :zeek:see:`ClusterAgent::name`.
const topic_prefix = "zeek/cluster-control/agent" &redef;
# The coordinates of the controller. When defined, it means
# agents peer with (connect to) the controller; otherwise the
# controller knows all agents and peers with them.
## The network coordinates of the controller. When defined, the agent
## peers with (and connects to) the controller; otherwise the controller
## will peer (and connect to) the agent, listening as defined by
## :zeek:see:`ClusterAgent::listen_address` and :zeek:see:`ClusterAgent::listen_port`.
const controller: Broker::NetworkInfo = [
$address="0.0.0.0", $bound_port=0/unknown] &redef;
# Agent and controller currently log only, not via the data cluster's
# logger. (This might get added later.) For now, this means that
# if both write to the same log file, it gets garbled. The following
# lets you specify the working directory specifically for the agent.
## An optional custom output directory for the agent's stdout and stderr
## logs. Agent and controller currently only log locally, not via the
## data cluster's logger node. (This might change in the future.) This
## means that if both write to the same log file, the output gets
## garbled.
const directory = "" &redef;
# Working directory for data cluster nodes. When relative, note
# that this will apply from the working directory of the agent,
# since it creates data cluster nodes.
## The working directory for data cluster nodes created by this
## agent. If you make this a relative path, note that the path is
## relative to the agent's working directory, since it creates data
## cluster nodes.
const cluster_directory = "" &redef;
# The following functions return the effective network endpoint
# information for this agent, in two related forms.
## Returns a :zeek:see:`ClusterController::Types::Instance` describing this
## instance (its agent name plus listening address/port, as applicable).
global instance: function(): ClusterController::Types::Instance;
## Returns a :zeek:see:`Broker::EndpointInfo` record for this instance.
## Similar to :zeek:see:`ClusterAgent::instance`, but with slightly different
## data format.
global endpoint_info: function(): Broker::EndpointInfo;
}

View file

@ -1,3 +1,8 @@
##! This is the main "runtime" of a cluster agent. Zeek does not load this
##! directly; rather, the agent's bootstrapping module (in ./boot.zeek)
##! specifies it as the script to run in the node newly created via Zeek's
##! supervisor.
@load base/frameworks/broker
@load policy/frameworks/cluster/controller/config
@ -6,21 +11,24 @@
@load ./api
module ClusterAgent::Runtime;
redef ClusterController::role = ClusterController::Types::AGENT;
# The global configuration as passed to us by the controller
global global_config: ClusterController::Types::Configuration;
global g_config: ClusterController::Types::Configuration;
# A map to make other instance info accessible
global instances: table[string] of ClusterController::Types::Instance;
global g_instances: table[string] of ClusterController::Types::Instance;
# A map for the nodes we run on this instance, via this agent.
global nodes: table[string] of ClusterController::Types::Node;
global g_nodes: table[string] of ClusterController::Types::Node;
# The node map employed by the supervisor to describe the cluster
# topology to newly forked nodes. We refresh it when we receive
# new configurations.
global data_cluster: table[string] of Supervisor::ClusterEndpoint;
global g_data_cluster: table[string] of Supervisor::ClusterEndpoint;
event SupervisorControl::create_response(reqid: string, result: string)
{
@ -86,43 +94,43 @@ event ClusterAgent::API::set_configuration_request(reqid: string, config: Cluste
# Adopt the global configuration provided.
# XXX this can later handle validation and persistence
# XXX should do this transactionally, only set when all else worked
global_config = config;
g_config = config;
# Refresh the instances table:
instances = table();
g_instances = table();
for ( inst in config$instances )
instances[inst$name] = inst;
g_instances[inst$name] = inst;
# Terminate existing nodes
for ( nodename in nodes )
for ( nodename in g_nodes )
supervisor_destroy(nodename);
nodes = table();
g_nodes = table();
# Refresh the data cluster and nodes tables
data_cluster = table();
g_data_cluster = table();
for ( node in config$nodes )
{
if ( node$instance == ClusterAgent::name )
nodes[node$name] = node;
g_nodes[node$name] = node;
local cep = Supervisor::ClusterEndpoint(
$role = node$role,
$host = instances[node$instance]$host,
$host = g_instances[node$instance]$host,
$p = node$p);
if ( node?$interface )
cep$interface = node$interface;
data_cluster[node$name] = cep;
g_data_cluster[node$name] = cep;
}
# Apply the new configuration via the supervisor
for ( nodename in nodes )
for ( nodename in g_nodes )
{
node = nodes[nodename];
node = g_nodes[nodename];
nc = Supervisor::NodeConfig($name=nodename);
if ( ClusterAgent::cluster_directory != "" )
@ -140,7 +148,7 @@ event ClusterAgent::API::set_configuration_request(reqid: string, config: Cluste
# XXX could use options to enable per-node overrides for
# directory, stdout, stderr, others?
nc$cluster = data_cluster;
nc$cluster = g_data_cluster;
supervisor_create(nc);
}
@ -149,22 +157,59 @@ event ClusterAgent::API::set_configuration_request(reqid: string, config: Cluste
# events asynchronously. The only indication of error will be
# notification events to the controller.
if ( reqid != "" )
{
local res = ClusterController::Types::Result(
$reqid = reqid,
$instance = ClusterAgent::name);
ClusterController::Log::info(fmt("tx ClusterAgent::API::set_configuration_response %s", reqid));
ClusterController::Log::info(fmt("tx ClusterAgent::API::set_configuration_response %s",
ClusterController::Types::result_to_string(res)));
event ClusterAgent::API::set_configuration_response(reqid, res);
}
}
event ClusterAgent::API::agent_welcome_request(reqid: string)
{
ClusterController::Log::info(fmt("rx ClusterAgent::API::agent_welcome_request %s", reqid));
local res = ClusterController::Types::Result(
$reqid = reqid,
$instance = ClusterAgent::name);
ClusterController::Log::info(fmt("tx ClusterAgent::API::agent_welcome_response %s",
ClusterController::Types::result_to_string(res)));
event ClusterAgent::API::agent_welcome_response(reqid, res);
}
event ClusterAgent::API::agent_standby_request(reqid: string)
{
ClusterController::Log::info(fmt("rx ClusterAgent::API::agent_standby_request %s", reqid));
# We shut down any existing cluster nodes via an empty configuration,
# and fall silent. We do not unpeer/disconnect (assuming we earlier
# peered/connected -- otherwise there's nothing we can do here via
# Broker anyway), mainly to keep open the possibility of running
# cluster nodes again later.
event ClusterAgent::API::set_configuration_request("", ClusterController::Types::Configuration());
local res = ClusterController::Types::Result(
$reqid = reqid,
$instance = ClusterAgent::name);
ClusterController::Log::info(fmt("tx ClusterAgent::API::agent_standby_response %s",
ClusterController::Types::result_to_string(res)));
event ClusterAgent::API::agent_standby_response(reqid, res);
}
event Broker::peer_added(peer: Broker::EndpointInfo, msg: string)
{
# This does not (cannot?) immediately verify that the new peer
# is in fact a controller, so we might send this redundantly.
# Controllers handle the hello event accordingly.
# is in fact a controller, so we might send this in vain.
# Controllers register the agent upon receipt of the event.
local epi = ClusterAgent::endpoint_info();
# XXX deal with unexpected peers, unless we're okay with it
event ClusterAgent::API::notify_agent_hello(epi$id,
to_addr(epi$network$address), ClusterAgent::API::version);
}
@ -192,6 +237,9 @@ event zeek_init()
# Auto-publish a bunch of events. Glob patterns or module-level
# auto-publish would be helpful here.
Broker::auto_publish(agent_topic, ClusterAgent::API::set_configuration_response);
Broker::auto_publish(agent_topic, ClusterAgent::API::agent_welcome_response);
Broker::auto_publish(agent_topic, ClusterAgent::API::agent_standby_response);
Broker::auto_publish(agent_topic, ClusterAgent::API::notify_agent_hello);
Broker::auto_publish(agent_topic, ClusterAgent::API::notify_change);
Broker::auto_publish(agent_topic, ClusterAgent::API::notify_error);

View file

@ -1,5 +1,4 @@
# The entry point for the cluster controller. It only runs bootstrap logic for
# launching via the Supervisor. If we're not running the Supervisor, this does
# nothing.
##! The entry point for the cluster controller. It runs bootstrap logic for
##! launching the controller process via Zeek's Supervisor.
@load ./boot

View file

@ -1,16 +1,96 @@
##! The event API of cluster controllers. Most endpoints consist of event pairs,
##! where the controller answers a zeek-client request event with a
##! corresponding response event. Such event pairs share the same name prefix
##! and end in "_request" and "_response", respectively.
@load ./types
module ClusterController::API;
export {
## A simple versioning scheme, used to track basic compatibility of
## controller, agents, and zeek-client.
const version = 1;
global get_instances_request: event(reqid: string);
global get_instances_response: event(reqid: string,
instances: vector of ClusterController::Types::Instance);
## zeek-client sends this event to request a list of the currently
## peered agents/instances.
##
## reqid: a request identifier string, echoed in the response event.
##
global get_instances_request: event(reqid: string);
## Response to a get_instances_request event. The controller sends
## this back to the client.
##
## reqid: the request identifier used in the request event.
##
## result: the result record. Its data member is a
## :zeek:see:`ClusterController::Types::Instance` record.
##
global get_instances_response: event(reqid: string,
result: ClusterController::Types::Result);
## zeek-client sends this event to establish a new cluster configuration,
## including the full cluster topology. The controller processes the update
## and relays it to the agents. Once each has responded (or a timeout occurs)
## the controller sends a corresponding response event back to the client.
##
## reqid: a request identifier string, echoed in the response event.
##
## config: a :zeek:see:`ClusterController::Types::Configuration` record
## specifying the cluster configuration.
##
global set_configuration_request: event(reqid: string,
config: ClusterController::Types::Configuration);
## Response to a set_configuration_request event. The controller sends
## this back to the client.
##
## reqid: the request identifier used in the request event.
##
## result: a vector of :zeek:see:`ClusterController::Types::Result` records.
## Each member captures one agent's response.
##
global set_configuration_response: event(reqid: string,
result: ClusterController::Types::ResultVec);
# Testing events. These don't provide operational value but expose
# internal functionality, triggered by test cases.
## This event causes no further action (other than getting logged) if
## with_state is F. When T, the controller establishes request state, and
## the controller only ever sends the response event when this state times
## out.
##
## reqid: a request identifier string, echoed in the response event when
## with_state is T.
##
## with_state: flag indicating whether the controller should keep (and
## time out) request state for this request.
##
global test_timeout_request: event(reqid: string, with_state: bool);
## Response to a test_timeout_request event. The controller sends this
## back to the client if the original request had the with_state flag.
##
## reqid: the request identifier used in the request event.
##
global test_timeout_response: event(reqid: string,
result: ClusterController::Types::Result);
# Notification events, agent -> controller
## The controller triggers this event when the operational cluster
## instances align with the ones desired by the cluster
## configuration. It's essentially a cluster management readiness
## event. This event is currently only used by the controller and not
## published to other topics.
##
## instances: the set of instance names now ready.
##
global notify_agents_ready: event(instances: set[string]);
}
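A rough client-side sketch of the request/response pattern, under a few assumptions: the controller listens on its default 2150/tcp, subscribing to ClusterController::topic suffices to receive the response event, and the request ID is an arbitrary placeholder.
@load policy/frameworks/cluster/controller/api

event ClusterController::API::get_instances_response(reqid: string,
    result: ClusterController::Types::Result)
	{
	print fmt("rx get_instances_response %s", reqid);
	}

event Broker::peer_added(ep: Broker::EndpointInfo, msg: string)
	{
	# Once peered, ask the controller for its known instances.
	Broker::publish(ClusterController::topic,
	    ClusterController::API::get_instances_request, "req-0001");
	}

event zeek_init()
	{
	Broker::subscribe(ClusterController::topic);
	Broker::peer("127.0.0.1", 2150/tcp);
	}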

View file

@ -1,3 +1,10 @@
##! The cluster controller's boot logic runs in Zeek's supervisor and instructs
##! it to launch the controller process. The controller's main logic resides in
##! main.zeek, similarly to other frameworks. The new process will execute that
##! script.
##!
##! If the current process is not the Zeek supervisor, this does nothing.
@load ./config
event zeek_init()

View file

@ -1,53 +1,78 @@
##! Configuration settings for the cluster controller.
@load policy/frameworks/cluster/agent/config
module ClusterController;
export {
# The name of this controller in the cluster.
# Without the environment variable and no redef, this
# falls back to "controller-<hostname>".
## The name of this controller. Defaults to the value of the
## ZEEK_CONTROLLER_NAME environment variable. When that is unset and the
## user doesn't redef the value, the implementation defaults to
## "controller-<hostname>".
const name = getenv("ZEEK_CONTROLLER_NAME") &redef;
# Controller stdout/stderr log files to produce in Zeek's
# working directory. If empty, no such logs will result.
## The controller's stdout log name. If the string is non-empty, Zeek will
## produce a free-form log (i.e., not one governed by Zeek's logging
## framework) in Zeek's working directory. If left empty, no such log
## results.
##
## Note that the controller also establishes a "proper" Zeek log via the
## :zeek:see:`ClusterController::Log` module.
const stdout_file = "controller.stdout" &redef;
## The controller's stderr log name. Like :zeek:see:`ClusterController::stdout_file`,
## but for the stderr stream.
const stderr_file = "controller.stderr" &redef;
# The address and port the controller listens on. When
# undefined, falls back to the default_address, which you can
# likewise customize.
## The network address the controller listens on. By default this uses
## the value of the ZEEK_CONTROLLER_ADDR environment variable, but you
## may also redef to a specific value. When empty, the implementation
## falls back to :zeek:see:`ClusterController::default_address`.
const listen_address = getenv("ZEEK_CONTROLLER_ADDR") &redef;
## The fallback listen address if :zeek:see:`ClusterController::listen_address`
## remains empty. Unless redefined, this uses Broker's own default
## listen address.
const default_address = Broker::default_listen_address &redef;
## The network port the controller listens on. Counterpart to
## :zeek:see:`ClusterController::listen_address`, defaulting to the
## ZEEK_CONTROLLER_PORT environment variable.
const listen_port = getenv("ZEEK_CONTROLLER_PORT") &redef;
## The fallback listen port if :zeek:see:`ClusterController::listen_port`
## remains empty.
const default_port = 2150/tcp &redef;
# A more aggressive default retry interval (vs default 30s)
## The controller's connect retry interval. Defaults to a more
## aggressive value compared to Broker's 30s.
const connect_retry = 1sec &redef;
# The controller listens for messages on this topic:
## The controller's Broker topic. Clients send requests to this topic.
const topic = "zeek/cluster-control/controller" &redef;
# The set of agents to interact with. When this is non-empty
# at startup, the controller contacts the agents; when it is
# empty, it waits for agents to connect. The key is the name of
# each instance. This should match the $name member of the
# instance records.
const instances: table[string] of ClusterController::Types::Instance = { } &redef;
# The role of this node in cluster management. Agent and
# controller both redef this. Used during logging.
## The role of this process in cluster management. Agent and controller
## both redefine this. Used during logging.
const role = ClusterController::Types::NONE &redef;
# Agent and controller currently log only, not via the data cluster's
# logger. (This might get added later.) For now, this means that
# if both write to the same log file, it gets garbled. The following
# lets you specify the working directory specifically for the agent.
## The timeout for request state. Such state (see the :zeek:see:`ClusterController::Request`
## module) ties together request and response event pairs. The timeout causes
## its cleanup in the absence of a timely response. It applies both to
## state kept for client requests, as well as state in the agents for
## requests to the supervisor.
const request_timeout = 10sec &redef;
## An optional custom output directory for the controller's stdout and
## stderr logs. Agent and controller currently only log locally, not via
## the data cluster's logger node. (This might change in the future.)
## This means that if both write to the same log file, the output gets
## garbled.
const directory = "" &redef;
# The following functions return the effective network endpoint
# information for this controller, in two related forms.
## Returns a :zeek:see:`Broker::NetworkInfo` record describing the controller.
global network_info: function(): Broker::NetworkInfo;
## Returns a :zeek:see:`Broker::EndpointInfo` record describing the controller.
global endpoint_info: function(): Broker::EndpointInfo;
}

View file

@ -1,3 +1,8 @@
##! This module implements straightforward logging abilities for cluster
##! controller and agent. It uses Zeek's logging framework, and works only for
##! nodes managed by the supervisor. In this setting Zeek's logging framework
##! operates locally, i.e., this logging does not involve any logger nodes.
@load ./config
module ClusterController::Log;
@ -9,6 +14,7 @@ export {
## A default logging policy hook for the stream.
global log_policy: Log::PolicyHook;
## The controller/agent log supports four different log levels.
type Level: enum {
DEBUG,
INFO,
@ -16,7 +22,7 @@ export {
ERROR,
};
## The record type which contains the column fields of the cluster log.
## The record type containing the column fields of the agent/controller log.
type Info: record {
## The time at which a cluster message was generated.
ts: time;
@ -30,10 +36,32 @@ export {
message: string;
} &log;
## The log level in use for this node.
global log_level = DEBUG &redef;
## A debug-level log message writer.
##
## message: the message to log.
##
global debug: function(message: string);
## An info-level log message writer.
##
## message: the message to log.
##
global info: function(message: string);
## A warning-level log message writer.
##
## message: the message to log.
##
global warning: function(message: string);
## An error-level log message writer. (This only logs a message, it does not
## terminate Zeek or have other runtime effects.)
##
## message: the message to log.
##
global error: function(message: string);
}
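Usage is a plain function call from agent or controller code; a minimal sketch:
event zeek_init()
	{
	ClusterController::Log::info("cluster management node up");
	}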

View file

@ -1,3 +1,8 @@
##! This is the main "runtime" of the cluster controller. Zeek does not load
##! this directly; rather, the controller's bootstrapping module (in ./boot.zeek)
##! specifies it as the script to run in the node newly created via Zeek's
##! supervisor.
@load base/frameworks/broker
@load policy/frameworks/cluster/agent/config
@ -6,51 +11,251 @@
@load ./api
@load ./log
@load ./request
@load ./util
module ClusterController::Runtime;
redef ClusterController::role = ClusterController::Types::CONTROLLER;
global check_instances_ready: function();
global add_instance: function(inst: ClusterController::Types::Instance);
global drop_instance: function(inst: ClusterController::Types::Instance);
global null_config: function(): ClusterController::Types::Configuration;
global is_null_config: function(config: ClusterController::Types::Configuration): bool;
# Checks whether the given instance is one that we know with different
# communication settings: a different peering direction, a different listening
# port, etc. Used as a predicate to indicate when we need to drop the existing
# one from our internal state.
global is_instance_connectivity_change: function
(inst: ClusterController::Types::Instance): bool;
# The set of agents the controller interacts with to manage the currently
# configured cluster. This may be a subset of all the agents known to the
# controller, as tracked by the g_instances_known set. The key is the instance
# name and should match the $name member of the corresponding instance record.
global g_instances: table[string] of ClusterController::Types::Instance = table();
# The set of instances that have checked in with the controller. This is a
# superset of g_instances, since it covers any agent that has sent us a
# notify_agent_hello event.
global g_instances_known: set[string] = set();
# A corresponding set of instances/agents that we track in order to understand
# when all of the above instances have sent agent_welcome_response events. (An
# alternative would be to use a record that adds a single state bit for each
# instance, and store that above.)
global g_instances_ready: set[string] = set();
# The request ID of the most recent configuration update that's come in from
# a client. We track it here until we know we are ready to communicate with all
# agents required by the update.
global g_config_reqid_pending: string = "";
# The most recent configuration we have successfully deployed. This is also
# the one we send whenever the client requests it.
global g_config_current: ClusterController::Types::Configuration;
function send_config_to_agents(req: ClusterController::Request::Request,
config: ClusterController::Types::Configuration)
{
for ( name in g_instances )
{
if ( name !in g_instances_ready )
next;
local agent_topic = ClusterAgent::topic_prefix + "/" + name;
local areq = ClusterController::Request::create();
areq$parent_id = req$id;
# We track the requests sent off to each agent. As the
# responses come in, we can check them off as completed,
# and once all are, we respond back to the client.
req$set_configuration_state$requests += areq;
# We could also broadcast just once on the agent prefix, but
# explicit request/response pairs for each agent seems cleaner.
ClusterController::Log::info(fmt("tx ClusterAgent::API::set_configuration_request %s to %s", areq$id, name));
Broker::publish(agent_topic, ClusterAgent::API::set_configuration_request, areq$id, config);
}
}
# This is the &on_change handler for the g_instances_ready set, meaning
# it runs whenever a required agent has confirmed it's ready.
function check_instances_ready()
{
local cur_instances: set[string];
for ( inst in g_instances )
add cur_instances[inst];
if ( cur_instances == g_instances_ready )
event ClusterController::API::notify_agents_ready(cur_instances);
}
function add_instance(inst: ClusterController::Types::Instance)
{
g_instances[inst$name] = inst;
if ( inst?$listen_port )
Broker::peer(cat(inst$host), inst$listen_port,
ClusterController::connect_retry);
if ( inst$name in g_instances_known )
{
# The agent has already peered with us. Send welcome to indicate
# it's part of cluster management. Once it responds, we update
# the set of ready instances and proceed as feasible with config
# deployments.
local req = ClusterController::Request::create();
ClusterController::Log::info(fmt("tx ClusterAgent::API::agent_welcome_request to %s", inst$name));
Broker::publish(ClusterAgent::topic_prefix + "/" + inst$name,
ClusterAgent::API::agent_welcome_request, req$id);
}
}
function drop_instance(inst: ClusterController::Types::Instance)
{
if ( inst$name !in g_instances )
return;
# Send the agent a standby so it shuts down its cluster nodes & state
ClusterController::Log::info(fmt("tx ClusterAgent::API::agent_standby_request to %s", inst$name));
Broker::publish(ClusterAgent::topic_prefix + "/" + inst$name,
ClusterAgent::API::agent_standby_request, "");
delete g_instances[inst$name];
if ( inst$name in g_instances_ready )
delete g_instances_ready[inst$name];
# The agent remains in g_instances_known, to track that we're able
# to communicate with it in case it's required again.
ClusterController::Log::info(fmt("dropped instance %s", inst$name));
}
function null_config(): ClusterController::Types::Configuration
{
return ClusterController::Types::Configuration($id="");
}
function is_null_config(config: ClusterController::Types::Configuration): bool
{
return config$id == "";
}
function is_instance_connectivity_change(inst: ClusterController::Types::Instance): bool
{
# If we're not tracking this instance as part of a cluster config, it's
# not a change. (More precisely: we cannot say whether it's changed.)
if ( inst$name !in g_instances )
return F;
# The agent has peered with us and now uses a different host.
# XXX 0.0.0.0 is a workaround until we've resolved how agents that peer
# with us obtain their identity. Broker ID?
if ( inst$host != 0.0.0.0 && inst$host != g_instances[inst$name]$host )
return T;
# The agent has a listening port and the one we know does not, or vice
# versa. I.e., this is a change in the intended peering direction.
if ( inst?$listen_port != g_instances[inst$name]?$listen_port )
return T;
# Both have listening ports, but they differ.
if ( inst?$listen_port && g_instances[inst$name]?$listen_port &&
inst$listen_port != g_instances[inst$name]$listen_port )
return T;
return F;
}
event ClusterController::API::notify_agents_ready(instances: set[string])
{
local insts = ClusterController::Util::set_to_vector(instances);
ClusterController::Log::info(fmt("rx ClusterController::API:notify_agents_ready %s", join_string_vec(insts, ",")));
local req = ClusterController::Request::lookup(g_config_reqid_pending);
# If there's no pending request, it's no longer available, or it
# doesn't have config state, don't do anything else.
if ( ClusterController::Request::is_null(req) || ! req?$set_configuration_state )
return;
# All instances requested in the pending configuration update are now
# known to us. Send them the config. As they send their response events
# we update the client's request state and eventually send the response
# event to it.
send_config_to_agents(req, req$set_configuration_state$config);
}
event ClusterAgent::API::notify_agent_hello(instance: string, host: addr, api_version: count)
{
# See if we already know about this agent; if not, register
# it.
#
# XXX protection against rogue agents?
ClusterController::Log::info(fmt("rx ClusterAgent::API::notify_agent_hello %s %s", instance, host));
if ( instance in ClusterController::instances )
{
# Do nothing, unless this known agent checks in with a mismatching
# API version, in which case we kick it out.
# When an agent checks in with a mismatching API version, we log the
# fact and drop its state, if any.
if ( api_version != ClusterController::API::version )
{
local inst = ClusterController::instances[instance];
if ( inst?$listen_port )
{
# We peered with this instance, unpeer.
Broker::unpeer(cat(inst$host), inst$listen_port );
# XXX what to do if they connected to us?
}
delete ClusterController::instances[instance];
}
ClusterController::Log::warning(
fmt("instance %s/%s has checked in with incompatible API version %s",
instance, host, api_version));
# Update the instance name in the pointed-to record, in case it
# was previously named otherwise. Not being too picky here allows
# the user some leeway in spelling out the original config.
ClusterController::instances[instance]$name = instance;
if ( instance in g_instances )
drop_instance(g_instances[instance]);
if ( instance in g_instances_known )
delete g_instances_known[instance];
return;
}
if ( api_version != ClusterController::API::version )
add g_instances_known[instance];
if ( instance in g_instances && instance !in g_instances_ready )
{
ClusterController::Log::warning(
fmt("agent %s/%s speaks incompatible agent protocol (%s, need %s), unpeering",
instance, host, api_version, ClusterController::API::version));
# We need this instance for our cluster and have full context for
# it from the configuration. Tell agent.
local req = ClusterController::Request::create();
ClusterController::Log::info(fmt("tx ClusterAgent::API::agent_welcome_request to %s", instance));
Broker::publish(ClusterAgent::topic_prefix + "/" + instance,
ClusterAgent::API::agent_welcome_request, req$id);
}
}
ClusterController::instances[instance] = ClusterController::Types::Instance($name=instance, $host=host);
ClusterController::Log::info(fmt("instance %s/%s has checked in", instance, host));
event ClusterAgent::API::agent_welcome_response(reqid: string, result: ClusterController::Types::Result)
{
ClusterController::Log::info(fmt("rx ClusterAgent::API::agent_welcome_response %s", reqid));
local req = ClusterController::Request::lookup(reqid);
if ( ClusterController::Request::is_null(req) )
return;
ClusterController::Request::finish(req$id);
# An agent we've been waiting to hear back from is ready for cluster
# work. Double-check we still want it, otherwise drop it.
if ( ! result$success || result$instance !in g_instances )
{
ClusterController::Log::info(fmt(
"tx ClusterAgent::API::agent_standby_request to %s", result$instance));
Broker::publish(ClusterAgent::topic_prefix + "/" + result$instance,
ClusterAgent::API::agent_standby_request, "");
return;
}
add g_instances_ready[result$instance];
ClusterController::Log::info(fmt("instance %s ready", result$instance));
check_instances_ready();
}
event ClusterAgent::API::notify_change(instance: string, n: ClusterController::Types::Node,
old: ClusterController::Types::State,
@ -132,7 +337,13 @@ event ClusterAgent::API::set_configuration_response(reqid: string, result: Clust
ClusterController::Request::finish(r$id);
}
ClusterController::Log::info(fmt("tx ClusterController::API::set_configuration_response %s", req$id));
# We're now done with the original set_configuration request.
# Adopt the configuration as the current one.
g_config_current = req$set_configuration_state$config;
g_config_reqid_pending = "";
ClusterController::Log::info(fmt("tx ClusterController::API::set_configuration_response %s",
ClusterController::Request::to_string(req)));
event ClusterController::API::set_configuration_response(req$id, req$results);
ClusterController::Request::finish(req$id);
}
@ -141,25 +352,24 @@ event ClusterController::API::set_configuration_request(reqid: string, config: C
{
ClusterController::Log::info(fmt("rx ClusterController::API::set_configuration_request %s", reqid));
local res: ClusterController::Types::Result;
local req = ClusterController::Request::create(reqid);
req$set_configuration_state = ClusterController::Request::SetConfigurationState($config = config);
# At the moment there can only be one pending request.
if ( g_config_reqid_pending != "" )
{
res = ClusterController::Types::Result($reqid=reqid);
res$success = F;
res$error = fmt("request %s still pending", g_config_reqid_pending);
req$results += res;
ClusterController::Log::info(fmt("tx ClusterController::API::set_configuration_response %s",
ClusterController::Request::to_string(req)));
event ClusterController::API::set_configuration_response(req$id, req$results);
ClusterController::Request::finish(req$id);
return;
}
# XXX validate the configuration:
@ -169,82 +379,177 @@ event ClusterController::API::set_configuration_request(reqid: string, config: C
# - Do node types with optional fields have required values?
# ...
# The incoming request is now the pending one. It gets cleared when all
# agents have processed their config updates successfully, or their
# responses time out.
g_config_reqid_pending = req$id;
# Compare the instance configuration to our current one. If it matches,
# we can proceed to deploying the new data cluster topology. If it does
# not, we need to establish connectivity with agents we connect to, or
# wait until all instances that connect to us have done so. Either triggers
# a notify_agents_ready event, upon which we then deploy the data cluster.
# The current & new set of instance names.
local insts_current: set[string];
local insts_new: set[string];
# A set of current instances not contained in the new config.
# Those will need to get dropped.
local insts_to_drop: set[string];
# The opposite: new instances not yet in our current set. Those we will need
# to establish contact with (or they with us).
local insts_to_add: set[string];
# The overlap: instances in both the current and new set. For those we verify
# that we're actually dealing with the same entities, and might need to
# re-connect if not.
local insts_to_keep: set[string];
# Alternative representation of insts_to_add, directly providing the instances.
local insts_to_peer: table[string] of ClusterController::Types::Instance;
# Helpful locals.
local inst_name: string;
local inst: ClusterController::Types::Instance;
for ( inst_name in g_instances )
add insts_current[inst_name];
for ( inst in config$instances )
add insts_new[inst$name];
# Populate TODO lists for instances we need to drop, check, or add.
insts_to_drop = insts_current - insts_new;
insts_to_add = insts_new - insts_current;
insts_to_keep = insts_new & insts_current;
for ( inst in config$instances )
{
if ( inst$name in insts_to_add )
{
insts_to_peer[inst$name] = inst;
next;
}
# Focus on the keepers: check for change in identity/location.
if ( inst$name !in insts_to_keep )
next;
if ( is_instance_connectivity_change(inst) )
{
# The endpoint looks different. We drop the current one
# and need to re-establish connectivity with the new
# one.
add insts_to_drop[inst$name];
add insts_to_add[inst$name];
}
}
# Process our TODO lists. Handle drops first, then additions, in
# case we need to re-establish connectivity with an agent.
for ( inst_name in insts_to_drop )
drop_instance(g_instances[inst_name]);
for ( inst_name in insts_to_peer )
add_instance(insts_to_peer[inst_name]);
# Updates to our instance tables are complete, now check if we're already
# able to send the config to the agents:
check_instances_ready();
}
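The drop/add/keep bookkeeping above relies on Zeek's set operators ('-' for difference, '&' for intersection). In isolation, with invented instance names, the same computation looks like this (a standalone sketch, not part of this change):

event zeek_init()
	{
	local insts_current = set("instance-1", "instance-2");
	local insts_new = set("instance-2", "instance-3");
	print insts_current - insts_new; # to drop: { instance-1 }
	print insts_new - insts_current; # to add: { instance-3 }
	print insts_new & insts_current; # to keep: { instance-2 }
	}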
event ClusterController::API::get_instances_request(reqid: string)
{
ClusterController::Log::info(fmt("rx ClusterController::API::set_instances_request %s", reqid));
local res = ClusterController::Types::Result($reqid = reqid);
local insts: vector of ClusterController::Types::Instance;
for ( i in g_instances )
insts += g_instances[i];
res$data = insts;
ClusterController::Log::info(fmt("tx ClusterController::API::get_instances_response %s", reqid));
event ClusterController::API::get_instances_response(reqid, res);
}
event ClusterController::Request::request_expired(req: ClusterController::Request::Request)
{
# Various handlers for timed-out request state. We use the state members
# to identify how to respond. No need to clean up the request itself,
# since we're getting here via the request module's expiration
# mechanism that handles the cleanup.
local res: ClusterController::Types::Result;
if ( req?$set_configuration_state )
{
# This timeout means we no longer have a pending request.
g_config_reqid_pending = "";
res = ClusterController::Types::Result($reqid=req$id);
res$success = F;
res$error = "request timed out";
req$results += res;
ClusterController::Log::info(fmt("tx ClusterController::API::set_configuration_response %s",
ClusterController::Request::to_string(req)));
event ClusterController::API::set_configuration_response(req$id, req$results);
}
if ( req?$test_state )
{
res = ClusterController::Types::Result($reqid=req$id);
res$success = F;
res$error = "request timed out";
ClusterController::Log::info(fmt("tx ClusterController::API::test_timeout_response %s", req$id));
event ClusterController::API::test_timeout_response(req$id, res);
}
}
event ClusterController::API::test_timeout_request(reqid: string, with_state: bool)
{
ClusterController::Log::info(fmt("rx ClusterController::API::test_timeout_request %s %s", reqid, with_state));
if ( with_state )
{
# This state times out and triggers a timeout response in the
# above request_expired event handler.
local req = ClusterController::Request::create(reqid);
req$test_state = ClusterController::Request::TestState();
}
}
event zeek_init()
{
# Initialize null config at startup. We will replace it once we have
# persistence, and again whenever we complete a client's
# set_configuration request.
g_config_current = null_config();
# The controller always listens -- it needs to be able to respond to the
# Zeek client. This port is also used by the agents if they connect to
# the client. The controller doesn't automatically establish or accept
# connectivity to agents: agents are defined, and communicated with, via
# configurations provided by the client.
local cni = ClusterController::network_info();
Broker::listen(cat(cni$address), cni$bound_port);
Broker::subscribe(ClusterAgent::topic_prefix);
Broker::subscribe(ClusterController::topic);
# Events sent to the client:
Broker::auto_publish(ClusterController::topic,
ClusterController::API::get_instances_response);
Broker::auto_publish(ClusterController::topic,
ClusterController::API::set_configuration_response);
Broker::auto_publish(ClusterController::topic,
ClusterController::API::test_timeout_response);
ClusterController::Log::info("controller is live");
}
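For orientation, a hypothetical client-side script could drive the API set up above as follows: it peers with the controller, publishes a get_instances_request, and handles the auto-published response. The address, port, and request ID are invented; the topic and event names are the ones used above.

event ClusterController::API::get_instances_response(reqid: string,
	result: ClusterController::Types::Result)
	{
	print fmt("rx get_instances_response %s: %s", reqid,
		ClusterController::Types::result_to_string(result));
	}
event Broker::peer_added(endpoint: Broker::EndpointInfo, msg: string)
	{
	Broker::publish(ClusterController::topic,
		ClusterController::API::get_instances_request, "req-0001");
	}
event zeek_init()
	{
	Broker::subscribe(ClusterController::topic);
	Broker::peer("127.0.0.1", 2150/tcp); # assuming the controller listens here
	}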

View file

@ -1,23 +1,33 @@
##! This module implements a request state abstraction that both cluster
##! controller and agent use to tie responses to received request events and be
##! able to time-out such requests.
@load ./types
@load ./config
module ClusterController::Request;
export {
## Request records track each request's state.
type Request: record {
## Each request has a hopefully unique ID provided by the requester.
id: string;
## For requests that result from another request (such as when the
## controller sends requests to agents based on a request it received
## from the client), this specifies that original, "parent" request.
parent_id: string &optional;
};
# API-specific state. XXX we may be able to generalize after this has
# settled a bit more. It would also be nice to move request-specific
# state out of this module -- we could for example redef Request in
# main.zeek as needed.
# State specific to the set_configuration request/response events
type SetConfigurationState: record {
config: ClusterController::Types::Configuration;
requests: vector of Request &default=vector();
};
# State specific to the set_nodes request/response events
type SetNodesState: record {
requests: vector of Request &default=vector();
};
@ -26,51 +36,105 @@ export {
node: string;
};
# State for testing events
type TestState: record {
};
# The redef is a workaround so we can use the Request type
# while it is still being defined.
redef record Request += {
results: ClusterController::Types::ResultVec &default=vector();
finished: bool &default=F;
set_configuration_state: SetConfigurationState &optional;
set_nodes_state: SetNodesState &optional;
supervisor_state: SupervisorState &optional;
test_state: TestState &optional;
};
## A token request that serves as a null/nonexistent request.
global null_req = Request($id="", $finished=T);
## This function establishes request state.
##
## reqid: the identifier to use for the request.
##
global create: function(reqid: string &default=unique_id("")): Request;
## This function looks up the request for a given request ID and returns
## it. When no such request exists, returns ClusterController::Request::null_req.
##
## reqid: the ID of the request state to retrieve.
##
global lookup: function(reqid: string): Request;
## This function marks a request as complete and causes Zeek to release
## its internal state. When the request does not exist, this does
## nothing.
##
## reqid: the ID of the request state to release.
##
global finish: function(reqid: string): bool;
## This event fires when a request times out (as per the
## ClusterController::request_timeout) before it has been finished via
## ClusterController::Request::finish().
##
## req: the request state that is expiring.
##
global request_expired: event(req: Request);
## This function is a helper predicate to indicate whether a given
## request is null.
##
## request: a Request record to check.
##
## Returns: T if the given request matches the null_req instance, F otherwise.
##
global is_null: function(request: Request): bool;
## For troubleshooting, this function renders a request record to a string.
##
## request: the request to render.
##
global to_string: function(request: Request): string;
}
function requests_expire_func(reqs: table[string] of Request, reqid: string): interval
{
event ClusterController::Request::request_expired(reqs[reqid]);
return 0secs;
}
# This is the global request-tracking table. The table maps from request ID
# strings to corresponding Request records. Entries time out after the
# ClusterController::request_timeout interval. Upon expiration, a
# request_expired event triggers that conveys the request state.
global g_requests: table[string] of Request
&create_expire=ClusterController::request_timeout
&expire_func=requests_expire_func;
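The &create_expire/&expire_func combination above is a generic Zeek table pattern: an entry ages out a fixed interval after creation unless the expire function postpones it by returning a positive interval. A standalone toy version, with invented names and a made-up 30-second lifetime:

type SketchJob: record {
	id: string;
};
function sketch_expire(jobs: table[string] of SketchJob, id: string): interval
	{
	print fmt("job %s timed out", id);
	return 0secs; # a positive interval here would postpone expiration
	}
global g_sketch_jobs: table[string] of SketchJob
	&create_expire=30secs
	&expire_func=sketch_expire;
event zeek_init()
	{
	g_sketch_jobs["job-1"] = SketchJob($id="job-1");
	}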
function create(reqid: string): Request
{
local ret = Request($id=reqid);
g_requests[reqid] = ret;
return ret;
}
function lookup(reqid: string): Request
{
if ( reqid in g_requests )
return g_requests[reqid];
return null_req;
}
function finish(reqid: string): bool
{
if ( reqid !in g_requests )
return F;
local req = g_requests[reqid];
delete g_requests[reqid];
req$finished = T;
@ -84,3 +148,23 @@ function is_null(request: Request): bool
return F;
}
function to_string(request: Request): string
{
local results: string_vec;
local res: ClusterController::Types::Result;
local parent_id = "";
if ( request?$parent_id )
parent_id = fmt(" (via %s)", request$parent_id);
for ( idx in request$results )
{
res = request$results[idx];
results[|results|] = ClusterController::Types::result_to_string(res);
}
return fmt("[request %s%s %s, results: %s]", request$id, parent_id,
request$finished ? "finished" : "pending",
join_string_vec(results, ","));
}
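Putting the API together: a hypothetical caller pairs create() on the request path with lookup()/finish() on the response path, and observes timeouts via request_expired. The function names below are invented; only the Request calls are from this module.

function on_api_request(reqid: string)
	{
	# Track the request until the response or a timeout arrives.
	local req = ClusterController::Request::create(reqid);
	print fmt("tracking %s", ClusterController::Request::to_string(req));
	}
function on_api_response(reqid: string)
	{
	local req = ClusterController::Request::lookup(reqid);
	if ( ClusterController::Request::is_null(req) )
		return; # unknown, or already expired
	ClusterController::Request::finish(req$id);
	}
event ClusterController::Request::request_expired(req: ClusterController::Request::Request)
	{
	print fmt("timed out: %s", ClusterController::Request::to_string(req));
	}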

View file

@ -1,4 +1,6 @@
##! This module holds the basic types needed for the Cluster Controller
##! framework. These are used by both agent and controller, and several
##! have corresponding equivalents in the zeek-client implementation.
module ClusterController::Types;
@ -14,67 +16,96 @@ export {
## A Zeek-side option with value.
type Option: record {
name: string; ##< Name of option
value: string; ##< Value of option
};
## Configuration describing a Zeek instance running a Cluster
## Agent. Normally, there'll be one instance per cluster
## system: a single physical system.
type Instance: record {
## Unique, human-readable instance name
name: string;
## IP address of system
host: addr;
## Agent listening port. Not needed if agents connect to controller.
listen_port: port &optional;
};
type InstanceVec: vector of Instance;
## State that a Cluster Node can be in. State changes trigger an
## API notification (see notify_change()).
type State: enum {
Running, ##< Running and operating normally
Stopped, ##< Explicitly stopped
Failed, ##< Failed to start; and permanently halted
Crashed, ##< Crashed, will be restarted
Unknown, ##< State not known currently (e.g., because of lost connectivity)
};
## Configuration describing a Cluster Node process.
type Node: record {
name: string; ##< Cluster-unique, human-readable node name
instance: string; ##< Name of instance where node is to run
p: port; ##< Port on which this node will listen
role: Supervisor::ClusterRole; ##< Role of the node.
state: State; ##< Desired, or current, run state.
scripts: vector of string &optional; ##< Additional Zeek scripts for node
options: set[Option] &optional; ##< Zeek options for node
interface: string &optional; ##< Interface to sniff
cpu_affinity: int &optional; ##< CPU/core number to pin to
env: table[string] of string &default=table(); ##< Custom environment vars
};
## Data structure capturing a cluster's complete configuration.
type Configuration: record {
id: string &default=unique_id(""); # Unique identifier for a particular configuration
id: string &default=unique_id(""); ##< Unique identifier for a particular configuration
## The instances in the cluster.
## XXX we may be able to make this optional
instances: set[Instance] &default=set();
## The set of nodes in the cluster, as distributed over the instances.
nodes: set[Node] &default=set();
};
## Return value for request-response API event pairs
type Result: record {
reqid: string; ##< Request ID of operation this result refers to
instance: string &default=""; ##< Name of associated instance (for context)
success: bool &default=T; ##< True if successful
data: any &optional; ##< Addl data returned for successful operation
error: string &default=""; ##< Descriptive error on failure
node: string &optional; ##< Name of associated node (for context)
};
type ResultVec: vector of Result;
global result_to_string: function(res: Result): string;
}
function result_to_string(res: Result): string
{
local result = "";
if ( res$success )
result = "success";
else if ( res$error != "" )
result = fmt("error (%s)", res$error);
else
result = "error";
local details: string_vec;
if ( res$reqid != "" )
details[|details|] = fmt("reqid %s", res$reqid);
if ( res$instance != "" )
details[|details|] = fmt("instance %s", res$instance);
if ( res?$node && res$node != "" )
details[|details|] = fmt("node %s", res$node);
if ( |details| > 0 )
result = fmt("%s (%s)", result, join_string_vec(details, ", "));
return result;
}
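To make these types concrete, here is an invented single-instance, single-worker Configuration plus a rendered failure Result; all values are made up.

event zeek_init()
	{
	local config = ClusterController::Types::Configuration(
		$instances=set(ClusterController::Types::Instance($name="instance-1", $host=127.0.0.1)),
		$nodes=set(ClusterController::Types::Node($name="worker-01", $instance="instance-1",
			$p=10000/tcp, $role=Supervisor::WORKER, $state=ClusterController::Types::Running)));
	print fmt("%d node(s) configured", |config$nodes|);
	local res = ClusterController::Types::Result($reqid="req-0042", $instance="instance-1");
	res$success = F;
	res$error = "agent unreachable";
	# Prints: error (agent unreachable) (reqid req-0042, instance instance-1)
	print ClusterController::Types::result_to_string(res);
	}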

View file

@ -0,0 +1,25 @@
##! Utility functions for the cluster controller framework, available to agent
##! and controller.
module ClusterController::Util;
export {
## Renders a set of strings to an alphabetically sorted vector.
##
## ss: the string set to convert.
##
## Returns: the vector of all strings in ss.
global set_to_vector: function(ss: set[string]): vector of string;
}
function set_to_vector(ss: set[string]): vector of string
{
local res: vector of string;
for ( s in ss )
res[|res|] = s;
sort(res, strcmp);
return res;
}
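Usage is straightforward; the sort makes output deterministic regardless of set iteration order (invented values):

event zeek_init()
	{
	local names = ClusterController::Util::set_to_vector(set("worker-2", "logger", "worker-1"));
	print join_string_vec(names, ","); # logger,worker-1,worker-2
	}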

View file

@ -22,7 +22,7 @@ export {
type dir: enum { NONE, INCOMING, OUTGOING, BOTH };
option valids: table[AllAnalyzers::Tag, addr, port] of dir = {
# A couple of ports commonly used for benign HTTP servers.
# For now we want to see everything.
@ -45,7 +45,7 @@ export {
# log files, this also saves memory because for these we don't
# need to remember which servers we already have reported, which
# for some can be a lot.
option suppress_servers: set [AllAnalyzers::Tag] = {
# Analyzer::ANALYZER_HTTP
};
@ -61,7 +61,7 @@ export {
# Entry point for other analyzers to report that they recognized
# a certain (sub-)protocol.
global found_protocol: function(c: connection, analyzer: AllAnalyzers::Tag,
protocol: string);
# Table keeping reported (server, port, analyzer) tuples (and their
@ -74,7 +74,7 @@ export {
}
# Table that tracks currently active dynamic analyzers per connection.
global conns: table[conn_id] of set[AllAnalyzers::Tag];
# Table of reports by other analyzers about the protocol used in a connection.
global protocols: table[conn_id] of set[string];
@ -84,7 +84,7 @@ type protocol : record {
sub: string; # "sub-protocols" reported by other sources
};
function get_protocol(c: connection, a: AllAnalyzers::Tag) : protocol
{
local str = "";
if ( c$id in protocols )
@ -101,7 +101,7 @@ function fmt_protocol(p: protocol) : string
return p$sub != "" ? fmt("%s (via %s)", p$sub, p$a) : p$a;
}
function do_notice(c: connection, a: AllAnalyzers::Tag, d: dir)
{
if ( d == BOTH )
return;
@ -198,7 +198,7 @@ hook finalize_protocol_detection(c: connection)
report_protocols(c);
}
event analyzer_confirmation(c: connection, atype: AllAnalyzers::Tag, aid: count)
{
# Don't report anything running on a well-known port.
if ( c$id$resp_p in Analyzer::registered_ports(atype) )
@ -219,7 +219,7 @@ event protocol_confirmation(c: connection, atype: Analyzer::Tag, aid: count)
}
}
function found_protocol(c: connection, atype: AllAnalyzers::Tag, protocol: string)
{
# Don't report anything running on a well-known port.
if ( c$id$resp_p in Analyzer::registered_ports(atype) )

View file

@ -11,7 +11,7 @@ module DPD;
export {
redef record Info += {
## A chunk of the payload that most likely resulted in the
## analyzer violation.
packet_segment: string &optional &log;
};
@ -20,7 +20,7 @@ export {
}
event analyzer_violation(c: connection, atype: AllAnalyzers::Tag, aid: count,
reason: string) &priority=4
{
if ( ! c?$dpd ) return;

View file

@ -1,4 +1,3 @@
module Files;
export {

View file

@ -27,4 +27,3 @@ hook Intel::extend_match(info: Info, s: Seen, items: set[Item]) &priority=9
# Prevent logging
break;
}

View file

@ -35,4 +35,3 @@ event zeek_init()
if ( trim_interval > 0 secs )
schedule trim_interval { TrimTraceFile::go(T) };
}

View file

@ -262,7 +262,7 @@ function known_services_done(c: connection)
}
if ( ! has_active_service(c) )
# If we're here during an analyzer_confirmation, it's still premature
# to declare there's an actual service, so wait for the connection
# removal to check again (to get more timely reporting we'd have
# schedule some recurring event to poll for handshake/activity).
@ -293,7 +293,7 @@ function known_services_done(c: connection)
event service_info_commit(info);
}
event analyzer_confirmation(c: connection, atype: AllAnalyzers::Tag, aid: count) &priority=-5
{
known_services_done(c);
}
@ -314,4 +314,3 @@ event zeek_init() &priority=5
$path="known_services",
$policy=log_policy_services]);
}

View file

@ -79,4 +79,3 @@ event log_smtp(rec: Info)
}
}
}

View file

@ -49,4 +49,3 @@ event ssh_auth_successful(c: connection, auth_method_none: bool)
check_ssh_hostname(c$id, c$uid, host);
}
}

View file

@ -24,6 +24,7 @@
# @load frameworks/cluster/controller/main.zeek
@load frameworks/cluster/controller/request.zeek
@load frameworks/cluster/controller/types.zeek
@load frameworks/cluster/controller/util.zeek
@load frameworks/dpd/detect-protocols.zeek
@load frameworks/dpd/packet-segment-logging.zeek
@load frameworks/intel/do_notice.zeek

@ -1 +1 @@
Subproject commit d31b51e6a06ad4c71db81981920eb753954abbf8
Subproject commit cb626c94f67e0ac0437beba076da1184eb1f8ad7

View file

@ -280,7 +280,7 @@ add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/ZAM-AssignFlavorsDefs.h
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
)
set_source_files_properties(3rdparty/nb_dns.c PROPERTIES COMPILE_FLAGS
-fno-strict-aliasing)
set(MAIN_SRCS
@ -297,7 +297,6 @@ set(MAIN_SRCS
CCL.cc
CompHash.cc
Conn.cc
ConvertUTF.c
DFA.cc
DbgBreakpoint.cc
DbgHelp.cc
@ -367,13 +366,6 @@ set(MAIN_SRCS
ZeekArgs.cc
ZeekString.cc
ZVal.cc
bsd-getopt-long.c
bro_inet_ntop.c
in_cksum.cc
patricia.c
setsignal.c
strsep.c
modp_numtoa.c
supervisor/Supervisor.cc
@ -387,7 +379,6 @@ set(MAIN_SRCS
plugin/Component.cc
plugin/ComponentManager.h
plugin/TaggedComponent.h
plugin/Manager.cc
plugin/Plugin.cc
@ -399,9 +390,10 @@ set(MAIN_SRCS
script_opt/CPP/Exprs.cc
script_opt/CPP/Func.cc
script_opt/CPP/GenFunc.cc
script_opt/CPP/HashMgr.cc
script_opt/CPP/Inits.cc
script_opt/CPP/RuntimeInit.cc
script_opt/CPP/InitsInfo.cc
script_opt/CPP/RuntimeInits.cc
script_opt/CPP/RuntimeInitSupport.cc
script_opt/CPP/RuntimeOps.cc
script_opt/CPP/RuntimeVec.cc
script_opt/CPP/Stmts.cc
@ -437,12 +429,20 @@ set(MAIN_SRCS
script_opt/ZAM/ZInst.cc
script_opt/ZAM/ZOp.cc
nb_dns.c
digest.h
)
set(THIRD_PARTY_SRCS
3rdparty/bro_inet_ntop.c
3rdparty/bsd-getopt-long.c
3rdparty/ConvertUTF.c
3rdparty/in_cksum.cc
3rdparty/modp_numtoa.c
3rdparty/nb_dns.c
3rdparty/patricia.c
3rdparty/setsignal.c
3rdparty/sqlite3.c
3rdparty/strsep.c
)
set(GEN_ZAM_SRCS
@ -620,7 +620,15 @@ install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/
)
install(FILES
${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/ConvertUTF.h
${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/bro_inet_ntop.h
${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/bsd-getopt-long.h
${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/modp_numtoa.h
${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/nb_dns.h
${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/patricia.h
${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/setsignal.h
${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/sqlite3.h
${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/doctest.h
DESTINATION include/zeek/3rdparty
)

View file

@ -501,6 +501,11 @@ bool CompositeHash::SingleValHash(HashKey& hk, const Val* v, Type* bt, bool type
return true;
}
// All of the rest of the code here depends on v not being null, since it needs
// to get values from it.
if ( ! v )
return false;
switch ( t )
{
case TYPE_INTERNAL_INT:
@ -695,7 +700,7 @@ bool CompositeHash::SingleValHash(HashKey& hk, const Val* v, Type* bt, bool type
}
break;
default:
return false;
}

View file

@ -286,7 +286,7 @@ analyzer::Analyzer* Connection::FindAnalyzer(analyzer::ID id)
return adapter ? adapter->FindChild(id) : nullptr;
}
analyzer::Analyzer* Connection::FindAnalyzer(const zeek::Tag& tag)
{
return adapter ? adapter->FindChild(tag) : nullptr;
}

View file

@ -11,12 +11,12 @@
#include "zeek/IPAddr.h"
#include "zeek/IntrusivePtr.h"
#include "zeek/Rule.h"
#include "zeek/Tag.h"
#include "zeek/Timer.h"
#include "zeek/UID.h"
#include "zeek/WeirdState.h"
#include "zeek/ZeekArgs.h"
#include "zeek/analyzer/Analyzer.h"
#include "zeek/analyzer/Tag.h"
#include "zeek/iosource/Packet.h"
#include "zeek/session/Session.h"
@ -136,7 +136,7 @@ public:
void FlipRoles();
analyzer::Analyzer* FindAnalyzer(analyzer::ID id);
analyzer::Analyzer* FindAnalyzer(const zeek::Tag& tag); // find first in tree.
analyzer::Analyzer* FindAnalyzer(const char* name); // find first in tree.
TransportProto ConnTransport() const { return proto; }

View file

@ -1,755 +0,0 @@
/*===--- ConvertUTF.c - Universal Character Names conversions ---------------===
*
* The LLVM Compiler Infrastructure
*
* This file is distributed under the University of Illinois Open Source
* License:
*
* University of Illinois/NCSA
* Open Source License
*
* Copyright (c) 2003-2014 University of Illinois at Urbana-Champaign.
* All rights reserved.
*
* Developed by:
*
* LLVM Team
*
* University of Illinois at Urbana-Champaign
*
* http://llvm.org
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal with the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* * Redistributions of source code must retain the above
* copyright notice, this list of conditions and the
* following disclaimers.
*
* * Redistributions in binary form must reproduce the
* above copyright notice, this list of conditions and
* the following disclaimers in the documentation and/or
* other materials provided with the distribution.
*
* * Neither the names of the LLVM Team, University of
* Illinois at Urbana-Champaign, nor the names of its
* contributors may be used to endorse or promote
* products derived from this Software without specific
* prior written permission.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE CONTRIBUTORS OR
* COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS WITH THE SOFTWARE.
*
*===------------------------------------------------------------------------=*/
/*
* Copyright 2001-2004 Unicode, Inc.
*
* Disclaimer
*
* This source code is provided as is by Unicode, Inc. No claims are
* made as to fitness for any particular purpose. No warranties of any
* kind are expressed or implied. The recipient agrees to determine
* applicability of information provided. If this file has been
* purchased on magnetic or optical media from Unicode, Inc., the
* sole remedy for any claim will be exchange of defective media
* within 90 days of receipt.
*
* Limitations on Rights to Redistribute This Code
*
* Unicode, Inc. hereby grants the right to freely use the information
* supplied in this file in the creation of products supporting the
* Unicode Standard, and to make copies of this file in any form
* for internal or external distribution as long as this notice
* remains attached.
*/
/* ---------------------------------------------------------------------
Conversions between UTF32, UTF-16, and UTF-8. Source code file.
Author: Mark E. Davis, 1994.
Rev History: Rick McGowan, fixes & updates May 2001.
Sept 2001: fixed const & error conditions per
mods suggested by S. Parent & A. Lillich.
June 2002: Tim Dodd added detection and handling of incomplete
source sequences, enhanced error detection, added casts
to eliminate compiler warnings.
July 2003: slight mods to back out aggressive FFFE detection.
Jan 2004: updated switches in from-UTF8 conversions.
Oct 2004: updated to use UNI_MAX_LEGAL_UTF32 in UTF-32 conversions.
See the header file "ConvertUTF.h" for complete documentation.
------------------------------------------------------------------------ */
#include "zeek/ConvertUTF.h"
#ifdef CVTUTF_DEBUG
#include <stdio.h>
#endif
#include <assert.h>
static const int halfShift = 10; /* used for shifting by 10 bits */
static const UTF32 halfBase = 0x0010000UL;
static const UTF32 halfMask = 0x3FFUL;
#define UNI_SUR_HIGH_START (UTF32)0xD800
#define UNI_SUR_HIGH_END (UTF32)0xDBFF
#define UNI_SUR_LOW_START (UTF32)0xDC00
#define UNI_SUR_LOW_END (UTF32)0xDFFF
#define false 0
#define true 1
/* --------------------------------------------------------------------- */
/*
* Index into the table below with the first byte of a UTF-8 sequence to
* get the number of trailing bytes that are supposed to follow it.
* Note that *legal* UTF-8 values can't have 4 or 5-bytes. The table is
* left as-is for anyone who may want to do such conversion, which was
* allowed in earlier algorithms.
*/
static const char trailingBytesForUTF8[256] = {
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
};
/*
* Magic values subtracted from a buffer value during UTF8 conversion.
* This table contains as many values as there might be trailing bytes
* in a UTF-8 sequence.
*/
static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL,
0x03C82080UL, 0xFA082080UL, 0x82082080UL };
/*
* Once the bits are split out into bytes of UTF-8, this is a mask OR-ed
* into the first byte, depending on how many bytes follow. There are
* as many entries in this table as there are UTF-8 sequence types.
* (I.e., one byte sequence, two byte... etc.). Remember that sequences
* for *legal* UTF-8 will be 4 or fewer bytes total.
*/
static const UTF8 firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
/* --------------------------------------------------------------------- */
/* The interface converts a whole buffer to avoid function-call overhead.
* Constants have been gathered. Loops & conditionals have been removed as
* much as possible for efficiency, in favor of drop-through switches.
* (See "Note A" at the bottom of the file for equivalent code.)
* If your compiler supports it, the "isLegalUTF8" call can be turned
* into an inline function.
*/
/* --------------------------------------------------------------------- */
ConversionResult ConvertUTF32toUTF16 (
const UTF32** sourceStart, const UTF32* sourceEnd,
UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags) {
ConversionResult result = conversionOK;
const UTF32* source = *sourceStart;
UTF16* target = *targetStart;
while (source < sourceEnd) {
UTF32 ch;
if (target >= targetEnd) {
result = targetExhausted; break;
}
ch = *source++;
if (ch <= UNI_MAX_BMP) { /* Target is a character <= 0xFFFF */
/* UTF-16 surrogate values are illegal in UTF-32; 0xffff or 0xfffe are both reserved values */
if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
if (flags == strictConversion) {
--source; /* return to the illegal value itself */
result = sourceIllegal;
break;
} else {
*target++ = UNI_REPLACEMENT_CHAR;
}
} else {
*target++ = (UTF16)ch; /* normal case */
}
} else if (ch > UNI_MAX_LEGAL_UTF32) {
if (flags == strictConversion) {
result = sourceIllegal;
} else {
*target++ = UNI_REPLACEMENT_CHAR;
}
} else {
/* target is a character in range 0xFFFF - 0x10FFFF. */
if (target + 1 >= targetEnd) {
--source; /* Back up source pointer! */
result = targetExhausted; break;
}
ch -= halfBase;
*target++ = (UTF16)((ch >> halfShift) + UNI_SUR_HIGH_START);
*target++ = (UTF16)((ch & halfMask) + UNI_SUR_LOW_START);
}
}
*sourceStart = source;
*targetStart = target;
return result;
}
/* --------------------------------------------------------------------- */
ConversionResult ConvertUTF16toUTF32 (
const UTF16** sourceStart, const UTF16* sourceEnd,
UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags) {
ConversionResult result = conversionOK;
const UTF16* source = *sourceStart;
UTF32* target = *targetStart;
UTF32 ch, ch2;
while (source < sourceEnd) {
const UTF16* oldSource = source; /* In case we have to back up because of target overflow. */
ch = *source++;
/* If we have a surrogate pair, convert to UTF32 first. */
if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) {
/* If the 16 bits following the high surrogate are in the source buffer... */
if (source < sourceEnd) {
ch2 = *source;
/* If it's a low surrogate, convert to UTF32. */
if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) {
ch = ((ch - UNI_SUR_HIGH_START) << halfShift)
+ (ch2 - UNI_SUR_LOW_START) + halfBase;
++source;
} else if (flags == strictConversion) { /* it's an unpaired high surrogate */
--source; /* return to the illegal value itself */
result = sourceIllegal;
break;
}
} else { /* We don't have the 16 bits following the high surrogate. */
--source; /* return to the high surrogate */
result = sourceExhausted;
break;
}
} else if (flags == strictConversion) {
/* UTF-16 surrogate values are illegal in UTF-32 */
if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) {
--source; /* return to the illegal value itself */
result = sourceIllegal;
break;
}
}
if (target >= targetEnd) {
source = oldSource; /* Back up source pointer! */
result = targetExhausted; break;
}
*target++ = ch;
}
*sourceStart = source;
*targetStart = target;
#ifdef CVTUTF_DEBUG
if (result == sourceIllegal) {
fprintf(stderr, "ConvertUTF16toUTF32 illegal seq 0x%04x,%04x\n", ch, ch2);
fflush(stderr);
}
#endif
return result;
}
ConversionResult ConvertUTF16toUTF8 (
const UTF16** sourceStart, const UTF16* sourceEnd,
UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags) {
ConversionResult result = conversionOK;
const UTF16* source = *sourceStart;
UTF8* target = *targetStart;
while (source < sourceEnd) {
UTF32 ch;
unsigned short bytesToWrite = 0;
const UTF32 byteMask = 0xBF;
const UTF32 byteMark = 0x80;
const UTF16* oldSource = source; /* In case we have to back up because of target overflow. */
ch = *source++;
/* If we have a surrogate pair, convert to UTF32 first. */
if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) {
/* If the 16 bits following the high surrogate are in the source buffer... */
if (source < sourceEnd) {
UTF32 ch2 = *source;
/* If it's a low surrogate, convert to UTF32. */
if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) {
ch = ((ch - UNI_SUR_HIGH_START) << halfShift)
+ (ch2 - UNI_SUR_LOW_START) + halfBase;
++source;
} else if (flags == strictConversion) { /* it's an unpaired high surrogate */
--source; /* return to the illegal value itself */
result = sourceIllegal;
break;
}
} else { /* We don't have the 16 bits following the high surrogate. */
--source; /* return to the high surrogate */
result = sourceExhausted;
break;
}
} else if (flags == strictConversion) {
/* UTF-16 surrogate values are illegal in UTF-32 */
if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) {
--source; /* return to the illegal value itself */
result = sourceIllegal;
break;
}
}
/* Figure out how many bytes the result will require */
if (ch < (UTF32)0x80) { bytesToWrite = 1;
} else if (ch < (UTF32)0x800) { bytesToWrite = 2;
} else if (ch < (UTF32)0x10000) { bytesToWrite = 3;
} else if (ch < (UTF32)0x110000) { bytesToWrite = 4;
} else { bytesToWrite = 3;
ch = UNI_REPLACEMENT_CHAR;
}
target += bytesToWrite;
if (target > targetEnd) {
source = oldSource; /* Back up source pointer! */
target -= bytesToWrite; result = targetExhausted; break;
}
switch (bytesToWrite) { /* note: everything falls through. */
case 4: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
case 3: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
case 2: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
case 1: *--target = (UTF8)(ch | firstByteMark[bytesToWrite]);
}
target += bytesToWrite;
}
*sourceStart = source;
*targetStart = target;
return result;
}
/* --------------------------------------------------------------------- */
ConversionResult ConvertUTF32toUTF8 (
const UTF32** sourceStart, const UTF32* sourceEnd,
UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags) {
ConversionResult result = conversionOK;
const UTF32* source = *sourceStart;
UTF8* target = *targetStart;
while (source < sourceEnd) {
UTF32 ch;
unsigned short bytesToWrite = 0;
const UTF32 byteMask = 0xBF;
const UTF32 byteMark = 0x80;
ch = *source++;
if (flags == strictConversion ) {
/* UTF-16 surrogate values are illegal in UTF-32 */
if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
--source; /* return to the illegal value itself */
result = sourceIllegal;
break;
}
}
/*
* Figure out how many bytes the result will require. Turn any
* illegally large UTF32 things (> Plane 17) into replacement chars.
*/
if (ch < (UTF32)0x80) { bytesToWrite = 1;
} else if (ch < (UTF32)0x800) { bytesToWrite = 2;
} else if (ch < (UTF32)0x10000) { bytesToWrite = 3;
} else if (ch <= UNI_MAX_LEGAL_UTF32) { bytesToWrite = 4;
} else { bytesToWrite = 3;
ch = UNI_REPLACEMENT_CHAR;
result = sourceIllegal;
}
target += bytesToWrite;
if (target > targetEnd) {
--source; /* Back up source pointer! */
target -= bytesToWrite; result = targetExhausted; break;
}
switch (bytesToWrite) { /* note: everything falls through. */
case 4: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
case 3: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
case 2: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
case 1: *--target = (UTF8) (ch | firstByteMark[bytesToWrite]);
}
target += bytesToWrite;
}
*sourceStart = source;
*targetStart = target;
return result;
}
/* --------------------------------------------------------------------- */
/*
* Utility routine to tell whether a sequence of bytes is legal UTF-8.
* This must be called with the length pre-determined by the first byte.
* If not calling this from ConvertUTF8to*, then the length can be set by:
* length = trailingBytesForUTF8[*source]+1;
* and the sequence is illegal right away if there aren't that many bytes
* available.
* If presented with a length > 4, this returns false. The Unicode
* definition of UTF-8 goes up to 4-byte sequences.
*/
static Boolean isLegalUTF8(const UTF8 *source, int length) {
UTF8 a;
const UTF8 *srcptr = source+length;
switch (length) {
default: return false;
/* Everything else falls through when "true"... */
case 4: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false;
case 3: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false;
case 2: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false;
switch (*source) {
/* no fall-through in this inner switch */
case 0xE0: if (a < 0xA0) return false; break;
case 0xED: if (a > 0x9F) return false; break;
case 0xF0: if (a < 0x90) return false; break;
case 0xF4: if (a > 0x8F) return false; break;
default: if (a < 0x80) return false;
}
case 1: if (*source >= 0x80 && *source < 0xC2) return false;
}
if (*source > 0xF4) return false;
return true;
}
/* --------------------------------------------------------------------- */
/*
* Exported function to return whether a UTF-8 sequence is legal or not.
* This is not used here; it's just exported.
*/
Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd) {
int length = trailingBytesForUTF8[*source]+1;
if (length > sourceEnd - source) {
return false;
}
return isLegalUTF8(source, length);
}
/* --------------------------------------------------------------------- */
static unsigned
findMaximalSubpartOfIllFormedUTF8Sequence(const UTF8 *source,
const UTF8 *sourceEnd) {
UTF8 b1, b2, b3;
assert(!isLegalUTF8Sequence(source, sourceEnd));
/*
* Unicode 6.3.0, D93b:
*
* Maximal subpart of an ill-formed subsequence: The longest code unit
* subsequence starting at an unconvertible offset that is either:
* a. the initial subsequence of a well-formed code unit sequence, or
* b. a subsequence of length one.
*/
if (source == sourceEnd)
return 0;
/*
* Perform case analysis. See Unicode 6.3.0, Table 3-7. Well-Formed UTF-8
* Byte Sequences.
*/
b1 = *source;
++source;
if (b1 >= 0xC2 && b1 <= 0xDF) {
/*
* First byte is valid, but we know that this code unit sequence is
* invalid, so the maximal subpart has to end after the first byte.
*/
return 1;
}
if (source == sourceEnd)
return 1;
b2 = *source;
++source;
if (b1 == 0xE0) {
return (b2 >= 0xA0 && b2 <= 0xBF) ? 2 : 1;
}
if (b1 >= 0xE1 && b1 <= 0xEC) {
return (b2 >= 0x80 && b2 <= 0xBF) ? 2 : 1;
}
if (b1 == 0xED) {
return (b2 >= 0x80 && b2 <= 0x9F) ? 2 : 1;
}
if (b1 >= 0xEE && b1 <= 0xEF) {
return (b2 >= 0x80 && b2 <= 0xBF) ? 2 : 1;
}
if (b1 == 0xF0) {
if (b2 >= 0x90 && b2 <= 0xBF) {
if (source == sourceEnd)
return 2;
b3 = *source;
return (b3 >= 0x80 && b3 <= 0xBF) ? 3 : 2;
}
return 1;
}
if (b1 >= 0xF1 && b1 <= 0xF3) {
if (b2 >= 0x80 && b2 <= 0xBF) {
if (source == sourceEnd)
return 2;
b3 = *source;
return (b3 >= 0x80 && b3 <= 0xBF) ? 3 : 2;
}
return 1;
}
if (b1 == 0xF4) {
if (b2 >= 0x80 && b2 <= 0x8F) {
if (source == sourceEnd)
return 2;
b3 = *source;
return (b3 >= 0x80 && b3 <= 0xBF) ? 3 : 2;
}
return 1;
}
assert((b1 >= 0x80 && b1 <= 0xC1) || b1 >= 0xF5);
/*
* There are no valid sequences that start with these bytes. Maximal subpart
* is defined to have length 1 in these cases.
*/
return 1;
}
/* --------------------------------------------------------------------- */
/*
* Exported function to return the total number of bytes in a codepoint
* represented in UTF-8, given the value of the first byte.
*/
unsigned getNumBytesForUTF8(UTF8 first) {
return trailingBytesForUTF8[first] + 1;
}
/* --------------------------------------------------------------------- */
/*
* Exported function to return whether a UTF-8 string is legal or not.
* This is not used here; it's just exported.
*/
Boolean isLegalUTF8String(const UTF8 **source, const UTF8 *sourceEnd) {
while (*source != sourceEnd) {
int length = trailingBytesForUTF8[**source] + 1;
if (length > sourceEnd - *source || !isLegalUTF8(*source, length))
return false;
*source += length;
}
return true;
}
/* --------------------------------------------------------------------- */
ConversionResult ConvertUTF8toUTF16 (
const UTF8** sourceStart, const UTF8* sourceEnd,
UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags) {
ConversionResult result = conversionOK;
const UTF8* source = *sourceStart;
UTF16* target = *targetStart;
while (source < sourceEnd) {
UTF32 ch = 0;
unsigned short extraBytesToRead = trailingBytesForUTF8[*source];
if (extraBytesToRead >= sourceEnd - source) {
result = sourceExhausted; break;
}
/* Do this check whether lenient or strict */
if (!isLegalUTF8(source, extraBytesToRead+1)) {
result = sourceIllegal;
break;
}
/*
* The cases all fall through. See "Note A" below.
*/
switch (extraBytesToRead) {
case 5: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */
case 4: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */
case 3: ch += *source++; ch <<= 6;
case 2: ch += *source++; ch <<= 6;
case 1: ch += *source++; ch <<= 6;
case 0: ch += *source++;
}
ch -= offsetsFromUTF8[extraBytesToRead];
if (target >= targetEnd) {
source -= (extraBytesToRead+1); /* Back up source pointer! */
result = targetExhausted; break;
}
if (ch <= UNI_MAX_BMP) { /* Target is a character <= 0xFFFF */
/* UTF-16 surrogate values are illegal in UTF-32 */
if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
if (flags == strictConversion) {
source -= (extraBytesToRead+1); /* return to the illegal value itself */
result = sourceIllegal;
break;
} else {
*target++ = UNI_REPLACEMENT_CHAR;
}
} else {
*target++ = (UTF16)ch; /* normal case */
}
} else if (ch > UNI_MAX_UTF16) {
if (flags == strictConversion) {
result = sourceIllegal;
source -= (extraBytesToRead+1); /* return to the start */
break; /* Bail out; shouldn't continue */
} else {
*target++ = UNI_REPLACEMENT_CHAR;
}
} else {
/* target is a character in range 0xFFFF - 0x10FFFF. */
if (target + 1 >= targetEnd) {
source -= (extraBytesToRead+1); /* Back up source pointer! */
result = targetExhausted; break;
}
ch -= halfBase;
*target++ = (UTF16)((ch >> halfShift) + UNI_SUR_HIGH_START);
*target++ = (UTF16)((ch & halfMask) + UNI_SUR_LOW_START);
}
}
*sourceStart = source;
*targetStart = target;
return result;
}
/* --------------------------------------------------------------------- */
static ConversionResult ConvertUTF8toUTF32Impl(
const UTF8** sourceStart, const UTF8* sourceEnd,
UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags,
Boolean InputIsPartial) {
ConversionResult result = conversionOK;
const UTF8* source = *sourceStart;
UTF32* target = *targetStart;
while (source < sourceEnd) {
UTF32 ch = 0;
unsigned short extraBytesToRead = trailingBytesForUTF8[*source];
if (extraBytesToRead >= sourceEnd - source) {
if (flags == strictConversion || InputIsPartial) {
result = sourceExhausted;
break;
} else {
result = sourceIllegal;
/*
* Replace the maximal subpart of ill-formed sequence with
* replacement character.
*/
source += findMaximalSubpartOfIllFormedUTF8Sequence(source,
sourceEnd);
*target++ = UNI_REPLACEMENT_CHAR;
continue;
}
}
if (target >= targetEnd) {
result = targetExhausted; break;
}
/* Do this check whether lenient or strict */
if (!isLegalUTF8(source, extraBytesToRead+1)) {
result = sourceIllegal;
if (flags == strictConversion) {
/* Abort conversion. */
break;
} else {
/*
* Replace the maximal subpart of ill-formed sequence with
* replacement character.
*/
source += findMaximalSubpartOfIllFormedUTF8Sequence(source,
sourceEnd);
*target++ = UNI_REPLACEMENT_CHAR;
continue;
}
}
/*
* The cases all fall through. See "Note A" below.
*/
switch (extraBytesToRead) {
case 5: ch += *source++; ch <<= 6;
case 4: ch += *source++; ch <<= 6;
case 3: ch += *source++; ch <<= 6;
case 2: ch += *source++; ch <<= 6;
case 1: ch += *source++; ch <<= 6;
case 0: ch += *source++;
}
ch -= offsetsFromUTF8[extraBytesToRead];
if (ch <= UNI_MAX_LEGAL_UTF32) {
/*
* UTF-16 surrogate values are illegal in UTF-32, and anything
* over Plane 17 (> 0x10FFFF) is illegal.
*/
if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
if (flags == strictConversion) {
source -= (extraBytesToRead+1); /* return to the illegal value itself */
result = sourceIllegal;
break;
} else {
*target++ = UNI_REPLACEMENT_CHAR;
}
} else {
*target++ = ch;
}
} else { /* i.e., ch > UNI_MAX_LEGAL_UTF32 */
result = sourceIllegal;
*target++ = UNI_REPLACEMENT_CHAR;
}
}
*sourceStart = source;
*targetStart = target;
return result;
}
ConversionResult ConvertUTF8toUTF32Partial(const UTF8 **sourceStart,
const UTF8 *sourceEnd,
UTF32 **targetStart,
UTF32 *targetEnd,
ConversionFlags flags) {
return ConvertUTF8toUTF32Impl(sourceStart, sourceEnd, targetStart, targetEnd,
flags, /*InputIsPartial=*/true);
}
ConversionResult ConvertUTF8toUTF32(const UTF8 **sourceStart,
const UTF8 *sourceEnd, UTF32 **targetStart,
UTF32 *targetEnd, ConversionFlags flags) {
return ConvertUTF8toUTF32Impl(sourceStart, sourceEnd, targetStart, targetEnd,
flags, /*InputIsPartial=*/false);
}
/* ---------------------------------------------------------------------
Note A.
The fall-through switches in UTF-8 reading code save a
temp variable, some decrements & conditionals. The switches
are equivalent to the following loop:
{
int tmpBytesToRead = extraBytesToRead+1;
do {
ch += *source++;
--tmpBytesToRead;
if (tmpBytesToRead) ch <<= 6;
} while (tmpBytesToRead > 0);
}
In UTF-8 writing code, the switches on "bytesToWrite" are
similarly unrolled loops.
--------------------------------------------------------------------- */

Some files were not shown because too many files have changed in this diff.