diff --git a/.cirrus.yml b/.cirrus.yml index 6157b8fb5d..c1f963cf6c 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -10,10 +10,11 @@ btest_jobs: &BTEST_JOBS 4 btest_retries: &BTEST_RETRIES 2 memory: &MEMORY 4GB -config: &CONFIG --build-type=release --enable-cpp-tests --disable-broker-tests --prefix=$CIRRUS_WORKING_DIR/install -static_config: &STATIC_CONFIG --build-type=release --enable-cpp-tests --disable-broker-tests --enable-static-broker --enable-static-binpac --prefix=$CIRRUS_WORKING_DIR/install -sanitizer_config: &SANITIZER_CONFIG --build-type=debug --enable-cpp-tests --disable-broker-tests --sanitizers=address,undefined --enable-fuzzers --enable-coverage -mobile_ipv6_config: &MOBILE_IPV6_CONFIG --build-type=release --enable-cpp-tests --enable-mobile-ipv6 --disable-broker-tests --prefix=$CIRRUS_WORKING_DIR/install +config: &CONFIG --build-type=release --disable-broker-tests --prefix=$CIRRUS_WORKING_DIR/install +static_config: &STATIC_CONFIG --build-type=release --disable-broker-tests --enable-static-broker --enable-static-binpac --prefix=$CIRRUS_WORKING_DIR/install +sanitizer_config: &SANITIZER_CONFIG --build-type=debug --disable-broker-tests --sanitizers=address,undefined --enable-fuzzers --enable-coverage +mobile_ipv6_config: &MOBILE_IPV6_CONFIG --build-type=release --enable-mobile-ipv6 --disable-broker-tests --prefix=$CIRRUS_WORKING_DIR/install +openssl30_config: &OPENSSL30_CONFIG --build-type=release --disable-broker-tests --with-openssl=/opt/openssl --prefix=$CIRRUS_WORKING_DIR/install resources_template: &RESOURCES_TEMPLATE cpu: *CPUS @@ -93,6 +94,13 @@ env: # Linux EOL timelines: https://linuxlifecycle.com/ # Fedora (~13 months): https://fedoraproject.org/wiki/Fedora_Release_Life_Cycle +fedora35_task: + container: + # Fedora 35 EOL: Around Dec 2022 + dockerfile: ci/fedora-35/Dockerfile + << : *RESOURCES_TEMPLATE + << : *CI_TEMPLATE + fedora34_task: container: # Fedora 34 EOL: Around May 2022 @@ -212,16 +220,16 @@ alpine_task: # Apple doesn't publish 
official long-term support timelines. # We aim to support both the current and previous macOS release. -macos_big_sur_task: +macos_monterey_task: macos_instance: - image: big-sur-xcode-12.5 + image: monterey-xcode-13.1 prepare_script: ./ci/macos/prepare.sh << : *CI_TEMPLATE << : *MACOS_RESOURCES_TEMPLATE -macos_catalina_task: +macos_big_sur_task: macos_instance: - image: catalina-xcode + image: big-sur-xcode-12.5 prepare_script: ./ci/macos/prepare.sh << : *CI_TEMPLATE << : *MACOS_RESOURCES_TEMPLATE @@ -261,6 +269,17 @@ freebsd12_task: prepare_script: ./ci/freebsd/prepare.sh << : *CI_TEMPLATE +# This can be removed as soon as the first distribution that we use ships +# OpenSSL 3.0 +openssl30_task: + container: + # Tweaked Ubuntu 20.04 EOL: April 2025 + dockerfile: ci/openssl-3.0/Dockerfile + << : *RESOURCES_TEMPLATE + << : *CI_TEMPLATE + env: + ZEEK_CI_CONFIGURE_FLAGS: *OPENSSL30_CONFIG + sanitizer_task: container: # Just uses a recent/common distro to run memory error/leak checks. diff --git a/.clang-format b/.clang-format index a105b7df33..4c628b3465 100644 --- a/.clang-format +++ b/.clang-format @@ -1,10 +1,5 @@ # Clang-format configuration for Zeek. This configuration requires # at least clang-format 12.0.1 to format correctly. -# -# The easiest way to run this from the command-line is using the -# python script in auxil/run-clang-format: -# -# python3 auxil/run-clang-format/run-clang-format.py --clang-format-executable /path/to/clang-format -r src -i Language: Cpp Standard: c++17 @@ -102,4 +97,4 @@ IncludeCategories: - Regex: '^"zeek/' Priority: 4 - Regex: '.*' - Priority: 5 \ No newline at end of file + Priority: 5 diff --git a/.clang-format-ignore b/.clang-format-ignore deleted file mode 100644 index 50f6bce6a5..0000000000 --- a/.clang-format-ignore +++ /dev/null @@ -1,17 +0,0 @@ -# Ignore everything 3rdparty -src/3rdparty/* - -# These are files that are technically sourced from other places but aren't in 3rdparty -# and shouldn't be reformatted. 
-src/ConvertUTF.* -src/bro_inet_ntop.* -src/bsd-getopt-long.* -src/in_cksum.* -src/nb_dns.* -src/modp_numtoa.* -src/patricia.* -src/strsep.c -src/setsignal.c - -# These files are generated code -src/DebugCmdInfoConstants.* \ No newline at end of file diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs new file mode 100644 index 0000000000..4a41859c7b --- /dev/null +++ b/.git-blame-ignore-revs @@ -0,0 +1,26 @@ +# Reformat the world (initial clang-format formatting) +b2f171ec69eae3a833a9db1b16e5234bd3eaf0b6 + +# clang-format: Force zeek-config.h to be earlier in the config ordering +9cb54f5d449b63006cc9a1f451a47732c92fef2d + +# clang-format: A few minor comment-spacing fixes +07e276ab2e351ce71b709139f1933b9ead40d094 + +# clang-format: Enforce ordering of includes in ZBody +cb99ae2b7c9988656b097ad2789dffd2c0c37939 + +# clang-format: Other include ordering changes +e97c14add5b04aedc7f3f9dba59f665cbad793af + +# clang-format: Other minor formatting changes +02206f3215f977ba7752476ba89ca06abe93375c + +# clang-format: Set IndentCaseBlocks to false +4423574d265749da8e707ab0fbcffcbfaed26614 + +# clang-format: Set penalty for breaking after assignment operator +9af6b2f48d11b4e287d0f18034a486f76f9f2d61 + +# Remove trailing whitespace from script files +a6378531dbc5c357926d98fe785bb719cc70e1b4 diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index 8253d3d4e8..738cbf727e 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -13,12 +13,17 @@ defaults: run: shell: bash +env: + IMAGE_NAME: zeek-image.tar + IMAGE_FILE: /tmp/zeek-image.tar + IMAGE_PATH: /tmp + jobs: - build: + docker-build: runs-on: ubuntu-latest env: TEST_TAG: zeek:latest - CONFFLAGS: --generator=Ninja --build-type=Release + CONFFLAGS: --generator=Ninja --build-type=Release --enable-zeek-client steps: - uses: actions/checkout@v2 with: @@ -27,7 +32,8 @@ jobs: # Create and boot a loader. 
This will e.g., provide caching # so we avoid rebuilds of the same image after this step. - uses: docker/setup-buildx-action@v1 - - name: Build + + - name: Build image uses: docker/build-push-action@v2 with: context: ./ @@ -40,9 +46,13 @@ jobs: - name: Run btests run: make -C docker/btest + - name: Save image tarball + run: docker save -o ${{ env.IMAGE_FILE }} ${{ env.TEST_TAG }} + - name: Get version id: version run: echo "::set-output name=RELEASE_VERSION::$(cat VERSION)" + - name: Compute target tag id: target env: @@ -59,21 +69,22 @@ jobs: echo "::set-output name=tag::zeek:latest" elif [ "${GITHUB_REF}" = "refs/heads/master" ]; then echo "::set-output name=tag::zeek-dev:latest" - elif [[ "${GITHUB_REF}" = refs/heads/v* ]] && [[ "${GITHUB_REF}" != refs/heads/v*-dev ]]; then + elif [[ "${GITHUB_REF}" = refs/tags/v* ]] && [[ "${GITHUB_REF}" != refs/tags/v*-dev ]]; then echo "::set-output name=tag::zeek:${RELEASE_VERSION}" fi - name: Login to DockerHub uses: docker/login-action@v1 - # Secrets for the login are not available for pull requests. - if: github.event_name == 'push' + # Don't publish on forks. Also note that secrets for the login are not + # available for pull requests, so trigger on pushes only. + if: github.repository == 'zeek/zeek' && github.event_name == 'push' with: username: ${{ secrets.DOCKER_USERNAME }} password: ${{ secrets.DOCKER_PASSWORD }} - - name: Push + - name: Push image # Only publish if we did compute a tag. 
- if: github.event_name == 'push' && steps.target.outputs.tag != '' + if: github.repository == 'zeek/zeek' && github.event_name == 'push' && steps.target.outputs.tag != '' uses: docker/build-push-action@v2 with: context: ./ @@ -84,10 +95,65 @@ jobs: tags: | zeekurity/${{ steps.target.outputs.tag }} - - name: Preserve artifacts + - name: Preserve image artifact + uses: actions/upload-artifact@v2 + with: + name: ${{ env.IMAGE_NAME }} + path: ${{ env.IMAGE_FILE }} + retention-days: 1 + + - name: Preserve btest artifacts uses: actions/upload-artifact@v2 if: failure() with: name: docker-btest path: docker/btest/.tmp if-no-files-found: ignore + + cluster-testing: + # We need the Zeek Docker image build job to complete first, since we need + # the resulting image for our docker-compose setup. + needs: docker-build + runs-on: ubuntu-latest + steps: + # Grab the sources so we have access to btest. Could also use pip, but it + # seems appealing to be using the in-tree version of btest. btest is in a + # submodule; we check it out selectively to save time. + - uses: actions/checkout@v2 + - name: Check out btest + run: git submodule update --init ./auxil/btest + + - name: Download Docker image artifact + uses: actions/download-artifact@v2 + with: + name: ${{ env.IMAGE_NAME }} + path: ${{ env.IMAGE_PATH }} + + - name: Load Docker image + run: | + docker load --input ${{ env.IMAGE_FILE }} + docker tag zeek:latest zeektest:latest + + # The testsuite ref to use for this version of Zeek is stored in a file in + # the Zeek source tree. 
+ - name: Get testsuite version + run: | + echo "TESTSUITE_COMMIT=$(cat ./testing/external/commit-hash.zeek-testing-cluster)" >> $GITHUB_ENV + + - name: Retrieve cluster testsuite + uses: actions/checkout@v2 + with: + repository: zeek/zeek-testing-cluster + path: testing/external/zeek-testing-cluster + ref: ${{ ENV.TESTSUITE_COMMIT }} + + - name: Run testsuite + run: make -C testing/external/zeek-testing-cluster + + - name: Preserve btest artifacts + uses: actions/upload-artifact@v2 + if: failure() + with: + name: cluster-btest + path: testing/external/zeek-testing-cluster/.tmp + if-no-files-found: ignore diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml new file mode 100644 index 0000000000..925b8accd7 --- /dev/null +++ b/.github/workflows/pre-commit.yml @@ -0,0 +1,14 @@ +name: pre-commit + +on: + pull_request: + push: + branches: [master] + +jobs: + pre-commit: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: actions/setup-python@v2 + - uses: pre-commit/action@v2.0.3 diff --git a/.gitmodules b/.gitmodules index e3b149e9f9..4318212fe0 100644 --- a/.gitmodules +++ b/.gitmodules @@ -49,6 +49,3 @@ [submodule "auxil/zeek-client"] path = auxil/zeek-client url = https://github.com/zeek/zeek-client -[submodule "auxil/run-clang-format"] - path = auxil/run-clang-format - url = https://github.com/Sarcasm/run-clang-format diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000000..50844cf58b --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,19 @@ +# See https://pre-commit.com for more information +# See https://pre-commit.com/hooks.html for more hooks +# +repos: +- repo: https://github.com/pre-commit/mirrors-clang-format + rev: 'v13.0.0' + hooks: + - id: clang-format + +- repo: https://github.com/maxwinterstein/shfmt-py + rev: 3.3.1.8 + hooks: + - id: shfmt + args: ["-w", "-i", "4", "-ci"] + +- repo: https://github.com/pre-commit/mirrors-yapf + rev: v0.31.0 + hooks: + - id: 
yapf diff --git a/.style.yapf b/.style.yapf new file mode 100644 index 0000000000..b05085101b --- /dev/null +++ b/.style.yapf @@ -0,0 +1,2 @@ +[style] +column_limit=100 diff --git a/CHANGES b/CHANGES index d438502f84..78b6f1bffd 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,480 @@ +4.2.0-dev.514 | 2022-01-03 13:56:12 -0700 + + * deprecation warning on use of out-of-scope local (Vern Paxson, Corelight) + +4.2.0-dev.510 | 2022-01-03 13:54:52 -0700 + + * Switch BitTorrent analyzer to Zeek's regex engine (Avinal Kumar) + + - Removes dependency on + - Replaces regex function with Zeek's standard regex functions + - Some replacements are workarounds, may be improved later via an + appropriate API + - Update test baseline to fix what seems to be capturing on a bug in the + existing code. + + Edit pass by Robin Sommer. Note that our test doesn't cover all the code + paths, but it does go through the one with the most substantial change. + + * Adding test for BitTorrent tracker. (Robin Sommer, Corelight) + + Our test trace is extracted from https://www.cloudshark.org/captures/b9089aac6eee. + + There actually seems to be a bug in the existing code: the URI passed to + bt_tracker_request() includes a partial HTTP version. This commit + includes the baseline as the current code produces it, we'll fix that in + a subsequent commit.
+ +4.2.0-dev.506 | 2022-01-03 09:33:43 -0800 + + * Expansion of the emerging cluster controller framework (Christian Kreibich, Corelight) + + - Controller/agent connectivity is now controlled by pushed configurations + - The Request module now supports state timeouts + - Use Result records consistently for responses to the client + - Track successful config deployment in cluster controller + - Add ClusterController::API::notify_agents_ready event + - Make all globals start with a "g_" prefix + - Add missing debug() log function to log module's API + - Add separate utility module for controller and agent + - Additional infrastructure for printing types + - Bump zeek-client to v0.2.0 + - Add Github action job for cluster tests + - Tweak Docker image configure invocation to include zeek-client + - Zeekygen documentation pass + +4.2.0-dev.477 | 2021-12-14 16:53:57 -0700 + + * fixes for double-delete and reducing '?' operator with constant alternatives (Vern Paxson, Corelight) + + * correct usage info for -u flag; -uu no longer supported (Vern Paxson, Corelight) + +4.2.0-dev.468 | 2021-12-14 11:34:47 -0700 + + * factoring of generating C++ initializations, no semantic changes (Vern Paxson, Corelight) + + * restored support for incremental compilation of scripts to C++ (Vern Paxson, Corelight) + + * fixes for -O gen-standalone-C++ (Vern Paxson, Corelight) + + * new ZEEK_FILE_ONLY and ZEEK_FUNC_ONLY environment variables for debugging script optimization - replaces ZEEK_ONLY (Vern Paxson, Corelight) + + * fix for compiling record constructors to C++ (Vern Paxson, Corelight) + + * fixes for compiling vector operations to C++ (Vern Paxson, Corelight) + + * fixed for profiling missing some profile elements (Vern Paxson, Corelight) + + * minor efficiency tweak for ZAM record construction (Vern Paxson, Corelight) + +4.2.0-dev.456 | 2021-12-14 09:23:47 -0700 + + * GH-1860: Add double_to_int() bif (Tim Wojtulewicz, Corelight) + +4.2.0-dev.454 | 2021-12-13 09:41:32 -0700 + + * 
Check for sets before attempting to check for same Yield types (Tim Wojtulewicz) + + * Add early bail-outs to same_type() (Tim Wojtulewicz) + + * Fix types for Analyzer::register_for_port(s) to be the same (Tim Wojtulewicz) + + * Update cmake submodule across all other submodules (Tim Wojtulewicz, Corelight) + +4.2.0-dev.448 | 2021-12-10 15:35:34 -0700 + + * update btest to no longer use (unsupported) %S formatting, no longer needed (Vern Paxson, Corelight) + + * replace --optimize-only with --optimize-funcs and --optimize-files (Vern Paxson, Corelight) + +4.2.0-dev.444 | 2021-12-10 13:13:13 -0700 + + * reintroduction of "-O add-C++" option (Vern Paxson, Corelight) + +4.2.0-dev.442 | 2021-12-10 13:12:43 -0700 + + * fixes for vector operations (Vern Paxson, Corelight) + + * flag globals initialized to opaque values as non-compilable (Vern Paxson, Corelight) + + * skip type signatures for lambdas (Vern Paxson, Corelight) + + * fix for translating filenames beginning with numbers to C++ variable names (Vern Paxson, Corelight) + +4.2.0-dev.436 | 2021-12-10 13:11:36 -0700 + + * update script-to-C++ compilation for new record constructor internals (Vern Paxson, Corelight) + +4.2.0-dev.434 | 2021-12-10 13:11:10 -0700 + + * updates to ZAM to track recent changes in script semantics (Vern Paxson, Corelight) + +4.2.0-dev.432 | 2021-12-10 09:28:23 -0700 + + * GH-1741: Print error if calling a non-hook with hook keyword (Tim Wojtulewicz, Corelight) + + * GH-1740: Report a better error message if table key is not a list (Tim Wojtulewicz, Corelight) + +4.2.0-dev.428 | 2021-12-09 14:58:53 -0700 + + * GH-1125: Support GRE ARUBA headers (Tim Wojtulewicz, Corelight) + + * Fix ethertype for ARP in Geneve forwarding rules (Tim Wojtulewicz, Corelight) + +4.2.0-dev.425 | 2021-12-09 13:45:17 -0800 + + * Add LogAscii::json_include_unset_fields flag to control unset field rendering (Christian Kreibich, Corelight) + +4.2.0-dev.423 | 2021-12-09 19:56:43 +0000 + + * Improve error message for 
clash between variable and function name (Johanna Amann, Corelight) + Fixes GH-1832 + + * Restore --disable-zeekctl configure argument (Tim Wojtulewicz, Corelight) + + * Update plugin.hooks baseline for recent Geneve change (Tim Wojtulewicz, Corelight) + +4.2.0-dev.419 | 2021-12-07 09:34:45 -0700 + + * GH-1764: Update mappings for Geneve analyzer to IP4/IP6/ARP (Tim Wojtulewicz, Corelight) + +4.2.0-dev.417 | 2021-12-06 17:00:16 -0800 + + * Flip C++ unit tests to being enabled by default (Christian Kreibich, Corelight) + + To disable them, configure with --disable-cpp-tests. + + * Support for unit tests in plugins (Christian Kreibich, Corelight) + +4.2.0-dev.410 | 2021-12-06 11:29:32 -0700 + + * Remove separate Tag types, note breaking change in NEWS (Tim Wojtulewicz, Corelight) + +4.2.0-dev.408 | 2021-12-06 09:15:24 -0700 + + * GH-1768: Properly cleanup existing log stream when recreated on with the same ID (Tim Wojtulewicz, Corelight) + +4.2.0-dev.406 | 2021-12-01 10:32:34 -0700 + + * Format Python scripts with yapf. (Benjamin Bannier, Corelight) + + We also add a very basic yapf configuration file. Most of the changes in + this patch were performed automatically, but we broke one overly long + string into multiple components on `src/make_dbg_constants.py`. + + * Format shell scripts with shfmt. (Benjamin Bannier, Corelight) + + All changes in this patch were performed automatically with `shfmt` with + configuration flags specified in `.pre-commit-config.yaml`. 
+ +4.2.0-dev.403 | 2021-12-01 10:25:32 -0700 + + * fix btest comment to more accurately describe the test (Vern Paxson, Corelight) + + * btests for erroneous script conditionals (Vern Paxson, Corelight) + + * avoid compiling-to-C++ for functions potentially influenced by conditionals (Vern Paxson, Corelight) + + * track the use of conditionals in functions and files (Vern Paxson, Corelight) + + * AST profiles track the associated function/body/expression (Vern Paxson, Corelight) + +4.2.0-dev.396 | 2021-12-01 09:44:03 -0700 + + * GH-1873: Deprecate the tag types differently to avoid type clashes (Tim Wojtulewicz, Corelight) + +4.2.0-dev.394 | 2021-11-30 11:53:35 -0700 + + * Fix for the recent patch that allows segment offloaded packets. (Johanna Amann, Corelight) + + We recently added support for segment offloaded packets. It turns out + that this can lead to problems in UDP/ICMP based parsers since I missed + correctly also updating the payloadlength there, and using the capture + length instead when segment offloading is enabled. 
+ + Credit to OSS-Fuzz for discovery + https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=41391 + https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=41394 + https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=41395 + (Link to details becomes public 30 days after patch release) + +4.2.0-dev.393 | 2021-11-29 13:46:59 -0700 + + * Fix a number of Coverity findings (Tim Wojtulewicz, Corelight) + + 1466460: Uninitialized field in gtp-analyzer.pac + 1462465: Null pointer dereference in CompositeHash::SingleValHash + 1462463: Copy/paste error in TCPSessionAdapter::build_syn_packet_val + 1462067: Uninitialized fields in Zinst + +4.2.0-dev.391 | 2021-11-29 13:44:11 -0700 + + * suppress unneeded initializations (Vern Paxson, Corelight) + +4.2.0-dev.387 | 2021-11-24 13:32:33 -0700 + + * fixes for constructing and assigning records with fields that are empty vectors (Vern Paxson, Corelight) + +4.2.0-dev.385 | 2021-11-23 19:43:48 -0700 + + * Changes to speed up compilation of Compiled-to-C++ Zeek Scripts (Vern Paxson, Corelight) + + * removing unused SubNetType class (Vern Paxson, Corelight) + +4.2.0-dev.371 | 2021-11-23 19:41:10 -0700 + + * Add new tunnel packet analyzers, remove old ones (Tim Wojtulewicz, Corelight) + + * Add PacketAnalyzer::register_for_port(s) functions (Tim Wojtulewicz, Corelight) + + These allow packet analyzers to register ports as identifiers to forward from + parent analyzers, while also adding those ports to the now-global + Analyzer::ports table at the same time. 
+ + * Add analyzer_confirmation and analyzer_violation events (Tim Wojtulewicz, Corelight) + + * Add utility function for tunnel analyzers to setup encapsulation (Tim Wojtulewicz, Corelight) + + * Store some additional information in the packet during processing (Tim Wojtulewicz, Corelight) + + - Session related to the packet + - is_orig information if a UDP header was found + + * Minor fix in UDP to avoid duplicating tunnels (Tim Wojtulewicz, Corelight) + + * Fix error text in IPTunnel analyzer (Tim Wojtulewicz, Corelight) + + * Change Packet::ip_hdr to be a shared_ptr so it can be copied into EncapsulatingConn (Tim Wojtulewicz, Corelight) + + * Add method for packet analyzers to register for protocol detection (Tim Wojtulewicz, Corelight) + + * Add concept of "parent" tag namespaces (Tim Wojtulewicz, Corelight) + + This allows us to create an EnumType that groups all of the analyzer + tag values into a single type, while still having the existing types + that split them up. We can then use this for certain events that benefit + from taking all of the tag types at once. + + * Unify plugin::Component and plugin::TaggedComponent into a single class (Tim Wojtulewicz, Corelight) + + These two are almost always used in conjunction with each other, and + TaggedComponent is never used by itself. Combining them together into + a single class will help simplify some of the code around managing + the mapping between Tags and Components. 
+ * Remove uses of deprecated Tag types (Tim Wojtulewicz, Corelight) + + * Unify all of the Tag types into one type (Tim Wojtulewicz, Corelight) + + - Remove tag types for each component type (analyzer, etc) + - Add deprecated versions of the old types + - Remove unnecessary tag element from templates for TaggedComponent and ComponentManager + - Enable TaggedComponent to pass an EnumType when initializing Tag objects + - Update some tests that are affected by the tag enum values changing order + +4.2.0-dev.350 | 2021-11-23 15:35:06 +0000 + + * Add testcase for TCP segment offloading (GH-1829). (Johanna Amann, Corelight) + +4.2.0-dev.348 | 2021-11-23 13:45:39 +0000 + + * OpenSSL 3 compatibility (Johanna Amann, Corelight) + + Zeek is now compatible with OpenSSL 3.0, our test baselines pass cleanly, and + we have a CI run for OpenSSL 3.0. This has a certain amount of new code for + X.509 certificate parsing. Apart from that, the main change is that we + use an older, legacy, API for OpaqueVal hashing, since the newer API + does not allow us to serialize data anymore. For details see ticket 1379. + +4.2.0-dev.340 | 2021-11-23 10:10:13 +0000 + + * Accept packets that use tcp segment offloading. (Johanna Amann, Corelight) + + When checksum offloading is enabled, we now forward packets that + have 0 header lengths set - and assume that they have TSO enabled. + + If checksum offloading is not enabled, we drop the packets (GH-1829) + + * Updates to NEWS to cover recent additions.
[nomail] [skip ci] (Christian Kreibich, Corelight) + + * Update doc and auxil/zeek-aux submodules [nomail] [skip ci] (Christian Kreibich, Corelight) + + * Update cmake and aux/zeek-aux submodules [nomail] [skip ci] (Christian Kreibich, Corelight) + +4.2.0-dev.333 | 2021-11-17 11:57:04 -0800 + + * Clean up fully after successful Docker btests (Christian Kreibich, Corelight) + +4.2.0-dev.331 | 2021-11-15 10:10:52 -0800 + + * Fix ref-naming typo in the Github Docker workflow (Christian Kreibich, Corelight) + +4.2.0-dev.328 | 2021-11-12 13:46:32 -0700 + + * Update libkqueue submodule (Tim Wojtulewicz, Corelight) + +4.2.0-dev.326 | 2021-11-12 09:30:54 -0700 + + * Added plugin.unprocessed_packet_hook btest (Tim Wojtulewicz, Corelight) + + * Fix whitespace in help output (Tim Wojtulewicz, Corelight) + + * Add command-line option to write unprocessed packets to a file (Tim Wojtulewicz, Corelight) + + This commit also changes the PcapDumper to automatically flush after + every call to Dump(). This is because pcap_dump has an internal buffer + of some sort that only writes to the file after a set amount of bytes. + When using the new option on a low-traffic network, it might be a while + before you see any packets written since it has to overcome that buffer + limit first. + + * GH-1620: Add event and plugin hook to track packets not processed (Tim Wojtulewicz, Corelight) + +4.2.0-dev.319 | 2021-11-10 10:20:01 -0700 + + * Install include headers from `src/3rdparty/`. (Benjamin Bannier, Corelight) + + This is a fixup commit for 72cbc7cd13b7c1bda98658104431c3b530ff68d6 + where we move some header files from `src/` to `src/3rdparty/` but + missed adding install rules for these headers. Since some of these + headers are exposed in installed headers they need to be installed as + well.
+ +4.2.0-dev.317 | 2021-11-10 11:33:29 +0000 + + * Add case-insensitive search for find_str and rfind_str (Abdel) + +4.2.0-dev.314 | 2021-11-10 11:16:28 +0100 + + * GH-1757: Add new hook `HookLoadFileExtended` that allows plugins + to supply Zeek script and signature code to parse. (Robin Sommer) + + The new hook works similar to the existing `HookLoadFile` but, + additionally, allows the plugin to return a string that contains + the code to be used for the file being loaded. If the plugin does + so, the content of any actual file on disk will be ignored. This + works for both Zeek scripts and signatures. + + * Fix an issue where signature files supplied on the command line + wouldn't pass through the file loading hooks. (Robin Sommer, + Corelight) + +4.2.0-dev.310 | 2021-11-09 10:29:59 -0700 + + * Add Github action exercising pre-commit (Benjamin Bannier, Corelight) + + This patch adds a Github action which exercises pre-commit linters for + commits to the `master` branch or for pull requests. We add this task + as a Github action since we expect it to finish quickly; running outside + of Cirrus makes it possible to provide feedback quickly. + + * Add pre-commit config. (Benjamin Bannier, Corelight) + + This patch adds `clang-format` as only linter for now. This replaces the + previously used script from `auxil/run-clang-format` which we remove. + + This requires the Python program `pre-commit` + (https://pypi.org/project/pre-commit/). With that one can then run + `clang-format` on the whole codebase with + + $ pre-commit run -a clang-format + + or on just the staged files + + # Explicitly selecting linter. + $ pre-commit run clang-format + + # Run all linters (currently just `clang-format`). + $ pre-commit + + `pre-commit` supports managing Git commit hooks so that linters are run + on commit. Linters can be installed with + + $ pre-commit install + + The documentation at https://pre-commit.com/ covers these topics in + addition to more information.
+ + * Format code with `clang-format` (Benjamin Bannier, Corelight) + + This patch formats files not conforming to the C++ formatting with + `clang-format`. + + * Remove stale files `src/DebugCmdInfoConstants.*` (Benjamin Bannier, Corelight) + + The files generated from `src/DebugCmdInfoConstants.in` are placed in + `build/src/` by the build setup, and generated file in `src/` removed + here were unused and possibly out-of-date. + + * Disable formatting for files in `testing/btest/plugins` (Benjamin Bannier, Corelight) + + Files in that folder were previously not formatted. With this patch we + now disable formatting in that folder explicitly by adding a dedicated + `clang-format` config which deactivates any formatting changes. + + * Move 3rdparty source files to `3rdparty/` (Benjamin Bannier, Corelight) + + This patch moves in-tree 3rdparty source files to `3rdparty/`. With that + we can remove special treatment of these files for `run-clang-format`. + +4.2.0-dev.303 | 2021-11-09 09:45:57 -0700 + + * GH-1819: Handle recursive types when describing type in binary mode (Tim Wojtulewicz, Corelight) + +4.2.0-dev.301 | 2021-11-09 09:28:18 -0700 + + * Remove no-op false-teredo test (Tim Wojtulewicz, Corelight) + +4.2.0-dev.297 | 2021-11-05 12:49:55 -0700 + + * Only push CI's Docker images when we're on the main repo (Christian Kreibich, Corelight) + + * Add macOS Monterey and drop Catalina in CI (Christian Kreibich, Corelight) + + * Add Fedora 35 to CI (Christian Kreibich, Corelight) + +4.2.0-dev.292 | 2021-11-04 14:28:35 -0700 + + * Fix C++ set intersection code (Yacin Nadji, Corelight) + +4.2.0-dev.286 | 2021-11-03 09:36:41 -0700 + + * GH-693: use pcap_dump_open_append where supported (Tim Wojtulewicz, Corelight) + +4.2.0-dev.284 | 2021-11-03 09:35:10 -0700 + + * GH-1781: Add .git-blame-ignore-revs file (Tim Wojtulewicz, Corelight) + +4.2.0-dev.280 | 2021-11-01 09:20:16 -0700 + + * Fix issue with broken libpcaps that return repeat packets (Tim Wojtulewicz, Corelight) 
+ + This is apparently a problem with the Myricom version of libpcap, where + instead of returning a null or a zero if no packets are available, it + returns the previous packet. This causes Zeek to improperly parse the + packet and crash. We thought we had fixed this previously with a check + for a null packet but that fix was not enough. + +4.2.0-dev.277 | 2021-10-21 17:23:46 -0700 + + * Apply some missing clang-format changes (Tim Wojtulewicz, Corelight) + +4.2.0-dev.274 | 2021-10-20 11:13:16 -0700 + + * Remove trailing whitespace from script files (Tim Wojtulewicz, Corelight) + +4.2.0-dev.271 | 2021-10-19 14:54:56 +0200 + + * Add parsing of DNS SVCB/HTTPS records (FlyingWithJerome) + +4.2.0-dev.260 | 2021-10-15 09:45:45 +0100 + + * logging/writers/ascii: shadow files: Add fsync() before rename(). This + prevents potential problems with leftover files after unclean shutdowns. + (Arne Welzel, Corelight) + + * Fix typo in typedef changes that broke tests on 32-bit Debian 9 (Tim Wojtulewicz, Corelight) + 4.2.0-dev.255 | 2021-10-12 09:22:37 -0700 * Replace most uses of typedef with using for type aliasing (Tim Wojtulewicz, Corelight) diff --git a/CMakeLists.txt b/CMakeLists.txt index 908c15a057..494cb2efa2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -490,6 +490,9 @@ include(FindKqueue) if ( (OPENSSL_VERSION VERSION_EQUAL "1.1.0") OR (OPENSSL_VERSION VERSION_GREATER "1.1.0") ) set(ZEEK_HAVE_OPENSSL_1_1 true CACHE INTERNAL "" FORCE) endif() +if ( (OPENSSL_VERSION VERSION_EQUAL "3.0.0") OR (OPENSSL_VERSION VERSION_GREATER "3.0.0") ) + set(ZEEK_HAVE_OPENSSL_3_0 true CACHE INTERNAL "" FORCE) +endif() # Tell the plugin code that we're building as part of the main tree. set(ZEEK_PLUGIN_INTERNAL_BUILD true CACHE INTERNAL "" FORCE) diff --git a/COPYING.3rdparty b/COPYING.3rdparty index 4b21b90ab5..ffbb0dee0b 100644 --- a/COPYING.3rdparty +++ b/COPYING.3rdparty @@ -250,7 +250,7 @@ PROJECT (https://github.com/zeek) UNDER BSD LICENCE. 
============================================================================== -%%% in_cksum.cc +%%% 3rdparty/in_cksum.cc ============================================================================== @@ -283,7 +283,7 @@ SUCH DAMAGE. ============================================================================== -%%% Patricia.c +%%% 3rdparty/patricia.c ============================================================================== @@ -328,7 +328,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ============================================================================== -%%% strsep.c +%%% 3rdparty/strsep.c ============================================================================== @@ -365,7 +365,7 @@ SUCH DAMAGE. ============================================================================== -%%% ConvertUTF.c +%%% 3rdparty/ConvertUTF.c ============================================================================== @@ -479,7 +479,7 @@ SUCH DAMAGE. ============================================================================== -%%% bsd-getopt-long.c +%%% 3rdparty/bsd-getopt-long.c ============================================================================== @@ -555,7 +555,7 @@ limitations under the License. ============================================================================== -%%% bro_inet_ntop.c +%%% 3rdparty/bro_inet_ntop.c ============================================================================== @@ -578,7 +578,7 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. ============================================================================== -%%% modp_numtoa.h +%%% 3rdparty/modp_numtoa.h ============================================================================== diff --git a/NEWS b/NEWS index 074dd3cf33..49f003d071 100644 --- a/NEWS +++ b/NEWS @@ -6,6 +6,17 @@ release. 
For an exhaustive list of changes, see the ``CHANGES`` file Zeek 4.2.0 ========== +Breaking Changes +---------------- + +- The existing ``Tag`` types in C++ (``zeek::Analyzer::Tag``, etc) have been + merged into a single type called ``zeek::Tag``. This is a breaking change, and + may result in plugins failing to build where they were relying on those types + being different for function overloading and such. We attempted to include + deprecated versions of the old types, but were unable to do so because of + changes to return types from a number of methods. With this change, any uses + of the ``zeek::*::Tag`` types will need to be replaced by ``zeek::Tag``. + New Functionality ----------------- @@ -22,19 +33,89 @@ New Functionality example to build a Zeek plugin. You can add any required system packages in a derived image, or install them directly in the running container. -- Zeek now supports formatting the C++ code using clang-format. It requires at - least clang-format 12.0.1 due to some additions that were made in that version - to better support the Whitesmiths style. Zeek also includes a set of python - scripts to more easily reformat in the auxil/run-clang-format directory. An - example command to reformat the code: - - `python3 auxil/run-clang-format/run-clang-format.py --clang-format-executable `which clang-format-12` -r src -i` +- Zeek now supports formatting the C++ code using clang-format. Also provided is + a configuration for ``pre-commit`` to run clang-format when adding new commits via + ``git``. More details can be found at https://github.com/zeek/zeek/wiki/Coding-Style-and-Conventions#clang-format. - Experimental support for speeding up Zeek script execution by compiling scripts to a low-level form called "ZAM". You activate this feature by specifying ``-O ZAM`` on the command line. See ``src/script_opt/ZAM/README.md`` for more information. +- Improvements for compiling scripts to C++ (an experimental optimization + feature introduced in 4.1).
The generated C++ now compiles much faster than + previously, though it can still take quite a while when using C++ optimization + on large sets of scripts. You can incrementally compile additional scripts + using ``-O add-C++``. See ``src/script_opt/CPP/README.md`` for details. + +- The new flags --optimize-files=/pat/ and --optimize-funcs=/pat/ apply + to both ZAM and compile-to-C++ script optimization. The first instructs + Zeek to optimize any functions/hooks/event handlers residing in files + matching the given pattern (unanchored). The second does the same but + based on the function name, and with the pattern anchored (so for example + --optimize-funcs=foo will optimize any functions named "foo" but not + those named "foobar", or "MYSCOPE::foo"). The flags can be combined + and can also be used multiple times to specify a set of patterns. + If neither flag is used then optimization is applied to all loaded + scripts; if used, then only to those that match. + +- The ``-uu`` flag for analyzing potentially unused record fields has been + removed because, due to other changes in script optimization, keeping it + would now require about 1,800 lines of code not otherwise needed. + +- The DNS analyzer has initial support for the SVCB and HTTPS types. The new + events are ``dns_SVCB`` and ``dns_HTTPS``. + +- The ``find_str`` and ``rfind_str`` bifs now support case-insensitive searches. + +- Added a new plugin hook for capturing packets that made it through analysis + without being processed called ``Plugin::HookUnprocessedPacket``. Currently + ARP packets or packets with a valid IP-based transport header are marked as + processed. This also adds an event called ``packet_not_processed`` that + reports the same packets. + +- A new command-line option ``-c`` or ``--capture-unprocessed`` will dump any + packets not marked as being processed, similar to the new hook and event + above. 
+ +- In Zeek plugins, the new cmake function ``zeek_plugin_scripts()`` should be + used alongside ``zeek_plugin_cc()`` and related functions to establish + dependency tracking between Zeek scripts shipped with the plugin and plugin + rebuilds. Previously, updates to included Zeek scripts didn't reliably + trigger a rebuild. + +- Added PacketAnalyzer::register_for_port(s) functions to the packet analyzer + framework in script-land. This allows a packet analyzer to register a port + mapping with a parent analyzer just like any other numeric identifier, while + also adding that port to the now-global Analyzer::ports table used by BPF + filtering. + +- Added AllAnalyzers::Tag enum type that combines the existing Analyzer::Tag, + PacketAnalyzer::Tag, and Files::Tags into a single enum. The existing types + still exist, but the new type can be used as an argument for + functions/hooks/events that need to handle any of the analyzer types. + +- Added protocol detection functionality to the packet analyzer framework. + Packet analyzers can register for protocol detection using the + ``PacketAnalyzer::register_protocol_detection`` script function and implement + the ``PacketAnalyzer::DetectProtocol`` method in C++. This allows packet + analyzer plugins to detect a protocol via byte matching or other heuristics + instead of relying solely on a numeric identifier for forwarding. + +- The JSON logger's new LogAscii::json_include_unset_fields flag provides + control over how to handle unset "&optional" fields. By default it continues + to skip such fields entirely. When redef'ing the flag to T it includes such + fields, with a "null" value. This simplifies data import use cases that + require fields to be present at all times, regardless of their value. + +- A new external testsuite, https://github.com/zeek/zeek-testing-cluster, + focuses on testing the emerging controller framework. 
It leverages the new + official Zeek Docker image for building docker-compose test setups, driven via + btest. The GitHub CI setup now includes a workflow that deploys and runs this + testsuite. + +- The GRE analyzer now supports the Aruba WLAN protocol type. + Changed Functionality --------------------- @@ -43,19 +124,75 @@ Changed Functionality to serialize, meaning that you can now also index with sets, vectors, patterns, and even tables. -- The traditional TSV Zeek logs are now valid UTF8 by default. It's possible - to revert to the previous behavior by setting ``LogAscii::enable_utf_8`` to +- The traditional TSV Zeek logs are now valid UTF8 by default. It's possible to + revert to the previous behavior by setting ``LogAscii::enable_utf_8`` to false. -- The ``SYN_packet`` record now records TCP timestamps (TSval/TSecr) - when available. +- The ``SYN_packet`` record now records TCP timestamps (TSval/TSecr) when + available. -Removed Functionality ---------------------- +- The ``init-plugin`` script now focuses purely on dynamic Zeek plugins. It no + longer generates Zeek packages. To instantiate new Zeek packages, use the + ``zkg create`` command instead. + +- The ``ignore_checksums`` option and the ``-C`` command-line option now + additionally cause Zeek to accept IPv4 packets that provide a length of zero + in the total-length IPv4 header field. When the length is set to zero, the + capture length of the packet is used instead. This can be used to replay + traces, or analyze traffic when TCP segmentation offloading is enabled on the + local NIC - which typically causes the total-length of affected packets to be + set to zero. + +- The existing tunnel analyzers for AYIYA, Geneve, GTPv1, Teredo, and VXLAN are + now packet analyzers. + +- C++ unit tests are now compiled in by default and can be disabled by + configuring the build with --disable-cpp-tests. We removed the former + --enable-cpp-tests configure flag.
Unit tests now also work in (static and + dynamic) Zeek plugins. + +- This release expands the emerging cluster controller framework. Most changes + concern internals of the framework. Agent/controller connectivity management + has become more flexible: configuration updates pushed by the client can now + convey the agent topology, removing the need to hardwire/redef settings + in the controller. The new ClusterController::API::notify_agents_ready event + declares the management infrastructure ready for use. zeek-client's CLI has + expanded to support the new functionality. + + The framework is still experimental and provides only a small subset of + ZeekControl's functionality. ZeekControl remains the recommended tool for + maintaining your cluster. Deprecated Functionality ------------------------ +- The ``protocol_confirmation`` and ``protocol_violation`` events along with the + corresponding ``Analyzer::ProtocolConfirmation`` and + ``Analyzer::ProtocolViolation`` C++ methods are marked as deprecated. They are + replaced by ``analyzer_confirmation`` and ``analyzer_violation`` which can + also now be implemented in packet analyzers. + +- Declaring a local variable in an inner scope and then accessing it in an + outer scope is now deprecated. For example, + + if ( foo() ) + { + local a = 5; + ... + } + print a; + + is deprecated. You can address the issue by hoisting the declaration + to the outer scope, such as: + + local a: count; + if ( foo() ) + { + a = 5; + ...
+ } + print a; + Zeek 4.1.0 ========== diff --git a/VERSION b/VERSION index 73f4da94cb..a4c350773f 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -4.2.0-dev.255 +5.0.0-dev.2 diff --git a/auxil/bifcl b/auxil/bifcl index 6bd2643518..a7d9233b37 160000 --- a/auxil/bifcl +++ b/auxil/bifcl @@ -1 +1 @@ -Subproject commit 6bd264351813eedb92753d2d4ed76ac6ddc076b3 +Subproject commit a7d9233b37daac558314625566bb8c8a993f2904 diff --git a/auxil/binpac b/auxil/binpac index 8169f16309..8b1322d306 160000 --- a/auxil/binpac +++ b/auxil/binpac @@ -1 +1 @@ -Subproject commit 8169f1630993b34189b2c221d0e5ab8ba9777967 +Subproject commit 8b1322d3060a1fecdc586693e6215ad7ef8ab0e9 diff --git a/auxil/broker b/auxil/broker index 47cac80cbe..d9e8440053 160000 --- a/auxil/broker +++ b/auxil/broker @@ -1 +1 @@ -Subproject commit 47cac80cbe1e1bde8e3b425903e50d62715972a2 +Subproject commit d9e84400534b968e33ab01cfadfb569c0d7b2929 diff --git a/auxil/btest b/auxil/btest index 0a37819d48..5f954ec65c 160000 --- a/auxil/btest +++ b/auxil/btest @@ -1 +1 @@ -Subproject commit 0a37819d484358999a47e76ac473da74799ab08d +Subproject commit 5f954ec65cb78b17f7156455c8c3c905a816ae96 diff --git a/auxil/libkqueue b/auxil/libkqueue index 6c1717dea2..aeaeed2119 160000 --- a/auxil/libkqueue +++ b/auxil/libkqueue @@ -1 +1 @@ -Subproject commit 6c1717dea2dc34a91d32e07d2cae34b1afa0a84e +Subproject commit aeaeed21198d6f41d0cf70bda63fe0f424922ac5 diff --git a/auxil/rapidjson b/auxil/rapidjson index dfbe1db9da..fd3dc29a5c 160000 --- a/auxil/rapidjson +++ b/auxil/rapidjson @@ -1 +1 @@ -Subproject commit dfbe1db9da455552f7a9ad5d2aea17dd9d832ac1 +Subproject commit fd3dc29a5c2852df569e1ea81dbde2c412ac5051 diff --git a/auxil/run-clang-format b/auxil/run-clang-format deleted file mode 160000 index 39081c9c42..0000000000 --- a/auxil/run-clang-format +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 39081c9c42768ab5e8321127a7494ad1647c6a2f diff --git a/auxil/zeek-archiver b/auxil/zeek-archiver index f3a1e8fe46..479e8a85fd 160000 --- 
a/auxil/zeek-archiver +++ b/auxil/zeek-archiver @@ -1 +1 @@ -Subproject commit f3a1e8fe464c0425688eff67e30f35c678914ad2 +Subproject commit 479e8a85fd58936c16d361dbf3de4e7268d751f8 diff --git a/auxil/zeek-aux b/auxil/zeek-aux index 296383d577..12be5e3e51 160000 --- a/auxil/zeek-aux +++ b/auxil/zeek-aux @@ -1 +1 @@ -Subproject commit 296383d577a3f089c4f491061a985293cf6736e6 +Subproject commit 12be5e3e51a4a97ab3aa0fa4a02da194a83c7f24 diff --git a/auxil/zeek-client b/auxil/zeek-client index afe253c775..553d897734 160000 --- a/auxil/zeek-client +++ b/auxil/zeek-client @@ -1 +1 @@ -Subproject commit afe253c77591e87b2a6cf6d5682cd02caa78e9d2 +Subproject commit 553d897734b6d9abbc2e4467fae89f68a2c7315d diff --git a/auxil/zeekctl b/auxil/zeekctl index d31885671d..95b048298a 160000 --- a/auxil/zeekctl +++ b/auxil/zeekctl @@ -1 +1 @@ -Subproject commit d31885671d74932d951778c029fa74d44cf3e542 +Subproject commit 95b048298a77bb14d2c54dcca8bb549c86eb96b9 diff --git a/ci/build.sh b/ci/build.sh index 20ca3237cc..19a92743d1 100755 --- a/ci/build.sh +++ b/ci/build.sh @@ -1,6 +1,6 @@ #! /usr/bin/env bash -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &> /dev/null && pwd)" +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)" . ${SCRIPT_DIR}/common.sh set -e @@ -10,6 +10,11 @@ set -x # some problems with Catalina specifically, but it doesn't break anything on Big Sur either. if [[ "${CIRRUS_OS}" == "darwin" ]]; then export ZEEK_CI_CONFIGURE_FLAGS="${ZEEK_CI_CONFIGURE_FLAGS} --osx-sysroot=$(xcrun --show-sdk-path)" + + # Starting with Monterey & Xcode 13.1 we need to help it find OpenSSL + if [ -d /usr/local/opt/openssl@1.1/lib/pkgconfig ]; then + export PKG_CONFIG_PATH=$PKG_CONFIG_PATH:/usr/local/opt/openssl@1.1/lib/pkgconfig + fi fi if [[ "${ZEEK_CI_CREATE_ARTIFACT}" != "1" ]]; then diff --git a/ci/common.sh b/ci/common.sh index cc84e9a973..52949dcf73 100644 --- a/ci/common.sh +++ b/ci/common.sh @@ -3,10 +3,10 @@ # On Cirrus, oversubscribe the CPUs when on Linux. 
This uses Cirrus' "greedy" feature. if [[ "${CIRRUS_OS}" == linux ]]; then if [[ -n "${ZEEK_CI_CPUS}" ]]; then - ZEEK_CI_CPUS=$(( 2 * ${ZEEK_CI_CPUS} )) + ZEEK_CI_CPUS=$((2 * ${ZEEK_CI_CPUS})) fi if [[ -n "${ZEEK_CI_BTEST_JOBS}" ]]; then - ZEEK_CI_BTEST_JOBS=$(( 2 * ${ZEEK_CI_BTEST_JOBS} )) + ZEEK_CI_BTEST_JOBS=$((2 * ${ZEEK_CI_BTEST_JOBS})) fi fi diff --git a/ci/fedora-35/Dockerfile b/ci/fedora-35/Dockerfile new file mode 100644 index 0000000000..a168642ff3 --- /dev/null +++ b/ci/fedora-35/Dockerfile @@ -0,0 +1,23 @@ +FROM fedora:35 + +RUN dnf -y install \ + bison \ + cmake \ + diffutils \ + findutils \ + flex \ + git \ + gcc \ + gcc-c++ \ + libpcap-devel \ + make \ + openssl-devel \ + python3-devel \ + python3-pip\ + sqlite \ + swig \ + which \ + zlib-devel \ + && dnf clean all && rm -rf /var/cache/dnf + +RUN pip3 install junit2html diff --git a/ci/freebsd/prepare.sh b/ci/freebsd/prepare.sh index 00b3965e2f..8a51a49461 100755 --- a/ci/freebsd/prepare.sh +++ b/ci/freebsd/prepare.sh @@ -8,6 +8,6 @@ set -x env ASSUME_ALWAYS_YES=YES pkg bootstrap pkg install -y bash git cmake swig bison python3 base64 pkg upgrade -y curl -pyver=`python3 -c 'import sys; print(f"py{sys.version_info[0]}{sys.version_info[1]}")'` +pyver=$(python3 -c 'import sys; print(f"py{sys.version_info[0]}{sys.version_info[1]}")') pkg install -y $pyver-sqlite3 $pyver-pip pip install junit2html diff --git a/ci/init-external-repos.sh b/ci/init-external-repos.sh index 43814a69c5..b3684b0dcd 100755 --- a/ci/init-external-repos.sh +++ b/ci/init-external-repos.sh @@ -1,13 +1,12 @@ #! /usr/bin/env bash -function banner - { +function banner { local msg="${1}" printf "+--------------------------------------------------------------+\n" printf "| %-60s |\n" "$(date)" printf "| %-60s |\n" "${msg}" printf "+--------------------------------------------------------------+\n" - } +} set -e @@ -52,8 +51,8 @@ if [[ -n "${CIRRUS_CI}" ]] && [[ "${CIRRUS_REPO_OWNER}" == "zeek" ]] && [[ ! 
-d fi banner "Trying to clone zeek-testing-private git repo" - echo "${ZEEK_TESTING_PRIVATE_SSH_KEY}" > cirrus_key.b64 - base64 -d cirrus_key.b64 > cirrus_key + echo "${ZEEK_TESTING_PRIVATE_SSH_KEY}" >cirrus_key.b64 + base64 -d cirrus_key.b64 >cirrus_key rm cirrus_key.b64 chmod 600 cirrus_key git --version diff --git a/ci/openssl-3.0/Dockerfile b/ci/openssl-3.0/Dockerfile new file mode 100644 index 0000000000..321afae31d --- /dev/null +++ b/ci/openssl-3.0/Dockerfile @@ -0,0 +1,36 @@ +FROM ubuntu:20.04 + +ENV DEBIAN_FRONTEND="noninteractive" TZ="America/Los_Angeles" + +RUN apt-get update && apt-get -y install \ + git \ + cmake \ + make \ + gcc \ + g++ \ + flex \ + bison \ + libpcap-dev \ + libssl-dev \ + python3 \ + python3-dev \ + python3-pip\ + swig \ + zlib1g-dev \ + libmaxminddb-dev \ + libkrb5-dev \ + bsdmainutils \ + sqlite3 \ + curl \ + wget \ + unzip \ + ruby \ + bc \ + lcov \ + && rm -rf /var/lib/apt/lists/* + +# Note - the symlink is important, otherwise cmake uses the wrong .so files. +RUN wget https://www.openssl.org/source/openssl-3.0.0.tar.gz && tar xvf ./openssl-3.0.0.tar.gz && cd ./openssl-3.0.0 && ./Configure --prefix=/opt/openssl && make install && cd .. && rm -rf openssl-3.0.0 && ln -sf /opt/openssl/lib64 /opt/openssl/lib + +RUN pip3 install junit2html +RUN gem install coveralls-lcov diff --git a/ci/run-clang-format.sh b/ci/run-clang-format.sh deleted file mode 100755 index 3bd6d5d898..0000000000 --- a/ci/run-clang-format.sh +++ /dev/null @@ -1,65 +0,0 @@ -#! /bin/sh -# -# Copyright (c) 2020 by the Zeek Project. See LICENSE for details. - -base=$(git rev-parse --show-toplevel) -fix=0 -pre_commit_hook=0 - -# Directories to run on by default. When changing, adapt .pre-commit-config.yam -# as well. 
-files="src" - -error() { - test "${pre_commit_hook}" = 0 && echo "$@" >&2 && exit 1 - exit 0 -} - -if [ $# != 0 ]; then - case "$1" in - --fixit) - shift - fix=1 - ;; - - --pre-commit-hook) - shift - fix=1 - pre_commit_hook=1 - ;; - - -*) - echo "usage: $(basename $0) [--fixit | --pre-commit-hook] []" - exit 1 - esac -fi - -test $# != 0 && files="$@" - -if [ -z "${CLANG_FORMAT}" ]; then - CLANG_FORMAT=$(which clang-format 2>/dev/null) -fi - -if [ -z "${CLANG_FORMAT}" -o ! -x "${CLANG_FORMAT}" ]; then - error "Cannot find clang-format. If not in PATH, set CLANG_FORMAT." -fi - -if ! (cd / && ${CLANG_FORMAT} -dump-config | grep -q SpacesInConditionalStatement); then - error "${CLANG_FORMAT} does not support SpacesInConditionalStatement. Install custom version and put it into PATH, or point CLANG_FORMAT to it." -fi - -if [ ! -e .clang-format ]; then - error "Must execute in top-level directory." -fi - -cmd="${base}/auxil/run-clang-format/run-clang-format.py -r --clang-format-executable ${CLANG_FORMAT} --exclude '*/3rdparty/*' ${files}" -tmp=/tmp/$(basename $0).$$.tmp -trap "rm -f ${tmp}" EXIT -eval "${cmd}" >"${tmp}" - -if [ "${fix}" = 1 ]; then - test -s "${tmp}" && cat "${tmp}" | git apply -p0 - true -else - cat "${tmp}" -fi diff --git a/ci/test-fuzzers.sh b/ci/test-fuzzers.sh index c3ae93d469..389c272066 100755 --- a/ci/test-fuzzers.sh +++ b/ci/test-fuzzers.sh @@ -17,8 +17,8 @@ for fuzzer_path in ${fuzzers}; do if [[ -e ${corpus} ]]; then echo "Fuzzer: ${fuzzer_exe} ${corpus}" - ( rm -rf corpus && mkdir corpus ) || result=1 - ( cd corpus && unzip ../${corpus} >/dev/null ) || result=1 + (rm -rf corpus && mkdir corpus) || result=1 + (cd corpus && unzip ../${corpus} >/dev/null) || result=1 ${fuzzer_path} corpus/* >${fuzzer_exe}.out 2>${fuzzer_exe}.err if [[ $? 
-eq 0 ]]; then @@ -36,5 +36,4 @@ for fuzzer_path in ${fuzzers}; do echo "-----------------------------------------" done - exit ${result} diff --git a/ci/test.sh b/ci/test.sh index 50d0703dc9..ba407ffec6 100755 --- a/ci/test.sh +++ b/ci/test.sh @@ -16,47 +16,41 @@ if [[ -z "${CIRRUS_CI}" ]]; then ZEEK_CI_BTEST_RETRIES=2 fi -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &> /dev/null && pwd)" +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)" . ${SCRIPT_DIR}/common.sh -function pushd - { - command pushd "$@" > /dev/null || exit 1 - } +function pushd { + command pushd "$@" >/dev/null || exit 1 +} -function popd - { - command popd "$@" > /dev/null || exit 1 - } +function popd { + command popd "$@" >/dev/null || exit 1 +} -function banner - { +function banner { local msg="${1}" printf "+--------------------------------------------------------------+\n" printf "| %-60s |\n" "$(date)" printf "| %-60s |\n" "${msg}" printf "+--------------------------------------------------------------+\n" - } +} -function run_unit_tests - { +function run_unit_tests { banner "Running unit tests" pushd build - ( . ./zeek-path-dev.sh && zeek --test ) || result=1 + (. ./zeek-path-dev.sh && zeek --test) || result=1 popd return 0 - } +} -function prep_artifacts - { +function prep_artifacts { banner "Prepare artifacts" [[ -d .tmp ]] && rm -rf .tmp/script-coverage && tar -czf tmp.tar.gz .tmp junit2html btest-results.xml btest-results.html - } +} -function run_btests - { +function run_btests { banner "Running baseline tests: zeek" pushd testing/btest @@ -73,10 +67,9 @@ function run_btests prep_artifacts popd return 0 - } +} -function run_external_btests - { +function run_external_btests { # Commenting out this line in btest.cfg causes the script profiling/coverage # to be disabled. We do this for the sanitizer build right now because of a # fairly significant performance bug when running tests. 
@@ -120,7 +113,7 @@ function run_external_btests else banner "Skipping private tests (not available for PRs)" fi - } +} banner "Start tests: ${ZEEK_CI_CPUS} cpus, ${ZEEK_CI_BTEST_JOBS} btest jobs" diff --git a/ci/update-zeekygen-docs.sh b/ci/update-zeekygen-docs.sh index 1c0769bc9a..c4722237cb 100755 --- a/ci/update-zeekygen-docs.sh +++ b/ci/update-zeekygen-docs.sh @@ -1,15 +1,15 @@ #! /usr/bin/env bash -unset ZEEK_DISABLE_ZEEKYGEN; +unset ZEEK_DISABLE_ZEEKYGEN # If running this from btest, unset any of the environment # variables that alter default script values. -unset ZEEK_DEFAULT_LISTEN_ADDRESS; -unset ZEEK_DEFAULT_LISTEN_RETRY; -unset ZEEK_DEFAULT_CONNECT_RETRY; +unset ZEEK_DEFAULT_LISTEN_ADDRESS +unset ZEEK_DEFAULT_LISTEN_RETRY +unset ZEEK_DEFAULT_CONNECT_RETRY -dir="$( cd "$( dirname "$0" )" && pwd )" -source_dir="$( cd $dir/.. && pwd )" +dir="$(cd "$(dirname "$0")" && pwd)" +source_dir="$(cd $dir/.. && pwd)" build_dir=$source_dir/build conf_file=$build_dir/zeekygen-test.conf output_dir=$source_dir/doc @@ -21,15 +21,14 @@ fi case $output_dir in /*) ;; - *) output_dir=`pwd`/$output_dir ;; + *) output_dir=$(pwd)/$output_dir ;; esac cd $build_dir . zeek-path-dev.sh export ZEEK_SEED_FILE=$source_dir/testing/btest/random.seed -function run_zeek - { +function run_zeek { ZEEK_ALLOW_INIT_ERRORS=1 zeek -X $conf_file zeekygen >/dev/null 2>$zeek_error_file if [ $? 
-ne 0 ]; then @@ -37,23 +36,22 @@ function run_zeek echo "See stderr in $zeek_error_file" exit 1 fi - } +} scripts_output_dir=$output_dir/scripts rm -rf $scripts_output_dir -printf "script\t*\t$scripts_output_dir/" > $conf_file +printf "script\t*\t$scripts_output_dir/" >$conf_file echo "Generating $scripts_output_dir/" run_zeek script_ref_dir=$output_dir/script-reference mkdir -p $script_ref_dir -function generate_index - { +function generate_index { echo "Generating $script_ref_dir/$2" - printf "$1\t*\t$script_ref_dir/$2\n" > $conf_file + printf "$1\t*\t$script_ref_dir/$2\n" >$conf_file run_zeek - } +} generate_index "script_index" "autogenerated-script-index.rst" generate_index "package_index" "autogenerated-package-index.rst" diff --git a/cmake b/cmake index 4d1990f0e4..12fbc1a3bc 160000 --- a/cmake +++ b/cmake @@ -1 +1 @@ -Subproject commit 4d1990f0e4c273cf51ec52278add6ff256f9c889 +Subproject commit 12fbc1a3bc206a57b079505e3df938c3a993ba58 diff --git a/configure b/configure index f40846bc43..8f0fb27e41 100755 --- a/configure +++ b/configure @@ -54,51 +54,51 @@ Usage: $0 [OPTION]... [VAR=VALUE]... 
install --home [PATH/lib/python] Optional Features: - --enable-debug compile in debugging mode (like --build-type=Debug) --enable-coverage compile with code coverage support (implies debugging mode) + --enable-debug compile in debugging mode (like --build-type=Debug) --enable-fuzzers build fuzzer targets + --enable-jemalloc link against jemalloc --enable-mobile-ipv6 analyze mobile IPv6 features defined by RFC 6275 --enable-perftools enable use of Google perftools (use tcmalloc) --enable-perftools-debug use Google's perftools for debugging - --enable-jemalloc link against jemalloc - --enable-static-broker build Broker statically (ignored if --with-broker is specified) --enable-static-binpac build binpac statically (ignored if --with-binpac is specified) - --enable-cpp-tests build Zeek's C++ unit tests + --enable-static-broker build Broker statically (ignored if --with-broker is specified) --enable-zeek-client install the Zeek cluster management client (experimental) - --disable-zeekctl don't install ZeekControl - --disable-auxtools don't build or install auxiliary tools --disable-archiver don't build or install zeek-archiver tool + --disable-auxtools don't build or install auxiliary tools + --disable-broker-tests don't try to build Broker unit tests --disable-btest don't install BTest --disable-btest-pcaps don't install Zeek's BTest input pcaps + --disable-cpp-tests don't build Zeek's C++ unit tests --disable-python don't try to build python bindings for Broker - --disable-broker-tests don't try to build Broker unit tests + --disable-zeekctl don't install ZeekControl --disable-zkg don't install zkg Required Packages in Non-Standard Locations: - --with-openssl=PATH path to OpenSSL install root - --with-bind=PATH path to BIND install root - --with-pcap=PATH path to libpcap install root - --with-binpac=PATH path to BinPAC executable - (useful for cross-compiling) --with-bifcl=PATH path to Zeek BIF compiler executable (useful for cross-compiling) - --with-flex=PATH path 
to flex executable + --with-bind=PATH path to BIND install root + --with-binpac=PATH path to BinPAC executable + (useful for cross-compiling) --with-bison=PATH path to bison executable - --with-python=PATH path to Python executable --with-broker=PATH path to Broker install root (Zeek uses an embedded version by default) --with-caf=PATH path to C++ Actor Framework install root (a Broker dependency that is embedded by default) + --with-flex=PATH path to flex executable --with-libkqueue=PATH path to libkqueue install root (Zeek uses an embedded version by default) + --with-openssl=PATH path to OpenSSL install root + --with-pcap=PATH path to libpcap install root + --with-python=PATH path to Python executable Optional Packages in Non-Standard Locations: --with-geoip=PATH path to the libmaxminddb install root + --with-jemalloc=PATH path to jemalloc install root --with-krb5=PATH path to krb5 install root --with-perftools=PATH path to Google Perftools install root - --with-jemalloc=PATH path to jemalloc install root - --with-python-lib=PATH path to libpython --with-python-inc=PATH path to Python headers + --with-python-lib=PATH path to libpython --with-swig=PATH path to SWIG executable Packaging Options (for developers): @@ -118,7 +118,7 @@ Usage: $0 [OPTION]... [VAR=VALUE]... CXXFLAGS C++ compiler flags " -sourcedir="$( cd "$( dirname "$0" )" && pwd )" +sourcedir="$(cd "$(dirname "$0")" && pwd)" if [ ! -e "$sourcedir/cmake/COPYING" ] && [ -d "$sourcedir/.git" ]; then echo "\ @@ -128,8 +128,8 @@ This typically means that you performed a non-recursive git clone of Zeek. 
To check out the required subdirectories, please execute: ( cd $sourcedir && git submodule update --recursive --init ) -" >&2; - exit 1; +" >&2 + exit 1 fi # Function to append a CMake cache entry definition to the @@ -137,14 +137,14 @@ fi # $1 is the cache entry variable name # $2 is the cache entry variable type # $3 is the cache entry variable value -append_cache_entry () { +append_cache_entry() { CMakeCacheEntries="$CMakeCacheEntries -D $1:$2=$3" } # Function to remove a CMake cache entry definition from the # CMakeCacheEntries variable # $1 is the cache entry variable name -remove_cache_entry () { +remove_cache_entry() { CMakeCacheEntries="$CMakeCacheEntries -U $1" # Even with -U, cmake still warns by default if @@ -156,22 +156,23 @@ remove_cache_entry () { builddir=build prefix=/usr/local/zeek CMakeCacheEntries="" -append_cache_entry CMAKE_INSTALL_PREFIX PATH $prefix -append_cache_entry ZEEK_ROOT_DIR PATH $prefix +append_cache_entry CMAKE_INSTALL_PREFIX PATH $prefix +append_cache_entry ZEEK_ROOT_DIR PATH $prefix append_cache_entry ZEEK_SCRIPT_INSTALL_PATH STRING $prefix/share/zeek -append_cache_entry ZEEK_ETC_INSTALL_DIR PATH $prefix/etc -append_cache_entry ENABLE_DEBUG BOOL false -append_cache_entry ENABLE_PERFTOOLS BOOL false -append_cache_entry ENABLE_JEMALLOC BOOL false -append_cache_entry BUILD_SHARED_LIBS BOOL true -append_cache_entry INSTALL_AUX_TOOLS BOOL true -append_cache_entry INSTALL_BTEST BOOL true -append_cache_entry INSTALL_BTEST_PCAPS BOOL true -append_cache_entry INSTALL_ZEEK_ARCHIVER BOOL true -append_cache_entry INSTALL_ZEEKCTL BOOL true -append_cache_entry INSTALL_ZKG BOOL true +append_cache_entry ZEEK_ETC_INSTALL_DIR PATH $prefix/etc +append_cache_entry ENABLE_DEBUG BOOL false +append_cache_entry ENABLE_PERFTOOLS BOOL false +append_cache_entry ENABLE_JEMALLOC BOOL false +append_cache_entry ENABLE_ZEEK_UNIT_TESTS BOOL true +append_cache_entry BUILD_SHARED_LIBS BOOL true +append_cache_entry INSTALL_AUX_TOOLS BOOL true +append_cache_entry 
INSTALL_BTEST BOOL true +append_cache_entry INSTALL_BTEST_PCAPS BOOL true +append_cache_entry INSTALL_ZEEK_ARCHIVER BOOL true +append_cache_entry INSTALL_ZEEKCTL BOOL true +append_cache_entry INSTALL_ZKG BOOL true append_cache_entry CPACK_SOURCE_IGNORE_FILES STRING -append_cache_entry ZEEK_SANITIZERS STRING "" +append_cache_entry ZEEK_SANITIZERS STRING "" append_cache_entry ZEEK_INCLUDE_PLUGINS STRING "" has_enable_mobile_ipv6=0 @@ -179,12 +180,12 @@ has_enable_mobile_ipv6=0 # parse arguments while [ $# -ne 0 ]; do case "$1" in - -*=*) optarg=`echo "$1" | sed 's/[-_a-zA-Z0-9]*=//'` ;; + -*=*) optarg=$(echo "$1" | sed 's/[-_a-zA-Z0-9]*=//') ;; *) optarg= ;; esac case "$1" in - --help|-h) + --help | -h) echo "${usage}" 1>&2 exit 1 ;; @@ -198,110 +199,105 @@ while [ $# -ne 0 ]; do builddir=$optarg ;; --build-type=*) - append_cache_entry CMAKE_BUILD_TYPE STRING $optarg + append_cache_entry CMAKE_BUILD_TYPE STRING $optarg if [ $(echo "$optarg" | tr [:upper:] [:lower:]) = "debug" ]; then - append_cache_entry ENABLE_DEBUG BOOL true + append_cache_entry ENABLE_DEBUG BOOL true fi ;; --generator=*) CMakeGenerator="$optarg" ;; --ccache) - append_cache_entry ENABLE_CCACHE BOOL true + append_cache_entry ENABLE_CCACHE BOOL true ;; --toolchain=*) - append_cache_entry CMAKE_TOOLCHAIN_FILE PATH $optarg + append_cache_entry CMAKE_TOOLCHAIN_FILE PATH $optarg ;; --include-plugins=*) - append_cache_entry ZEEK_INCLUDE_PLUGINS STRING $optarg + append_cache_entry ZEEK_INCLUDE_PLUGINS STRING $optarg ;; --prefix=*) prefix=$optarg - append_cache_entry CMAKE_INSTALL_PREFIX PATH $optarg - append_cache_entry ZEEK_ROOT_DIR PATH $optarg + append_cache_entry CMAKE_INSTALL_PREFIX PATH $optarg + append_cache_entry ZEEK_ROOT_DIR PATH $optarg ;; --libdir=*) - append_cache_entry CMAKE_INSTALL_LIBDIR PATH $optarg + append_cache_entry CMAKE_INSTALL_LIBDIR PATH $optarg ;; --plugindir=*) - append_cache_entry ZEEK_PLUGIN_DIR PATH $optarg + append_cache_entry ZEEK_PLUGIN_DIR PATH $optarg ;; --python-dir=*) - 
append_cache_entry ZEEK_PYTHON_DIR PATH $optarg + append_cache_entry ZEEK_PYTHON_DIR PATH $optarg ;; --python-prefix=*) - append_cache_entry ZEEK_PYTHON_PREFIX PATH $optarg + append_cache_entry ZEEK_PYTHON_PREFIX PATH $optarg ;; --python-home=*) - append_cache_entry ZEEK_PYTHON_HOME PATH $optarg + append_cache_entry ZEEK_PYTHON_HOME PATH $optarg ;; --scriptdir=*) append_cache_entry ZEEK_SCRIPT_INSTALL_PATH STRING $optarg user_set_scriptdir="true" ;; --conf-files-dir=*) - append_cache_entry ZEEK_ETC_INSTALL_DIR PATH $optarg + append_cache_entry ZEEK_ETC_INSTALL_DIR PATH $optarg user_set_conffilesdir="true" ;; --localstatedir=*) - append_cache_entry ZEEK_LOCAL_STATE_DIR PATH $optarg + append_cache_entry ZEEK_LOCAL_STATE_DIR PATH $optarg ;; --spooldir=*) - append_cache_entry ZEEK_SPOOL_DIR PATH $optarg + append_cache_entry ZEEK_SPOOL_DIR PATH $optarg ;; --logdir=*) - append_cache_entry ZEEK_LOG_DIR PATH $optarg + append_cache_entry ZEEK_LOG_DIR PATH $optarg ;; --mandir=*) - append_cache_entry ZEEK_MAN_INSTALL_PATH PATH $optarg + append_cache_entry ZEEK_MAN_INSTALL_PATH PATH $optarg ;; --enable-coverage) - append_cache_entry ENABLE_COVERAGE BOOL true - append_cache_entry ENABLE_DEBUG BOOL true + append_cache_entry ENABLE_COVERAGE BOOL true + append_cache_entry ENABLE_DEBUG BOOL true + ;; + --enable-debug) + append_cache_entry ENABLE_DEBUG BOOL true ;; --enable-fuzzers) append_cache_entry ZEEK_ENABLE_FUZZERS BOOL true ;; - --enable-debug) - append_cache_entry ENABLE_DEBUG BOOL true + --enable-jemalloc) + append_cache_entry ENABLE_JEMALLOC BOOL true ;; --enable-mobile-ipv6) has_enable_mobile_ipv6=1 ;; --enable-perftools) - append_cache_entry ENABLE_PERFTOOLS BOOL true + append_cache_entry ENABLE_PERFTOOLS BOOL true ;; --enable-perftools-debug) - append_cache_entry ENABLE_PERFTOOLS BOOL true - append_cache_entry ENABLE_PERFTOOLS_DEBUG BOOL true - ;; - --sanitizers=*) - append_cache_entry ZEEK_SANITIZERS STRING $optarg - ;; - --enable-jemalloc) - append_cache_entry 
ENABLE_JEMALLOC BOOL true - ;; - --enable-static-broker) - append_cache_entry BUILD_STATIC_BROKER BOOL true + append_cache_entry ENABLE_PERFTOOLS BOOL true + append_cache_entry ENABLE_PERFTOOLS_DEBUG BOOL true ;; --enable-static-binpac) - append_cache_entry BUILD_STATIC_BINPAC BOOL true + append_cache_entry BUILD_STATIC_BINPAC BOOL true ;; - --enable-cpp-tests) - append_cache_entry ENABLE_ZEEK_UNIT_TESTS BOOL true + --enable-static-broker) + append_cache_entry BUILD_STATIC_BROKER BOOL true ;; --enable-zeek-client) - append_cache_entry INSTALL_ZEEK_CLIENT BOOL true - ;; - --disable-zeekctl) - append_cache_entry INSTALL_ZEEKCTL BOOL false - ;; - --disable-auxtools) - append_cache_entry INSTALL_AUX_TOOLS BOOL false + append_cache_entry INSTALL_ZEEK_CLIENT BOOL true ;; --disable-archiver) - append_cache_entry INSTALL_ZEEK_ARCHIVER BOOL false + append_cache_entry INSTALL_ZEEK_ARCHIVER BOOL false + ;; + --disable-auxtools) + append_cache_entry INSTALL_AUX_TOOLS BOOL false + ;; + --disable-broker-tests) + append_cache_entry BROKER_DISABLE_TESTS BOOL true + append_cache_entry BROKER_DISABLE_DOC_EXAMPLES BOOL true ;; --disable-btest) append_cache_entry INSTALL_BTEST BOOL false @@ -309,71 +305,76 @@ while [ $# -ne 0 ]; do --disable-btest-pcaps) append_cache_entry INSTALL_BTEST_PCAPS BOOL false ;; - --disable-python) - append_cache_entry DISABLE_PYTHON_BINDINGS BOOL true + --disable-cpp-tests) + append_cache_entry ENABLE_ZEEK_UNIT_TESTS BOOL false ;; - --disable-broker-tests) - append_cache_entry BROKER_DISABLE_TESTS BOOL true - append_cache_entry BROKER_DISABLE_DOC_EXAMPLES BOOL true + --disable-python) + append_cache_entry DISABLE_PYTHON_BINDINGS BOOL true + ;; + --disable-zeekctl) + append_cache_entry INSTALL_ZEEKCTL BOOL false ;; --disable-zkg) append_cache_entry INSTALL_ZKG BOOL false ;; - --with-openssl=*) - append_cache_entry OPENSSL_ROOT_DIR PATH $optarg + --with-bifcl=*) + append_cache_entry BIFCL_EXE_PATH PATH $optarg ;; --with-bind=*) append_cache_entry 
BIND_ROOT_DIR PATH $optarg ;; - --with-pcap=*) - append_cache_entry PCAP_ROOT_DIR PATH $optarg - ;; --with-binpac=*) - append_cache_entry BINPAC_EXE_PATH PATH $optarg - ;; - --with-bifcl=*) - append_cache_entry BIFCL_EXE_PATH PATH $optarg - ;; - --with-flex=*) - append_cache_entry FLEX_EXECUTABLE PATH $optarg + append_cache_entry BINPAC_EXE_PATH PATH $optarg ;; --with-bison=*) append_cache_entry BISON_EXECUTABLE PATH $optarg ;; + --with-broker=*) + append_cache_entry BROKER_ROOT_DIR PATH $optarg + ;; + --with-caf=*) + append_cache_entry CAF_ROOT PATH $optarg + ;; + --with-flex=*) + append_cache_entry FLEX_EXECUTABLE PATH $optarg + ;; --with-geoip=*) append_cache_entry LibMMDB_ROOT_DIR PATH $optarg ;; + --with-jemalloc=*) + append_cache_entry JEMALLOC_ROOT_DIR PATH $optarg + append_cache_entry ENABLE_JEMALLOC BOOL true + ;; --with-krb5=*) append_cache_entry LibKrb5_ROOT_DIR PATH $optarg ;; + --with-libkqueue=*) + append_cache_entry LIBKQUEUE_ROOT_DIR PATH $optarg + ;; + --with-pcap=*) + append_cache_entry PCAP_ROOT_DIR PATH $optarg + ;; --with-perftools=*) append_cache_entry GooglePerftools_ROOT_DIR PATH $optarg ;; - --with-jemalloc=*) - append_cache_entry JEMALLOC_ROOT_DIR PATH $optarg - append_cache_entry ENABLE_JEMALLOC BOOL true + --with-openssl=*) + append_cache_entry OPENSSL_ROOT_DIR PATH $optarg ;; --with-python=*) - append_cache_entry PYTHON_EXECUTABLE PATH $optarg - ;; - --with-python-lib=*) - append_cache_entry PYTHON_LIBRARY PATH $optarg + append_cache_entry PYTHON_EXECUTABLE PATH $optarg ;; --with-python-inc=*) - append_cache_entry PYTHON_INCLUDE_DIR PATH $optarg - append_cache_entry PYTHON_INCLUDE_PATH PATH $optarg + append_cache_entry PYTHON_INCLUDE_DIR PATH $optarg + append_cache_entry PYTHON_INCLUDE_PATH PATH $optarg + ;; + --with-python-lib=*) + append_cache_entry PYTHON_LIBRARY PATH $optarg ;; --with-swig=*) - append_cache_entry SWIG_EXECUTABLE PATH $optarg + append_cache_entry SWIG_EXECUTABLE PATH $optarg ;; - --with-broker=*) - append_cache_entry 
BROKER_ROOT_DIR PATH $optarg - ;; - --with-caf=*) - append_cache_entry CAF_ROOT PATH $optarg - ;; - --with-libkqueue=*) - append_cache_entry LIBKQUEUE_ROOT_DIR PATH $optarg + --sanitizers=*) + append_cache_entry ZEEK_SANITIZERS STRING $optarg ;; --binary-package) append_cache_entry BINARY_PACKAGING_MODE BOOL true @@ -400,15 +401,15 @@ done if [ -z "$CMakeCommand" ]; then # prefer cmake3 over "regular" cmake (cmake == cmake2 on RHEL) - if command -v cmake3 >/dev/null 2>&1 ; then + if command -v cmake3 >/dev/null 2>&1; then CMakeCommand="cmake3" - elif command -v cmake >/dev/null 2>&1 ; then + elif command -v cmake >/dev/null 2>&1; then CMakeCommand="cmake" else echo "This package requires CMake, please install it first." echo "Then you may use this script to configure the CMake build." echo "Note: pass --cmake=PATH to use cmake in non-standard locations." - exit 1; + exit 1 fi fi @@ -442,8 +443,8 @@ else "$CMakeCommand" $CMakeCacheEntries $sourcedir fi -echo "# This is the command used to configure this build" > config.status -echo $command >> config.status +echo "# This is the command used to configure this build" >config.status +echo $command >>config.status chmod u+x config.status if [ $has_enable_mobile_ipv6 -eq 1 ]; then diff --git a/doc b/doc index fefd7e6ceb..b8ae1f3362 160000 --- a/doc +++ b/doc @@ -1 +1 @@ -Subproject commit fefd7e6ceb67dd011c268c658171967f1281b970 +Subproject commit b8ae1f336272371d6c46fda133e472a075f69e3d diff --git a/docker/btest/Makefile b/docker/btest/Makefile index 65d4d65c98..30d0348a72 100644 --- a/docker/btest/Makefile +++ b/docker/btest/Makefile @@ -1,11 +1,11 @@ DIAG=diag.log BTEST=../../auxil/btest/btest -all: cleanup btest-verbose +all: btest-verbose clean # Showing all tests. 
btest-verbose: @$(BTEST) -d -j -f $(DIAG) -cleanup: - @rm -f $(DIAG) +clean: + @rm -rf $(DIAG) .tmp .btest.failed.dat diff --git a/man/zeek.8 b/man/zeek.8 index 1a667f5630..7b6e046ed6 100644 --- a/man/zeek.8 +++ b/man/zeek.8 @@ -66,7 +66,7 @@ print version and exit print contents of state file .TP \fB\-C\fR,\ \-\-no\-checksums -ignore checksums +When this option is set, Zeek ignores invalid packet checksums and does process the packets. Furthermore, if this option is set Zeek also processes IP packets with a zero total length field, which is typically caused by TCP (TCP Segment Offloading) on the NIC. .TP \fB\-F\fR,\ \-\-force\-dns force DNS diff --git a/scripts/base/frameworks/analyzer/main.zeek b/scripts/base/frameworks/analyzer/main.zeek index 54ba82178d..18a8c97c52 100644 --- a/scripts/base/frameworks/analyzer/main.zeek +++ b/scripts/base/frameworks/analyzer/main.zeek @@ -9,6 +9,13 @@ ##! These tags are defined internally by ##! the analyzers themselves, and documented in their analyzer-specific ##! description along with the events that they generate. +##! +##! Analyzer tags are also inserted into a global :zeek:type:`AllAnalyzers::Tag` enum +##! type. This type contains duplicates of all of the :zeek:type:`Analyzer::Tag`, +##! :zeek:type:`PacketAnalyzer::Tag` and :zeek:type:`Files::Tag` enum values +##! and can be used for arguments to function/hook/event definitions where they +##! need to handle any analyzer type. See :zeek:id:`Analyzer::register_for_ports` +##! for an example. @load base/frameworks/packet-filter/utils @@ -66,13 +73,13 @@ export { ## tag: The tag of the analyzer. ## ## Returns: The set of ports. - global registered_ports: function(tag: Analyzer::Tag) : set[port]; + global registered_ports: function(tag: AllAnalyzers::Tag) : set[port]; ## Returns a table of all ports-to-analyzer mappings currently registered. ## ## Returns: A table mapping each analyzer to the set of ports ## registered for it. 
- global all_registered_ports: function() : table[Analyzer::Tag] of set[port]; + global all_registered_ports: function() : table[AllAnalyzers::Tag] of set[port]; ## Translates an analyzer type to a string with the analyzer's name. ## @@ -126,12 +133,16 @@ export { global disabled_analyzers: set[Analyzer::Tag] = { ANALYZER_TCPSTATS, } &redef; + + ## A table of ports mapped to analyzers that handle those ports. This is + ## used by BPF filtering and DPD. Session analyzers can add to this using + ## Analyzer::register_for_port(s) and packet analyzers can add to this + ## using PacketAnalyzer::register_for_port(s). + global ports: table[AllAnalyzers::Tag] of set[port]; } @load base/bif/analyzer.bif -global ports: table[Analyzer::Tag] of set[port]; - event zeek_init() &priority=5 { if ( disable_all ) @@ -176,22 +187,22 @@ function register_for_port(tag: Analyzer::Tag, p: port) : bool return T; } -function registered_ports(tag: Analyzer::Tag) : set[port] +function registered_ports(tag: AllAnalyzers::Tag) : set[port] { return tag in ports ? ports[tag] : set(); } -function all_registered_ports(): table[Analyzer::Tag] of set[port] +function all_registered_ports(): table[AllAnalyzers::Tag] of set[port] { return ports; } -function name(atype: Analyzer::Tag) : string +function name(atype: AllAnalyzers::Tag) : string { return __name(atype); } -function get_tag(name: string): Analyzer::Tag +function get_tag(name: string): AllAnalyzers::Tag { return __tag(name); } @@ -223,4 +234,3 @@ function get_bpf(): string } return output; } - diff --git a/scripts/base/frameworks/cluster/nodes/manager.zeek b/scripts/base/frameworks/cluster/nodes/manager.zeek index 53e3e52298..7504575dfc 100644 --- a/scripts/base/frameworks/cluster/nodes/manager.zeek +++ b/scripts/base/frameworks/cluster/nodes/manager.zeek @@ -2,7 +2,7 @@ ##! ##! The manager is passive (the workers connect to us), and once connected ##! the manager registers for the events on the workers that are needed -##! 
to get the desired data from the workers. This script will be +##! to get the desired data from the workers. This script will be ##! automatically loaded if necessary based on the type of node being started. ##! This is where the cluster manager sets it's specific settings for other diff --git a/scripts/base/frameworks/cluster/pools.zeek b/scripts/base/frameworks/cluster/pools.zeek index 1be7b336d8..8b2b9dee62 100644 --- a/scripts/base/frameworks/cluster/pools.zeek +++ b/scripts/base/frameworks/cluster/pools.zeek @@ -364,7 +364,7 @@ event zeek_init() &priority=-5 if ( manager_is_logger ) { local mgr = nodes_with_type(Cluster::MANAGER); - + if ( |mgr| > 0 ) { local eln = pool_eligibility[Cluster::LOGGER]$eligible_nodes; @@ -438,7 +438,7 @@ event zeek_init() &priority=-5 pet = pool_eligibility[pool$spec$node_type]; local nodes_to_init = |pet$eligible_nodes|; - + if ( pool$spec?$max_nodes && pool$spec$max_nodes < |pet$eligible_nodes| ) nodes_to_init = pool$spec$max_nodes; diff --git a/scripts/base/frameworks/dpd/main.zeek b/scripts/base/frameworks/dpd/main.zeek index 9424db3d5a..b00a600dc1 100644 --- a/scripts/base/frameworks/dpd/main.zeek +++ b/scripts/base/frameworks/dpd/main.zeek @@ -35,7 +35,7 @@ export { ## Number of protocol violations to tolerate before disabling an analyzer. option max_violations: table[Analyzer::Tag] of count = table() &default = 5; - ## Analyzers which you don't want to throw + ## Analyzers which you don't want to throw option ignore_violations: set[Analyzer::Tag] = set(); ## Ignore violations which go this many bytes into the connection. 
@@ -53,7 +53,7 @@ event zeek_init() &priority=5 Log::create_stream(DPD::LOG, [$columns=Info, $path="dpd", $policy=log_policy]); } -event protocol_confirmation(c: connection, atype: Analyzer::Tag, aid: count) &priority=10 +event analyzer_confirmation(c: connection, atype: AllAnalyzers::Tag, aid: count) &priority=10 { local analyzer = Analyzer::name(atype); @@ -63,7 +63,7 @@ event protocol_confirmation(c: connection, atype: Analyzer::Tag, aid: count) &pr add c$service[analyzer]; } -event protocol_violation(c: connection, atype: Analyzer::Tag, aid: count, +event analyzer_violation(c: connection, atype: AllAnalyzers::Tag, aid: count, reason: string) &priority=10 { local analyzer = Analyzer::name(atype); @@ -85,7 +85,7 @@ event protocol_violation(c: connection, atype: Analyzer::Tag, aid: count, c$dpd = info; } -event protocol_violation(c: connection, atype: Analyzer::Tag, aid: count, reason: string) &priority=5 +event analyzer_violation(c: connection, atype: AllAnalyzers::Tag, aid: count, reason: string) &priority=5 { if ( atype in ignore_violations ) return; @@ -114,8 +114,8 @@ event protocol_violation(c: connection, atype: Analyzer::Tag, aid: count, reason } } -event protocol_violation(c: connection, atype: Analyzer::Tag, aid: count, - reason: string) &priority=-5 +event analyzer_violation(c: connection, atype: AllAnalyzers::Tag, aid: count, + reason: string) &priority=-5 { if ( c?$dpd ) { diff --git a/scripts/base/frameworks/files/magic/general.sig b/scripts/base/frameworks/files/magic/general.sig index d34c3d043d..f0e41018f6 100644 --- a/scripts/base/frameworks/files/magic/general.sig +++ b/scripts/base/frameworks/files/magic/general.sig @@ -252,7 +252,7 @@ signature file-mpqgame { file-magic /^MPQ\x1a/ } -# Blizzard CASC Format game file +# Blizzard CASC Format game file signature file-blizgame { file-mime "application/x-blizgame", 100 file-magic /^BLTE/ @@ -302,4 +302,3 @@ signature file-iso9660 { file-mime "application/x-iso9660-image", 99 file-magic /CD001/ } - 
diff --git a/scripts/base/frameworks/files/magic/office.sig b/scripts/base/frameworks/files/magic/office.sig index 3b3a264c24..b9563c0407 100644 --- a/scripts/base/frameworks/files/magic/office.sig +++ b/scripts/base/frameworks/files/magic/office.sig @@ -1,7 +1,6 @@ - # This signature is non-specific and terrible but after -# searching for a long time there doesn't seem to be a -# better option. +# searching for a long time there doesn't seem to be a +# better option. signature file-msword { file-magic /^\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1/ file-mime "application/msword", 50 diff --git a/scripts/base/frameworks/files/main.zeek b/scripts/base/frameworks/files/main.zeek index 10cc81ba56..5f799a26c4 100644 --- a/scripts/base/frameworks/files/main.zeek +++ b/scripts/base/frameworks/files/main.zeek @@ -104,7 +104,7 @@ export { missing_bytes: count &log &default=0; ## The number of bytes in the file stream that were not delivered to - ## stream file analyzers. This could be overlapping bytes or + ## stream file analyzers. This could be overlapping bytes or ## bytes that couldn't be reassembled. overflow_bytes: count &log &default=0; @@ -150,7 +150,7 @@ export { ## f: the file. global enable_reassembly: function(f: fa_file); - ## Disables the file reassembler on this file. If the file is not + ## Disables the file reassembler on this file. If the file is not ## transferred out of order this will have no effect. ## ## f: the file. @@ -266,7 +266,7 @@ export { }; ## Register callbacks for protocols that work with the Files framework. - ## The callbacks must uniquely identify a file and each protocol can + ## The callbacks must uniquely identify a file and each protocol can ## only have a single callback registered for it. ## ## tag: Tag for the protocol analyzer having a callback being registered. @@ -280,7 +280,7 @@ export { ## manipulation when they are being added to a file before the core code ## takes over. 
This is unlikely to be interesting for users and should ## only be called by file analyzer authors but is *not required*. - ## + ## ## tag: Tag for the file analyzer. ## ## callback: Function to execute when the given file analyzer is being added. diff --git a/scripts/base/frameworks/intel/main.zeek b/scripts/base/frameworks/intel/main.zeek index d88d0eb225..8464e7ebfe 100644 --- a/scripts/base/frameworks/intel/main.zeek +++ b/scripts/base/frameworks/intel/main.zeek @@ -49,7 +49,7 @@ export { ## A URL for more information about the data. url: string &optional; }; - + ## Represents a piece of intelligence. type Item: record { ## The intelligence indicator. @@ -57,12 +57,12 @@ export { ## The type of data that the indicator field represents. indicator_type: Type; - + ## Metadata for the item. Typically represents more deeply ## descriptive data for a piece of intelligence. meta: MetaData; }; - + ## Enum to represent where data came from when it was discovered. ## The convention is to prefix the name with ``IN_``. type Where: enum { @@ -158,8 +158,8 @@ export { global extend_match: hook(info: Info, s: Seen, items: set[Item]); ## The expiration timeout for intelligence items. Once an item expires, the - ## :zeek:id:`Intel::item_expired` hook is called. Reinsertion of an item - ## resets the timeout. A negative value disables expiration of intelligence + ## :zeek:id:`Intel::item_expired` hook is called. Reinsertion of an item + ## resets the timeout. A negative value disables expiration of intelligence ## items. const item_expiration = -1 min &redef; diff --git a/scripts/base/frameworks/logging/writers/ascii.zeek b/scripts/base/frameworks/logging/writers/ascii.zeek index ce37a32537..bbeb94c172 100644 --- a/scripts/base/frameworks/logging/writers/ascii.zeek +++ b/scripts/base/frameworks/logging/writers/ascii.zeek @@ -66,6 +66,11 @@ export { ## This option is also available as a per-filter ``$config`` option. 
const json_timestamps: JSON::TimestampFormat = JSON::TS_EPOCH &redef; + ## Handling of optional fields when writing out JSON. By default the + ## JSON formatter skips key and val when the field is absent. Setting + ## the following field to T includes the key, with a null value. + const json_include_unset_fields = F &redef; + ## If true, include lines with log meta information such as column names ## with types, the values of ASCII logging options that are in use, and ## the time when the file was opened and closed (the latter at the end). diff --git a/scripts/base/frameworks/netcontrol/plugin.zeek b/scripts/base/frameworks/netcontrol/plugin.zeek index 36d5a76173..8ecbef7911 100644 --- a/scripts/base/frameworks/netcontrol/plugin.zeek +++ b/scripts/base/frameworks/netcontrol/plugin.zeek @@ -41,7 +41,7 @@ export { name: function(state: PluginState) : string; ## If true, plugin can expire rules itself. If false, the NetControl - ## framework will manage rule expiration. + ## framework will manage rule expiration. can_expire: bool; ## One-time initialization function called when plugin gets registered, and diff --git a/scripts/base/frameworks/netcontrol/plugins/debug.zeek b/scripts/base/frameworks/netcontrol/plugins/debug.zeek index 479d934b6c..f159cda73f 100644 --- a/scripts/base/frameworks/netcontrol/plugins/debug.zeek +++ b/scripts/base/frameworks/netcontrol/plugins/debug.zeek @@ -46,7 +46,7 @@ function debug_add_rule(p: PluginState, r: Rule) : bool local s = fmt("add_rule: %s", r); debug_log(p, s); - if ( do_something(p) ) + if ( do_something(p) ) { event NetControl::rule_added(r, p); return T; @@ -76,12 +76,10 @@ global debug_plugin = Plugin( function create_debug(do_something: bool) : PluginState { local p: PluginState = [$plugin=debug_plugin]; - + # FIXME: Why's the default not working? p$config = table(); p$config["all"] = (do_something ? 
"1" : "0"); return p; } - - diff --git a/scripts/base/frameworks/netcontrol/plugins/packetfilter.zeek b/scripts/base/frameworks/netcontrol/plugins/packetfilter.zeek index 3648ed3955..ec3cc24247 100644 --- a/scripts/base/frameworks/netcontrol/plugins/packetfilter.zeek +++ b/scripts/base/frameworks/netcontrol/plugins/packetfilter.zeek @@ -1,7 +1,7 @@ ##! NetControl plugin for the process-level PacketFilter that comes with ##! Zeek. Since the PacketFilter in Zeek is quite limited in scope ##! and can only add/remove filters for addresses, this is quite -##! limited in scope at the moment. +##! limited in scope at the moment. @load ../plugin @@ -110,4 +110,3 @@ function create_packetfilter() : PluginState return p; } - diff --git a/scripts/base/frameworks/netcontrol/types.zeek b/scripts/base/frameworks/netcontrol/types.zeek index beac2302f6..731e8c38b0 100644 --- a/scripts/base/frameworks/netcontrol/types.zeek +++ b/scripts/base/frameworks/netcontrol/types.zeek @@ -1,7 +1,7 @@ ##! This file defines the types that are used by the NetControl framework. ##! ##! The most important type defined in this file is :zeek:see:`NetControl::Rule`, -##! which is used to describe all rules that can be expressed by the NetControl framework. +##! which is used to describe all rules that can be expressed by the NetControl framework. module NetControl; diff --git a/scripts/base/frameworks/notice/actions/add-geodata.zeek b/scripts/base/frameworks/notice/actions/add-geodata.zeek index 04cc10209d..6f07a9dd31 100644 --- a/scripts/base/frameworks/notice/actions/add-geodata.zeek +++ b/scripts/base/frameworks/notice/actions/add-geodata.zeek @@ -1,6 +1,6 @@ ##! This script adds geographic location data to notices for the "remote" -##! host in a connection. It does make the assumption that one of the -##! addresses in a connection is "local" and one is "remote" which is +##! host in a connection. It does make the assumption that one of the +##! 
addresses in a connection is "local" and one is "remote" which is ##! probably a safe assumption to make in most cases. If both addresses ##! are remote, it will use the $src address. @@ -17,13 +17,13 @@ export { ## in order for this to work. ACTION_ADD_GEODATA }; - + redef record Info += { ## If GeoIP support is built in, notices can have geographic ## information attached to them. remote_location: geo_location &log &optional; }; - + ## Notice types which should have the "remote" location looked up. ## If GeoIP support is not built in, this does nothing. option lookup_location_types: set[Notice::Type] = {}; @@ -35,7 +35,7 @@ hook policy(n: Notice::Info) &priority=10 add n$actions[ACTION_ADD_GEODATA]; } -# This is handled at a high priority in case other notice handlers +# This is handled at a high priority in case other notice handlers # want to use the data. hook notice(n: Notice::Info) &priority=10 { diff --git a/scripts/base/frameworks/notice/actions/email_admin.zeek b/scripts/base/frameworks/notice/actions/email_admin.zeek index 6ba5937bb7..d9abeea41e 100644 --- a/scripts/base/frameworks/notice/actions/email_admin.zeek +++ b/scripts/base/frameworks/notice/actions/email_admin.zeek @@ -10,9 +10,9 @@ module Notice; export { redef enum Action += { - ## Indicate that the generated email should be addressed to the + ## Indicate that the generated email should be addressed to the ## appropriate email addresses as found by the - ## :zeek:id:`Site::get_emails` function based on the relevant + ## :zeek:id:`Site::get_emails` function based on the relevant ## address or addresses indicated in the notice. 
ACTION_EMAIL_ADMIN }; @@ -23,7 +23,6 @@ hook notice(n: Notice::Info) if ( |Site::local_admins| > 0 && ACTION_EMAIL_ADMIN in n$actions ) { - local email = ""; if ( n?$src && |Site::get_emails(n$src)| > 0 ) add n$email_dest[Site::get_emails(n$src)]; if ( n?$dst && |Site::get_emails(n$dst)| > 0 ) diff --git a/scripts/base/frameworks/openflow/cluster.zeek b/scripts/base/frameworks/openflow/cluster.zeek index 6ff005b877..0ab56ca293 100644 --- a/scripts/base/frameworks/openflow/cluster.zeek +++ b/scripts/base/frameworks/openflow/cluster.zeek @@ -112,12 +112,12 @@ function lookup_controller(name: string): vector of Controller if ( Cluster::local_node_type() != Cluster::MANAGER ) return vector(); - # I am not quite sure if we can actually get away with this - in the + # I am not quite sure if we can actually get away with this - in the # current state, this means that the individual nodes cannot lookup # a controller by name. # # This means that there can be no reactions to things on the actual - # worker nodes - because they cannot look up a name. On the other hand - + # worker nodes - because they cannot look up a name. On the other hand - # currently we also do not even send the events to the worker nodes (at least # not if we are using broker). Because of that I am not really feeling that # badly about it... diff --git a/scripts/base/frameworks/signatures/main.zeek b/scripts/base/frameworks/signatures/main.zeek index b235cba312..a610afccf5 100644 --- a/scripts/base/frameworks/signatures/main.zeek +++ b/scripts/base/frameworks/signatures/main.zeek @@ -60,7 +60,7 @@ export { SIG_ALARM_PER_ORIG, ## Alarm once and then never again. SIG_ALARM_ONCE, - ## Count signatures per responder host and alarm with the + ## Count signatures per responder host and alarm with the ## :zeek:enum:`Signatures::Count_Signature` notice if a threshold ## defined by :zeek:id:`Signatures::count_thresholds` is reached. 
SIG_COUNT_PER_RESP, @@ -100,15 +100,15 @@ export { ## Number of hosts, from a summary count. host_count: count &log &optional; }; - - ## Actions for a signature. + + ## Actions for a signature. const actions: table[string] of Action = { ["unspecified"] = SIG_IGNORE, # place-holder } &redef &default = SIG_ALARM; ## Signature IDs that should always be ignored. option ignored_ids = /NO_DEFAULT_MATCHES/; - + ## Generate a notice if, for a pair [orig, signature], the number of ## different responders has reached one of the thresholds. const horiz_scan_thresholds = { 5, 10, 50, 100, 500, 1000 } &redef; @@ -120,7 +120,7 @@ export { ## Generate a notice if a :zeek:enum:`Signatures::SIG_COUNT_PER_RESP` ## signature is triggered as often as given by one of these thresholds. const count_thresholds = { 5, 10, 50, 100, 500, 1000, 10000, 1000000, } &redef; - + ## The interval between when :zeek:enum:`Signatures::Signature_Summary` ## notices are generated. option summary_interval = 1 day; @@ -147,7 +147,7 @@ event zeek_init() &priority=5 { Log::create_stream(Signatures::LOG, [$columns=Info, $ev=log_signature, $path="signatures", $policy=log_policy]); } - + # Returns true if the given signature has already been triggered for the given # [orig, resp] pair. 
function has_signature_matched(id: string, orig: addr, resp: addr): bool @@ -173,7 +173,7 @@ event signature_match(state: signature_state, msg: string, data: string) # Trim the matched data down to something reasonable if ( |data| > 140 ) data = fmt("%s...", sub_bytes(data, 0, 140)); - + local src_addr: addr; local src_port: port; local dst_addr: addr; @@ -212,7 +212,7 @@ event signature_match(state: signature_state, msg: string, data: string) local notice = F; if ( action == SIG_ALARM ) notice = T; - + if ( action == SIG_COUNT_PER_RESP ) { local dst = state$conn$id$resp_h; @@ -252,7 +252,7 @@ event signature_match(state: signature_state, msg: string, data: string) $conn=state$conn, $src=src_addr, $dst=dst_addr, $msg=fmt("%s: %s", src_addr, msg), $sub=data]); - + if ( action == SIG_FILE_BUT_NO_SCAN || action == SIG_SUMMARY ) return; @@ -279,7 +279,7 @@ event signature_match(state: signature_state, msg: string, data: string) fmt("%s has triggered signature %s on %d hosts", orig, sig_id, hcount); - Log::write(Signatures::LOG, + Log::write(Signatures::LOG, [$ts=network_time(), $note=Multiple_Sig_Responders, $src_addr=orig, $sig_id=sig_id, $event_msg=msg, $host_count=hcount, $sub_msg=horz_scan_msg]); @@ -296,9 +296,9 @@ event signature_match(state: signature_state, msg: string, data: string) fmt("%s has triggered %d different signatures on host %s", orig, vcount, resp); - Log::write(Signatures::LOG, + Log::write(Signatures::LOG, [$ts=network_time(), - $note=Multiple_Signatures, + $note=Multiple_Signatures, $src_addr=orig, $dst_addr=resp, $sig_id=sig_id, $sig_count=vcount, $event_msg=fmt("%s different signatures triggered", vcount), @@ -311,4 +311,3 @@ event signature_match(state: signature_state, msg: string, data: string) last_vthresh[orig] = vcount; } } - diff --git a/scripts/base/frameworks/software/main.zeek b/scripts/base/frameworks/software/main.zeek index 5704ee98b9..9fed88668b 100644 --- a/scripts/base/frameworks/software/main.zeek +++ 
b/scripts/base/frameworks/software/main.zeek @@ -13,18 +13,18 @@ module Software; export { ## The software logging stream identifier. redef enum Log::ID += { LOG }; - + ## A default logging policy hook for the stream. global log_policy: Log::PolicyHook; ## Scripts detecting new types of software need to redef this enum to add - ## their own specific software types which would then be used when they + ## their own specific software types which would then be used when they ## create :zeek:type:`Software::Info` records. type Type: enum { ## A placeholder type for when the type of software is not known. UNKNOWN, }; - + ## A structure to represent the numeric version of software. type Version: record { ## Major version number. @@ -38,7 +38,7 @@ export { ## Additional version string (e.g. "beta42"). addl: string &optional; } &log; - + ## The record type that is used for representing and logging software. type Info: record { ## The time at which the software was detected. @@ -58,9 +58,9 @@ export { ## parsing doesn't always work reliably in all cases and this ## acts as a fallback in the logs. unparsed_version: string &log &optional; - + ## This can indicate that this software being detected should - ## definitely be sent onward to the logging framework. By + ## definitely be sent onward to the logging framework. By ## default, only software that is "interesting" due to a change ## in version or it being currently unknown is sent to the ## logging framework. This can be set to T to force the record @@ -68,7 +68,7 @@ export { ## tracking needs to happen in a specific way to the software. force_log: bool &default=F; }; - + ## Hosts whose software should be detected and tracked. ## Choices are: LOCAL_HOSTS, REMOTE_HOSTS, ALL_HOSTS, NO_HOSTS. option asset_tracking = LOCAL_HOSTS; @@ -78,21 +78,21 @@ export { ## id: The connection id where the software was discovered. ## ## info: A record representing the software discovered. 
- ## + ## ## Returns: T if the software was logged, F otherwise. global found: function(id: conn_id, info: Info): bool; - + ## Compare two version records. - ## + ## ## Returns: -1 for v1 < v2, 0 for v1 == v2, 1 for v1 > v2. ## If the numerical version numbers match, the *addl* string ## is compared lexicographically. global cmp_versions: function(v1: Version, v2: Version): int; - - ## Sometimes software will expose itself on the network with - ## slight naming variations. This table provides a mechanism - ## for a piece of software to be renamed to a single name - ## even if it exposes itself with an alternate name. The + + ## Sometimes software will expose itself on the network with + ## slight naming variations. This table provides a mechanism + ## for a piece of software to be renamed to a single name + ## even if it exposes itself with an alternate name. The ## yielded string is the name that will be logged and generally ## used for everything. global alternate_names: table[string] of string { @@ -100,17 +100,17 @@ export { } &default=function(a: string): string { return a; }; ## Type to represent a collection of :zeek:type:`Software::Info` records. - ## It's indexed with the name of a piece of software such as "Firefox" + ## It's indexed with the name of a piece of software such as "Firefox" ## and it yields a :zeek:type:`Software::Info` record with more ## information about the software. type SoftwareSet: table[string] of Info; - + ## The set of software associated with an address. Data expires from - ## this table after one day by default so that a detected piece of + ## this table after one day by default so that a detected piece of ## software will be logged once each day. In a cluster, this table is ## uniformly distributed among proxy nodes. global tracked: table[addr] of SoftwareSet &create_expire=1day; - + ## This event can be handled to access the :zeek:type:`Software::Info` ## record as it is sent on to the logging framework. 
global log_software: event(rec: Info); @@ -128,7 +128,7 @@ event zeek_init() &priority=5 { Log::create_stream(Software::LOG, [$columns=Info, $ev=log_software, $path="software", $policy=log_policy]); } - + type Description: record { name: string; version: Version; @@ -138,13 +138,13 @@ type Description: record { # Defining this here because of a circular dependency between two functions. global parse_mozilla: function(unparsed_version: string): Description; -# Don't even try to understand this now, just make sure the tests are +# Don't even try to understand this now, just make sure the tests are # working. function parse(unparsed_version: string): Description { local software_name = ""; local v: Version; - + # Parse browser-alike versions separately if ( /^(Mozilla|Opera)\/[0-9]+\./ in unparsed_version ) { @@ -220,10 +220,10 @@ function parse(unparsed_version: string): Description { v$addl = strip(version_parts[2]); } - + } } - + if ( 3 in version_numbers && version_numbers[3] != "" ) v$minor3 = extract_count(version_numbers[3]); if ( 2 in version_numbers && version_numbers[2] != "" ) @@ -234,7 +234,7 @@ function parse(unparsed_version: string): Description v$major = extract_count(version_numbers[0]); } } - + return [$version=v, $unparsed_version=unparsed_version, $name=alternate_names[software_name]]; } @@ -245,7 +245,7 @@ function parse_with_cache(unparsed_version: string): Description { if (unparsed_version in parse_cache) return parse_cache[unparsed_version]; - + local res = parse(unparsed_version); parse_cache[unparsed_version] = res; return res; @@ -256,7 +256,7 @@ function parse_mozilla(unparsed_version: string): Description local software_name = ""; local v: Version; local parts: string_vec; - + if ( /Opera [0-9\.]*$/ in unparsed_version ) { software_name = "Opera"; @@ -349,7 +349,7 @@ function parse_mozilla(unparsed_version: string): Description if ( 2 in parts ) v = parse(parts[2])$version; } - + else if ( /AdobeAIR\/[0-9\.]*/ in unparsed_version ) { 
software_name = "AdobeAIR"; @@ -392,7 +392,7 @@ function cmp_versions(v1: Version, v2: Version): int else return v1?$major ? 1 : -1; } - + if ( v1?$minor && v2?$minor ) { if ( v1$minor < v2$minor ) @@ -407,7 +407,7 @@ function cmp_versions(v1: Version, v2: Version): int else return v1?$minor ? 1 : -1; } - + if ( v1?$minor2 && v2?$minor2 ) { if ( v1$minor2 < v2$minor2 ) @@ -462,7 +462,7 @@ function software_endpoint_name(id: conn_id, host: addr): string # Convert a version into a string "a.b.c-x". function software_fmt_version(v: Version): string { - return fmt("%s%s%s%s%s", + return fmt("%s%s%s%s%s", v?$major ? fmt("%d", v$major) : "0", v?$minor ? fmt(".%d", v$minor) : "", v?$minor2 ? fmt(".%d", v$minor2) : "", @@ -510,10 +510,10 @@ event Software::register(info: Info) local changed = cmp_versions(old$version, info$version) != 0; if ( changed ) - event Software::version_change(old, info); + event Software::version_change(old, info); else if ( ! info$force_log ) # If the version hasn't changed, then we're just redetecting the - # same thing, then we don't care. + # same thing, then we don't care. return; } @@ -526,7 +526,7 @@ function found(id: conn_id, info: Info): bool if ( ! info$force_log && ! addr_matches_host(info$host, asset_tracking) ) return F; - if ( ! info?$ts ) + if ( ! info?$ts ) info$ts = network_time(); if ( info?$version ) diff --git a/scripts/base/frameworks/sumstats/cluster.zeek b/scripts/base/frameworks/sumstats/cluster.zeek index 2296a4e38c..f055355170 100644 --- a/scripts/base/frameworks/sumstats/cluster.zeek +++ b/scripts/base/frameworks/sumstats/cluster.zeek @@ -220,7 +220,7 @@ event zeek_init() &priority=100 # This variable is maintained by manager nodes as they collect and aggregate # results. # Index on a uid. 
-global stats_keys: table[string] of set[Key] &read_expire=1min +global stats_keys: table[string] of set[Key] &read_expire=1min &expire_func=function(s: table[string] of set[Key], idx: string): interval { Reporter::warning(fmt("SumStat key request for the %s SumStat uid took longer than 1 minute and was automatically cancelled.", idx)); diff --git a/scripts/base/frameworks/sumstats/main.zeek b/scripts/base/frameworks/sumstats/main.zeek index 930125f6c8..8f8638b57b 100644 --- a/scripts/base/frameworks/sumstats/main.zeek +++ b/scripts/base/frameworks/sumstats/main.zeek @@ -510,7 +510,7 @@ function check_thresholds(ss: SumStat, key: Key, result: Result, modify_pct: dou return F; # Add in the extra ResultVals to make threshold_vals easier to write. - # This length comparison should work because we just need to make + # This length comparison should work because we just need to make # sure that we have the same number of reducers and results. if ( |ss$reducers| != |result| ) { @@ -568,4 +568,3 @@ function threshold_crossed(ss: SumStat, key: Key, result: Result) ss$threshold_crossed(key, result); } - diff --git a/scripts/base/frameworks/sumstats/plugins/sample.zeek b/scripts/base/frameworks/sumstats/plugins/sample.zeek index 2f96c5eb30..d5d236f43f 100644 --- a/scripts/base/frameworks/sumstats/plugins/sample.zeek +++ b/scripts/base/frameworks/sumstats/plugins/sample.zeek @@ -95,7 +95,7 @@ hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal) { local other_vector: vector of Observation; local othercount: count; - + if ( rv1$sample_elements > rv2$sample_elements ) { result$samples = copy(rv1$samples); diff --git a/scripts/base/frameworks/sumstats/plugins/unique.zeek b/scripts/base/frameworks/sumstats/plugins/unique.zeek index 5fcaa1dc3c..069522effb 100644 --- a/scripts/base/frameworks/sumstats/plugins/unique.zeek +++ b/scripts/base/frameworks/sumstats/plugins/unique.zeek @@ -46,7 +46,7 @@ hook register_observe_plugins() if ( ! 
r?$unique_max || |rv$unique_vals| <= r$unique_max ) add rv$unique_vals[obs]; - + rv$unique = |rv$unique_vals|; }); } diff --git a/scripts/base/frameworks/tunnels/main.zeek b/scripts/base/frameworks/tunnels/main.zeek index 688d1d7f67..3c4e8adf3d 100644 --- a/scripts/base/frameworks/tunnels/main.zeek +++ b/scripts/base/frameworks/tunnels/main.zeek @@ -90,20 +90,9 @@ export { global finalize_tunnel: Conn::RemovalHook; } -const ayiya_ports = { 5072/udp }; -const teredo_ports = { 3544/udp }; -const gtpv1_ports = { 2152/udp, 2123/udp }; -redef likely_server_ports += { ayiya_ports, teredo_ports, gtpv1_ports, vxlan_ports, geneve_ports }; - event zeek_init() &priority=5 { Log::create_stream(Tunnel::LOG, [$columns=Info, $path="tunnel", $policy=log_policy]); - - Analyzer::register_for_ports(Analyzer::ANALYZER_AYIYA, ayiya_ports); - Analyzer::register_for_ports(Analyzer::ANALYZER_TEREDO, teredo_ports); - Analyzer::register_for_ports(Analyzer::ANALYZER_GTPV1, gtpv1_ports); - Analyzer::register_for_ports(Analyzer::ANALYZER_VXLAN, vxlan_ports); - Analyzer::register_for_ports(Analyzer::ANALYZER_GENEVE, geneve_ports); } function register_all(ecv: EncapsulatingConnVector) diff --git a/scripts/base/init-bare.zeek b/scripts/base/init-bare.zeek index dc6d8ce802..9e102ed6fc 100644 --- a/scripts/base/init-bare.zeek +++ b/scripts/base/init-bare.zeek @@ -1016,9 +1016,16 @@ const TCP_RESET = 6; ##< Endpoint has sent RST. const UDP_INACTIVE = 0; ##< Endpoint is still inactive. const UDP_ACTIVE = 1; ##< Endpoint has sent something. -## If true, don't verify checksums. Useful for running on altered trace -## files, and for saving a few cycles, but at the risk of analyzing invalid -## data. Note that the ``-C`` command-line option overrides the setting of this +## If true, don't verify checksums, and accept packets that give a length of +## zero in the IPv4 header. This is useful when running against traces of local +## traffic and the NIC checksum offloading feature is enabled. 
It can also +## be useful for running on altered trace files, and for saving a few cycles +## at the risk of analyzing invalid data. +## With this option, packets that have a value of zero in the total-length field +## of the IPv4 header are also accepted, and the capture-length is used instead. +## The total-length field is commonly set to zero when the NIC segmentation offloading +## feature is enabled. +## Note that the ``-C`` command-line option overrides the setting of this ## variable. const ignore_checksums = F &redef; @@ -3884,6 +3891,14 @@ type dns_loc_rr: record { is_query: count; ##< The RR is a query/Response. }; +## DNS SVCB and HTTPS RRs +## +## .. zeek:see:: dns_SVCB dns_HTTPS +type dns_svcb_rr: record { + svc_priority: count; ##< Service priority for the current record, 0 indicates that this record is in AliasMode and cannot carry svc_params; otherwise this is in ServiceMode, and may include svc_params + target_name: string; ##< Target name, the hostname of the service endpoint. +}; + # DNS answer types. # # .. zeek:see:: dns_answerr @@ -5021,14 +5036,14 @@ export { ## With this set, the Teredo analyzer waits until it sees both sides ## of a connection using a valid Teredo encapsulation before issuing - ## a :zeek:see:`protocol_confirmation`. If it's false, the first + ## a :zeek:see:`analyzer_confirmation`. If it's false, the first ## occurrence of a packet with valid Teredo encapsulation causes a ## confirmation. const delay_teredo_confirmation = T &redef; ## With this set, the GTP analyzer waits until the most-recent upflow ## and downflow packets are a valid GTPv1 encapsulation before - ## issuing :zeek:see:`protocol_confirmation`. If it's false, the + ## issuing :zeek:see:`analyzer_confirmation`. If it's false, the ## first occurrence of a packet with valid GTPv1 encapsulation causes ## confirmation.
Since the same inner connection can be carried ## differing outer upflow/downflow connections, setting to false @@ -5045,17 +5060,6 @@ export { ## may choose whether to perform the validation. const validate_vxlan_checksums = T &redef; - ## The set of UDP ports used for VXLAN traffic. Traffic using this - ## UDP destination port will attempt to be decapsulated. Note that if - ## if you customize this, you may still want to manually ensure that - ## :zeek:see:`likely_server_ports` also gets populated accordingly. - const vxlan_ports: set[port] = { 4789/udp } &redef; - - ## The set of UDP ports used for Geneve traffic. Traffic using this - ## UDP destination port will attempt to be decapsulated. Note that if - ## if you customize this, you may still want to manually ensure that - ## :zeek:see:`likely_server_ports` also gets populated accordingly. - const geneve_ports: set[port] = { 6081/udp } &redef; } # end export module Reporter; diff --git a/scripts/base/packet-protocols/__load__.zeek b/scripts/base/packet-protocols/__load__.zeek index 3a4d9209cb..5ea4cb93ef 100644 --- a/scripts/base/packet-protocols/__load__.zeek +++ b/scripts/base/packet-protocols/__load__.zeek @@ -1,3 +1,5 @@ +@load ./main.zeek + @load base/packet-protocols/root @load base/packet-protocols/ip @load base/packet-protocols/skip @@ -12,9 +14,15 @@ @load base/packet-protocols/pppoe @load base/packet-protocols/vlan @load base/packet-protocols/mpls -@load base/packet-protocols/gre -@load base/packet-protocols/iptunnel @load base/packet-protocols/vntag @load base/packet-protocols/udp @load base/packet-protocols/tcp @load base/packet-protocols/icmp + +@load base/packet-protocols/gre +@load base/packet-protocols/iptunnel +@load base/packet-protocols/ayiya +@load base/packet-protocols/geneve +@load base/packet-protocols/vxlan +@load base/packet-protocols/teredo +@load base/packet-protocols/gtpv1 diff --git a/scripts/base/packet-protocols/ayiya/__load__.zeek 
b/scripts/base/packet-protocols/ayiya/__load__.zeek new file mode 100644 index 0000000000..d551be57d3 --- /dev/null +++ b/scripts/base/packet-protocols/ayiya/__load__.zeek @@ -0,0 +1 @@ +@load ./main \ No newline at end of file diff --git a/scripts/base/packet-protocols/ayiya/main.zeek b/scripts/base/packet-protocols/ayiya/main.zeek new file mode 100644 index 0000000000..d6fab5a44b --- /dev/null +++ b/scripts/base/packet-protocols/ayiya/main.zeek @@ -0,0 +1,19 @@ +module PacketAnalyzer::AYIYA; + +# Needed for port registration for BPF +@load base/frameworks/analyzer/main + +const IPPROTO_IPV4 : count = 4; +const IPPROTO_IPV6 : count = 41; + +const ayiya_ports = { 5072/udp }; +redef likely_server_ports += { ayiya_ports }; + +event zeek_init() &priority=20 + { + PacketAnalyzer::register_protocol_detection(PacketAnalyzer::ANALYZER_UDP, PacketAnalyzer::ANALYZER_AYIYA); + PacketAnalyzer::register_packet_analyzer(PacketAnalyzer::ANALYZER_AYIYA, IPPROTO_IPV4, PacketAnalyzer::ANALYZER_IP); + PacketAnalyzer::register_packet_analyzer(PacketAnalyzer::ANALYZER_AYIYA, IPPROTO_IPV6, PacketAnalyzer::ANALYZER_IP); + + PacketAnalyzer::register_for_ports(PacketAnalyzer::ANALYZER_UDP, PacketAnalyzer::ANALYZER_AYIYA, ayiya_ports); + } diff --git a/scripts/base/packet-protocols/geneve/__load__.zeek b/scripts/base/packet-protocols/geneve/__load__.zeek new file mode 100644 index 0000000000..d551be57d3 --- /dev/null +++ b/scripts/base/packet-protocols/geneve/__load__.zeek @@ -0,0 +1 @@ +@load ./main \ No newline at end of file diff --git a/scripts/base/packet-protocols/geneve/main.zeek b/scripts/base/packet-protocols/geneve/main.zeek new file mode 100644 index 0000000000..64efe9ce66 --- /dev/null +++ b/scripts/base/packet-protocols/geneve/main.zeek @@ -0,0 +1,27 @@ +module PacketAnalyzer::Geneve; + +export { + ## The set of UDP ports used for Geneve traffic. Traffic using this + ## UDP destination port will attempt to be decapsulated. 
Note that if + ## you customize this, you may still want to manually ensure that + ## :zeek:see:`likely_server_ports` also gets populated accordingly. + const geneve_ports: set[port] = { 6081/udp } &redef; +} + +redef likely_server_ports += { geneve_ports }; + +event zeek_init() &priority=20 + { + PacketAnalyzer::register_for_ports(PacketAnalyzer::ANALYZER_UDP, PacketAnalyzer::ANALYZER_GENEVE, geneve_ports); + + # This is defined by IANA as being "Trans Ether Bridging" but the Geneve RFC + # says to use it for Ethernet. See + # https://datatracker.ietf.org/doc/html/draft-gross-geneve-00#section-3.4 + # for details. + PacketAnalyzer::register_packet_analyzer(PacketAnalyzer::ANALYZER_GENEVE, 0x6558, PacketAnalyzer::ANALYZER_ETHERNET); + + # Additional EtherType mappings for protocols that we already handle natively: IPv4 (0x0800), IPv6 (0x86DD), ARP (0x0806). + PacketAnalyzer::register_packet_analyzer(PacketAnalyzer::ANALYZER_GENEVE, 0x0800, PacketAnalyzer::ANALYZER_IP); + PacketAnalyzer::register_packet_analyzer(PacketAnalyzer::ANALYZER_GENEVE, 0x86DD, PacketAnalyzer::ANALYZER_IP); + PacketAnalyzer::register_packet_analyzer(PacketAnalyzer::ANALYZER_GENEVE, 0x0806, PacketAnalyzer::ANALYZER_ARP); + } diff --git a/scripts/base/packet-protocols/gtpv1/__load__.zeek b/scripts/base/packet-protocols/gtpv1/__load__.zeek new file mode 100644 index 0000000000..d551be57d3 --- /dev/null +++ b/scripts/base/packet-protocols/gtpv1/__load__.zeek @@ -0,0 +1 @@ +@load ./main \ No newline at end of file diff --git a/scripts/base/packet-protocols/gtpv1/main.zeek b/scripts/base/packet-protocols/gtpv1/main.zeek new file mode 100644 index 0000000000..052efa5f58 --- /dev/null +++ b/scripts/base/packet-protocols/gtpv1/main.zeek @@ -0,0 +1,28 @@ +module PacketAnalyzer::GTPV1; + +# This needs to be loaded here so the function is available. Function BIFs normally aren't +# loaded until after the packet analysis init scripts are run, and then zeek complains it +# can't find the function.
+@load base/bif/plugins/Zeek_GTPv1.functions.bif + +# Needed for port registration for BPF +@load base/frameworks/analyzer/main + +export { + ## Default analyzer + const default_analyzer: PacketAnalyzer::Tag = PacketAnalyzer::ANALYZER_IP &redef; +} + +const gtpv1_ports = { 2152/udp, 2123/udp }; +redef likely_server_ports += { gtpv1_ports }; + +event zeek_init() &priority=20 + { + PacketAnalyzer::register_protocol_detection(PacketAnalyzer::ANALYZER_UDP, PacketAnalyzer::ANALYZER_GTPV1); + PacketAnalyzer::register_for_ports(PacketAnalyzer::ANALYZER_UDP, PacketAnalyzer::ANALYZER_GTPV1, gtpv1_ports); + } + +event connection_state_remove(c: connection) + { + remove_gtpv1_connection(c$id); + } diff --git a/scripts/base/packet-protocols/main.zeek b/scripts/base/packet-protocols/main.zeek new file mode 100644 index 0000000000..e696da2556 --- /dev/null +++ b/scripts/base/packet-protocols/main.zeek @@ -0,0 +1,61 @@ +module PacketAnalyzer; + +@load base/frameworks/analyzer/main.zeek + +export { + ## Registers a set of well-known ports for an analyzer. If a future + ## connection on one of these ports is seen, the analyzer will be + ## automatically assigned to parsing it. The function *adds* to all ports + ## already registered, it doesn't replace them. + ## + ## parent: The tag of the parent analyzer the ports are registered with. + ## child: The tag of the analyzer to associate with the ports. + ## ports: The set of well-known ports to associate with the analyzer. + ## + ## Returns: True if the ports were successfully registered. + global register_for_ports: function(parent: PacketAnalyzer::Tag, + child: PacketAnalyzer::Tag, + ports: set[port]) : bool; + + ## Registers an individual well-known port for an analyzer. If a future + ## connection on this port is seen, the analyzer will be automatically + ## assigned to parsing it. The function *adds* to all ports already + ## registered, it doesn't replace them. + ## + ## parent: The tag of the parent analyzer the port is registered with. + ## child: The tag of the analyzer to associate with the port. + ## p: The well-known port to associate with the analyzer.
+ ## + ## Returns: True if the port was successfully registered. + global register_for_port: function(parent: PacketAnalyzer::Tag, + child: PacketAnalyzer::Tag, + p: port) : bool; +} + +function register_for_ports(parent: PacketAnalyzer::Tag, + child: PacketAnalyzer::Tag, + ports: set[port]) : bool + { + local rc = T; + + for ( p in ports ) + { + if ( ! register_for_port(parent, child, p) ) + rc = F; + } + + return rc; + } + +function register_for_port(parent: PacketAnalyzer::Tag, + child: PacketAnalyzer::Tag, + p: port) : bool + { + register_packet_analyzer(parent, port_to_count(p), child); + + if ( child !in Analyzer::ports ) + Analyzer::ports[child] = set(); + + add Analyzer::ports[child][p]; + return T; + } diff --git a/scripts/base/packet-protocols/teredo/__load__.zeek b/scripts/base/packet-protocols/teredo/__load__.zeek new file mode 100644 index 0000000000..d551be57d3 --- /dev/null +++ b/scripts/base/packet-protocols/teredo/__load__.zeek @@ -0,0 +1 @@ +@load ./main \ No newline at end of file diff --git a/scripts/base/packet-protocols/teredo/main.zeek b/scripts/base/packet-protocols/teredo/main.zeek new file mode 100644 index 0000000000..5bba5c9243 --- /dev/null +++ b/scripts/base/packet-protocols/teredo/main.zeek @@ -0,0 +1,28 @@ +module PacketAnalyzer::TEREDO; + +# This needs to be loaded here so the functions are available. Function BIFs normally aren't +# loaded until after the packet analysis init scripts are run, and then zeek complains it +# can't find the function. 
+@load base/bif/plugins/Zeek_Teredo.functions.bif + +# Needed for port registration for BPF +@load base/frameworks/analyzer/main + +export { + ## Default analyzer + const default_analyzer: PacketAnalyzer::Tag = PacketAnalyzer::ANALYZER_IP &redef; +} + +const teredo_ports = { 3544/udp }; +redef likely_server_ports += { teredo_ports }; + +event zeek_init() &priority=20 + { + PacketAnalyzer::register_protocol_detection(PacketAnalyzer::ANALYZER_UDP, PacketAnalyzer::ANALYZER_TEREDO); + PacketAnalyzer::register_for_ports(PacketAnalyzer::ANALYZER_UDP, PacketAnalyzer::ANALYZER_TEREDO, teredo_ports); + } + +event connection_state_remove(c: connection) + { + remove_teredo_connection(c$id); + } diff --git a/scripts/base/packet-protocols/vxlan/__load__.zeek b/scripts/base/packet-protocols/vxlan/__load__.zeek new file mode 100644 index 0000000000..d551be57d3 --- /dev/null +++ b/scripts/base/packet-protocols/vxlan/__load__.zeek @@ -0,0 +1 @@ +@load ./main \ No newline at end of file diff --git a/scripts/base/packet-protocols/vxlan/main.zeek b/scripts/base/packet-protocols/vxlan/main.zeek new file mode 100644 index 0000000000..83bde18c2b --- /dev/null +++ b/scripts/base/packet-protocols/vxlan/main.zeek @@ -0,0 +1,20 @@ +module PacketAnalyzer::VXLAN; + +export { + # There's no indicator in the VXLAN packet header format about what the next protocol + # in the chain is. All of the documentation just lists Ethernet, so default to that. + const default_analyzer: PacketAnalyzer::Tag = PacketAnalyzer::ANALYZER_ETHERNET &redef; + + ## The set of UDP ports used for VXLAN traffic. Traffic using this + ## UDP destination port will attempt to be decapsulated. Note that if + ## you customize this, you may still want to manually ensure that + ## :zeek:see:`likely_server_ports` also gets populated accordingly.
+ const vxlan_ports: set[port] = { 4789/udp } &redef; +} + +redef likely_server_ports += { vxlan_ports }; + +event zeek_init() &priority=20 + { + PacketAnalyzer::register_for_ports(PacketAnalyzer::ANALYZER_UDP, PacketAnalyzer::ANALYZER_VXLAN, vxlan_ports); + } diff --git a/scripts/base/protocols/conn/contents.zeek b/scripts/base/protocols/conn/contents.zeek index ea689c6350..f3e64b00b1 100644 --- a/scripts/base/protocols/conn/contents.zeek +++ b/scripts/base/protocols/conn/contents.zeek @@ -1,5 +1,5 @@ -##! This script can be used to extract either the originator's data or the -##! responders data or both. By default nothing is extracted, and in order +##! This script can be used to extract either the originator's data or the +##! responders data or both. By default nothing is extracted, and in order ##! to actually extract data the ``c$extract_orig`` and/or the ##! ``c$extract_resp`` variable must be set to ``T``. One way to achieve this ##! would be to handle the :zeek:id:`connection_established` event elsewhere @@ -19,7 +19,7 @@ export { ## The prefix given to files containing extracted connections as they ## are opened on disk. option extraction_prefix = "contents"; - + ## If this variable is set to ``T``, then all contents of all ## connections will be extracted. option default_extract = F; @@ -38,7 +38,7 @@ event connection_established(c: connection) &priority=-5 local orig_f = open(orig_file); set_contents_file(c$id, CONTENTS_ORIG, orig_f); } - + if ( c$extract_resp ) { local resp_file = generate_extraction_filename(extraction_prefix, c, "resp.dat"); diff --git a/scripts/base/protocols/conn/inactivity.zeek b/scripts/base/protocols/conn/inactivity.zeek index b438a05b61..0d63240407 100644 --- a/scripts/base/protocols/conn/inactivity.zeek +++ b/scripts/base/protocols/conn/inactivity.zeek @@ -6,19 +6,19 @@ module Conn; export { ## Define inactivity timeouts by the service detected being used over ## the connection. 
- option analyzer_inactivity_timeouts: table[Analyzer::Tag] of interval = { + option analyzer_inactivity_timeouts: table[AllAnalyzers::Tag] of interval = { # For interactive services, allow longer periods of inactivity. [[Analyzer::ANALYZER_SSH, Analyzer::ANALYZER_FTP]] = 1 hrs, }; - + ## Define inactivity timeouts based on common protocol ports. option port_inactivity_timeouts: table[port] of interval = { [[21/tcp, 22/tcp, 23/tcp, 513/tcp]] = 1 hrs, }; - + } - -event protocol_confirmation(c: connection, atype: Analyzer::Tag, aid: count) + +event analyzer_confirmation(c: connection, atype: AllAnalyzers::Tag, aid: count) { if ( atype in analyzer_inactivity_timeouts ) set_inactivity_timeout(c$id, analyzer_inactivity_timeouts[atype]); diff --git a/scripts/base/protocols/dce-rpc/main.zeek b/scripts/base/protocols/dce-rpc/main.zeek index 2f69b33a7f..6092029e59 100644 --- a/scripts/base/protocols/dce-rpc/main.zeek +++ b/scripts/base/protocols/dce-rpc/main.zeek @@ -17,7 +17,7 @@ export { ## The connection's 4-tuple of endpoint addresses/ports. id : conn_id &log; ## Round trip time from the request to the response. - ## If either the request or response wasn't seen, + ## If either the request or response wasn't seen, ## this will be null. rtt : interval &log &optional; diff --git a/scripts/base/protocols/dhcp/main.zeek b/scripts/base/protocols/dhcp/main.zeek index b7a8f15649..d6a6807540 100644 --- a/scripts/base/protocols/dhcp/main.zeek +++ b/scripts/base/protocols/dhcp/main.zeek @@ -78,7 +78,7 @@ export { ## The DHCP message types seen by this DHCP transaction msg_types: vector of string &log &default=string_vec(); - ## Duration of the DHCP "session" representing the + ## Duration of the DHCP "session" representing the ## time from the first message to the last. 
duration: interval &log &default=0secs; diff --git a/scripts/base/protocols/dns/consts.zeek b/scripts/base/protocols/dns/consts.zeek index a1e0f53a0b..cdce6d0231 100644 --- a/scripts/base/protocols/dns/consts.zeek +++ b/scripts/base/protocols/dns/consts.zeek @@ -172,4 +172,15 @@ export { [4] = "SHA384", } &default = function(n: count): string { return fmt("digest-%d", n); }; + ## SVCB/HTTPS SvcParam keys, as defined in + ## https://www.ietf.org/archive/id/draft-ietf-dnsop-svcb-https-07.txt, sec 14.3.2 + const svcparam_keys = { + [0] = "mandatory", + [1] = "alpn", + [2] = "no-default-alpn", + [3] = "port", + [4] = "ipv4hint", + [5] = "ech", + [6] = "ipv6hint", + } &default = function(n: count): string { return fmt("key-%d", n); }; } diff --git a/scripts/base/protocols/dns/main.zeek b/scripts/base/protocols/dns/main.zeek index 85c90efadc..fa22cdba13 100644 --- a/scripts/base/protocols/dns/main.zeek +++ b/scripts/base/protocols/dns/main.zeek @@ -375,7 +375,7 @@ hook DNS::do_reply(c: connection, msg: dns_msg, ans: dns_answer, reply: string) if ( ! c$dns?$rtt ) { c$dns$rtt = network_time() - c$dns$ts; - # This could mean that only a reply was seen since + # This could mean that only a reply was seen since # we assume there must be some passage of time between # request and response. if ( c$dns$rtt == 0secs ) @@ -547,9 +547,9 @@ event dns_SRV_reply(c: connection, msg: dns_msg, ans: dns_answer, target: string # # } # event dns_EDNS_ecs(c: connection, msg: dns_msg, opt: dns_edns_ecs) -# { -# -# } +# { +# +# } # #event dns_TSIG_addl(c: connection, msg: dns_msg, ans: dns_tsig_additional) # { diff --git a/scripts/base/protocols/ftp/files.zeek b/scripts/base/protocols/ftp/files.zeek index f2c2625bdb..e7a200f927 100644 --- a/scripts/base/protocols/ftp/files.zeek +++ b/scripts/base/protocols/ftp/files.zeek @@ -18,14 +18,14 @@ export { ## Describe the file being transferred. 
global describe_file: function(f: fa_file): string; - redef record fa_file += { + redef record fa_file += { ftp: FTP::Info &optional; }; } function get_file_handle(c: connection, is_orig: bool): string { - if ( [c$id$resp_h, c$id$resp_p] !in ftp_data_expected ) + if ( [c$id$resp_h, c$id$resp_p] !in ftp_data_expected ) return ""; return cat(Analyzer::ANALYZER_FTP_DATA, c$start_time, c$id, is_orig); @@ -54,7 +54,7 @@ event zeek_init() &priority=5 event file_over_new_connection(f: fa_file, c: connection, is_orig: bool) &priority=5 { - if ( [c$id$resp_h, c$id$resp_p] !in ftp_data_expected ) + if ( [c$id$resp_h, c$id$resp_p] !in ftp_data_expected ) return; local ftp = ftp_data_expected[c$id$resp_h, c$id$resp_p]; diff --git a/scripts/base/protocols/ftp/utils-commands.zeek b/scripts/base/protocols/ftp/utils-commands.zeek index 43519b8422..5b75bd5032 100644 --- a/scripts/base/protocols/ftp/utils-commands.zeek +++ b/scripts/base/protocols/ftp/utils-commands.zeek @@ -11,12 +11,12 @@ export { ## Counter to track how many commands have been executed. seq: count &default=0; }; - + ## Structure for tracking pending commands in the event that the client - ## sends a large number of commands before the server has a chance to + ## sends a large number of commands before the server has a chance to ## reply. type PendingCmds: table[count] of CmdArg; - + ## Possible response codes for a wide variety of FTP commands. 
option cmd_reply_code: set[string, count] = { # According to RFC 959 @@ -65,7 +65,7 @@ export { ["MDTM", [213, 500, 501, 550]], # RFC3659 ["MLST", [150, 226, 250, 500, 501, 550]], # RFC3659 ["MLSD", [150, 226, 250, 500, 501, 550]], # RFC3659 - + ["CLNT", [200, 500]], # No RFC (indicate client software) ["MACB", [200, 500, 550]], # No RFC (test for MacBinary support) @@ -79,11 +79,11 @@ function add_pending_cmd(pc: PendingCmds, cmd: string, arg: string): CmdArg { local ca = [$cmd = cmd, $arg = arg, $seq=|pc|+1, $ts=network_time()]; pc[ca$seq] = ca; - + return ca; } -# Determine which is the best command to match with based on the +# Determine which is the best command to match with based on the # response code and message. function get_pending_cmd(pc: PendingCmds, reply_code: count, reply_msg: string): CmdArg { @@ -94,18 +94,18 @@ function get_pending_cmd(pc: PendingCmds, reply_code: count, reply_msg: string): for ( cmd_seq, cmd in pc ) { local score: int = 0; - + # if the command is compatible with the reply code # code 500 (syntax error) is compatible with all commands if ( reply_code == 500 || [cmd$cmd, reply_code] in cmd_reply_code ) score = score + 100; - + # if the command or the command arg appears in the reply message if ( strstr(reply_msg, cmd$cmd) > 0 ) score = score + 20; if ( strstr(reply_msg, cmd$arg) > 0 ) score = score + 10; - + if ( score > best_score || ( score == best_score && best_seq > cmd_seq ) ) # break tie with sequence number { @@ -132,7 +132,7 @@ function remove_pending_cmd(pc: PendingCmds, ca: CmdArg): bool else return F; } - + function pop_pending_cmd(pc: PendingCmds, reply_code: count, reply_msg: string): CmdArg { local ca = get_pending_cmd(pc, reply_code, reply_msg); diff --git a/scripts/base/protocols/http/entities.zeek b/scripts/base/protocols/http/entities.zeek index 0a72c6b76e..b0689c5478 100644 --- a/scripts/base/protocols/http/entities.zeek +++ b/scripts/base/protocols/http/entities.zeek @@ -97,7 +97,7 @@ event http_header(c: 
connection, is_orig: bool, name: string, value: string) &pr event file_over_new_connection(f: fa_file, c: connection, is_orig: bool) &priority=5 { - if ( f$source == "HTTP" && c?$http ) + if ( f$source == "HTTP" && c?$http ) { f$http = c$http; @@ -199,6 +199,6 @@ event file_sniff(f: fa_file, meta: fa_metadata) &priority=5 event http_end_entity(c: connection, is_orig: bool) &priority=5 { - if ( c?$http && c$http?$current_entity ) + if ( c?$http && c$http?$current_entity ) delete c$http$current_entity; } diff --git a/scripts/base/protocols/http/utils.zeek b/scripts/base/protocols/http/utils.zeek index dcdbe4bc8e..5f4da1b77c 100644 --- a/scripts/base/protocols/http/utils.zeek +++ b/scripts/base/protocols/http/utils.zeek @@ -16,7 +16,7 @@ export { ## ## Returns: A vector of strings containing the keys. global extract_keys: function(data: string, kv_splitter: pattern): string_vec; - + ## Creates a URL from an :zeek:type:`HTTP::Info` record. This should ## handle edge cases such as proxied requests appropriately. ## @@ -24,7 +24,7 @@ export { ## ## Returns: A URL, not prefixed by ``"http://"``. global build_url: function(rec: Info): string; - + ## Creates a URL from an :zeek:type:`HTTP::Info` record. This should ## handle edge cases such as proxied requests appropriately. 
## @@ -41,7 +41,7 @@ export { function extract_keys(data: string, kv_splitter: pattern): string_vec { local key_vec: vector of string = vector(); - + local parts = split_string(data, kv_splitter); for ( part_index in parts ) { @@ -64,7 +64,7 @@ function build_url(rec: Info): string host = fmt("%s:%d", host, resp_p); return fmt("%s%s", host, uri); } - + function build_url_http(rec: Info): string { return fmt("http://%s", build_url(rec)); diff --git a/scripts/base/protocols/irc/files.zeek b/scripts/base/protocols/irc/files.zeek index 59b178f4df..33128f57a6 100644 --- a/scripts/base/protocols/irc/files.zeek +++ b/scripts/base/protocols/irc/files.zeek @@ -31,7 +31,7 @@ event zeek_init() &priority=5 event file_over_new_connection(f: fa_file, c: connection, is_orig: bool) &priority=5 { - if ( [c$id$resp_h, c$id$resp_p] !in dcc_expected_transfers ) + if ( [c$id$resp_h, c$id$resp_p] !in dcc_expected_transfers ) return; local irc = dcc_expected_transfers[c$id$resp_h, c$id$resp_p]; diff --git a/scripts/base/protocols/irc/main.zeek b/scripts/base/protocols/irc/main.zeek index ae15098282..de4d2296ea 100644 --- a/scripts/base/protocols/irc/main.zeek +++ b/scripts/base/protocols/irc/main.zeek @@ -1,11 +1,11 @@ ##! Implements the core IRC analysis support. The logging model is to log -##! IRC commands along with the associated response and some additional +##! IRC commands along with the associated response and some additional ##! metadata about the connection if it's available. module IRC; export { - + redef enum Log::ID += { LOG }; global log_policy: Log::PolicyHook; @@ -21,7 +21,7 @@ export { nick: string &log &optional; ## Username given for the connection. user: string &log &optional; - + ## Command given by the client. command: string &log &optional; ## Value for the command given by the client. @@ -29,8 +29,8 @@ export { ## Any additional data for the command. 
addl: string &log &optional; }; - - ## Event that can be handled to access the IRC record as it is sent on + + ## Event that can be handled to access the IRC record as it is sent on ## to the logging framework. global irc_log: event(rec: Info); } @@ -48,7 +48,7 @@ event zeek_init() &priority=5 Log::create_stream(IRC::LOG, [$columns=Info, $ev=irc_log, $path="irc", $policy=log_policy]); Analyzer::register_for_ports(Analyzer::ANALYZER_IRC, ports); } - + function new_session(c: connection): Info { local info: Info; @@ -57,12 +57,12 @@ function new_session(c: connection): Info info$id = c$id; return info; } - + function set_session(c: connection) { if ( ! c?$irc ) c$irc = new_session(c); - + c$irc$ts=network_time(); } diff --git a/scripts/base/protocols/krb/main.zeek b/scripts/base/protocols/krb/main.zeek index d8fbbd12fc..16cdfac6f1 100644 --- a/scripts/base/protocols/krb/main.zeek +++ b/scripts/base/protocols/krb/main.zeek @@ -95,7 +95,7 @@ function set_session(c: connection): bool $id = c$id); Conn::register_removal_hook(c, finalize_krb); } - + return c$krb$logged; } @@ -115,7 +115,7 @@ event krb_error(c: connection, msg: Error_Msg) &priority=5 if ( msg?$error_text && msg$error_text in ignored_errors ) { - if ( c?$krb ) + if ( c?$krb ) delete c$krb; return; @@ -174,7 +174,7 @@ event krb_as_response(c: connection, msg: KDC_Response) &priority=5 if ( ! c$krb?$client && ( msg?$client_name || msg?$client_realm ) ) { - c$krb$client = fmt("%s/%s", msg?$client_name ? msg$client_name : "", + c$krb$client = fmt("%s/%s", msg?$client_name ? msg$client_name : "", msg?$client_realm ? msg$client_realm : ""); } @@ -202,7 +202,7 @@ event krb_tgs_request(c: connection, msg: KDC_Request) &priority=5 c$krb$request_type = "TGS"; if ( msg?$service_name ) c$krb$service = msg$service_name; - if ( msg?$from ) + if ( msg?$from ) c$krb$from = msg$from; if ( msg?$till ) c$krb$till = msg$till; @@ -221,7 +221,7 @@ event krb_tgs_response(c: connection, msg: KDC_Response) &priority=5 if ( ! 
c$krb?$client && ( msg?$client_name || msg?$client_realm ) ) { - c$krb$client = fmt("%s/%s", msg?$client_name ? msg$client_name : "", + c$krb$client = fmt("%s/%s", msg?$client_name ? msg$client_name : "", msg?$client_realm ? msg$client_realm : ""); } diff --git a/scripts/base/protocols/ntlm/main.zeek b/scripts/base/protocols/ntlm/main.zeek index 1aca98791e..17d8b85486 100644 --- a/scripts/base/protocols/ntlm/main.zeek +++ b/scripts/base/protocols/ntlm/main.zeek @@ -33,7 +33,7 @@ export { ## Indicate whether or not the authentication was successful. success : bool &log &optional; - ## Internally used field to indicate if the login attempt + ## Internally used field to indicate if the login attempt ## has already been logged. done: bool &default=F; }; diff --git a/scripts/base/protocols/radius/main.zeek b/scripts/base/protocols/radius/main.zeek index f5c1f2cbc3..dc692cac2f 100644 --- a/scripts/base/protocols/radius/main.zeek +++ b/scripts/base/protocols/radius/main.zeek @@ -24,7 +24,7 @@ export { mac : string &log &optional; ## The address given to the network access server, if ## present. This is only a hint from the RADIUS server - ## and the network access server is not required to honor + ## and the network access server is not required to honor ## the address. framed_addr : addr &log &optional; ## Address (IPv4, IPv6, or FQDN) of the initiator end of the tunnel, @@ -33,7 +33,7 @@ export { tunnel_client: string &log &optional; ## Connect info, if present. connect_info : string &log &optional; - ## Reply message from the server challenge. This is + ## Reply message from the server challenge. This is ## frequently shown to the user authenticating. reply_msg : string &log &optional; ## Successful or failed authentication. 
diff --git a/scripts/base/protocols/rdp/main.zeek b/scripts/base/protocols/rdp/main.zeek index 51c94d326a..1dd8701ef7 100644 --- a/scripts/base/protocols/rdp/main.zeek +++ b/scripts/base/protocols/rdp/main.zeek @@ -41,15 +41,15 @@ export { desktop_width: count &log &optional; ## Desktop height of the client machine. desktop_height: count &log &optional; - ## The color depth requested by the client in + ## The color depth requested by the client in ## the high_color_depth field. requested_color_depth: string &log &optional; ## If the connection is being encrypted with native - ## RDP encryption, this is the type of cert + ## RDP encryption, this is the type of cert ## being used. cert_type: string &log &optional; - ## The number of certs seen. X.509 can transfer an + ## The number of certs seen. X.509 can transfer an ## entire certificate chain. cert_count: count &log &default=0; ## Indicates if the provided certificate or certificate @@ -57,7 +57,7 @@ export { cert_permanent: bool &log &optional; ## Encryption level of the connection. encryption_level: string &log &optional; - ## Encryption method of the connection. + ## Encryption method of the connection. encryption_method: string &log &optional; }; @@ -65,7 +65,7 @@ export { ## continuing to process encrypted traffic. option disable_analyzer_after_detection = F; - ## The amount of time to monitor an RDP session from when it is first + ## The amount of time to monitor an RDP session from when it is first ## identified. When this interval is reached, the session is logged. option rdp_check_interval = 10secs; @@ -113,7 +113,7 @@ function write_log(c: connection) info$done = T; # Verify that the RDP session contains - # RDP data before writing it to the log. + # RDP data before writing it to the log. 
if ( info?$cookie || info?$keyboard_layout || info?$result ) Log::write(RDP::LOG, info); } @@ -124,16 +124,16 @@ event check_record(c: connection) if ( c$rdp$done ) return; - # If the value rdp_check_interval has passed since the - # RDP session was started, then log the record. + # If the value rdp_check_interval has passed since the + # RDP session was started, then log the record. local diff = network_time() - c$rdp$ts; if ( diff > rdp_check_interval ) { write_log(c); # Remove the analyzer if it is still attached. - if ( disable_analyzer_after_detection && - connection_exists(c$id) && + if ( disable_analyzer_after_detection && + connection_exists(c$id) && c$rdp?$analyzer_id ) { disable_analyzer(c$id, c$rdp$analyzer_id); @@ -240,7 +240,7 @@ event rdp_server_certificate(c: connection, cert_type: count, permanently_issued # now so we manually count this one. if ( c$rdp$cert_type == "RSA" ) ++c$rdp$cert_count; - + c$rdp$cert_permanent = permanently_issued; } @@ -265,7 +265,7 @@ event file_over_new_connection(f: fa_file, c: connection, is_orig: bool) &priori } } -event protocol_confirmation(c: connection, atype: Analyzer::Tag, aid: count) &priority=5 +event analyzer_confirmation(c: connection, atype: AllAnalyzers::Tag, aid: count) &priority=5 { if ( atype == Analyzer::ANALYZER_RDP ) { @@ -274,7 +274,7 @@ event protocol_confirmation(c: connection, atype: Analyzer::Tag, aid: count) &pr } } -event protocol_violation(c: connection, atype: Analyzer::Tag, aid: count, reason: string) &priority=5 +event analyzer_violation(c: connection, atype: AllAnalyzers::Tag, aid: count, reason: string) &priority=5 { # If a protocol violation occurs, then log the record immediately. 
if ( c?$rdp ) diff --git a/scripts/base/protocols/smb/consts.zeek b/scripts/base/protocols/smb/consts.zeek index 32a03dd17d..9b68419baa 100644 --- a/scripts/base/protocols/smb/consts.zeek +++ b/scripts/base/protocols/smb/consts.zeek @@ -107,13 +107,13 @@ export { } &redef &default=function(i: count):string { return fmt("unknown-wksta-command-%d", i); }; type rpc_cmd_table: table[count] of string; - + ## The subcommands for RPC endpoints. const rpc_sub_cmds: table[string] of rpc_cmd_table = { ["4b324fc8-1670-01d3-1278-5a47bf6ee188"] = srv_cmds, - ["6bffd098-a112-3610-9833-46c3f87e345a"] = wksta_cmds, + ["6bffd098-a112-3610-9833-46c3f87e345a"] = wksta_cmds, } &redef &default=function(i: string):rpc_cmd_table { return table() &default=function(j: string):string { return fmt("unknown-uuid-%s", j); }; }; - + } module SMB1; @@ -195,37 +195,37 @@ export { } &default=function(i: count):string { return fmt("unknown-%d", i); }; const trans2_sub_commands: table[count] of string = { - [0x00] = "OPEN2", - [0x01] = "FIND_FIRST2", - [0x02] = "FIND_NEXT2", - [0x03] = "QUERY_FS_INFORMATION", - [0x04] = "SET_FS_INFORMATION", - [0x05] = "QUERY_PATH_INFORMATION", - [0x06] = "SET_PATH_INFORMATION", - [0x07] = "QUERY_FILE_INFORMATION", - [0x08] = "SET_FILE_INFORMATION", - [0x09] = "FSCTL", - [0x0A] = "IOCTL", - [0x0B] = "FIND_NOTIFY_FIRST", - [0x0C] = "FIND_NOTIFY_NEXT", - [0x0D] = "CREATE_DIRECTORY", - [0x0E] = "SESSION_SETUP", - [0x10] = "GET_DFS_REFERRAL", - [0x11] = "REPORT_DFS_INCONSISTENCY", + [0x00] = "OPEN2", + [0x01] = "FIND_FIRST2", + [0x02] = "FIND_NEXT2", + [0x03] = "QUERY_FS_INFORMATION", + [0x04] = "SET_FS_INFORMATION", + [0x05] = "QUERY_PATH_INFORMATION", + [0x06] = "SET_PATH_INFORMATION", + [0x07] = "QUERY_FILE_INFORMATION", + [0x08] = "SET_FILE_INFORMATION", + [0x09] = "FSCTL", + [0x0A] = "IOCTL", + [0x0B] = "FIND_NOTIFY_FIRST", + [0x0C] = "FIND_NOTIFY_NEXT", + [0x0D] = "CREATE_DIRECTORY", + [0x0E] = "SESSION_SETUP", + [0x10] = "GET_DFS_REFERRAL", + [0x11] = 
"REPORT_DFS_INCONSISTENCY", } &default=function(i: count):string { return fmt("unknown-trans2-sub-cmd-%d", i); }; const trans_sub_commands: table[count] of string = { - [0x01] = "SET_NMPIPE_STATE", - [0x11] = "RAW_READ_NMPIPE", - [0x21] = "QUERY_NMPIPE_STATE", - [0x22] = "QUERY_NMPIPE_INFO", - [0x23] = "PEEK_NMPIPE", - [0x26] = "TRANSACT_NMPIPE", - [0x31] = "RAW_WRITE_NMPIPE", - [0x36] = "READ_NMPIPE", - [0x37] = "WRITE_NMPIPE", - [0x53] = "WAIT_NMPIPE", - [0x54] = "CALL_NMPIPE", + [0x01] = "SET_NMPIPE_STATE", + [0x11] = "RAW_READ_NMPIPE", + [0x21] = "QUERY_NMPIPE_STATE", + [0x22] = "QUERY_NMPIPE_INFO", + [0x23] = "PEEK_NMPIPE", + [0x26] = "TRANSACT_NMPIPE", + [0x31] = "RAW_WRITE_NMPIPE", + [0x36] = "READ_NMPIPE", + [0x37] = "WRITE_NMPIPE", + [0x53] = "WAIT_NMPIPE", + [0x54] = "CALL_NMPIPE", } &default=function(i: count):string { return fmt("unknown-trans-sub-cmd-%d", i); }; } diff --git a/scripts/base/protocols/smb/files.zeek b/scripts/base/protocols/smb/files.zeek index e3b387b771..a47874d480 100644 --- a/scripts/base/protocols/smb/files.zeek +++ b/scripts/base/protocols/smb/files.zeek @@ -14,7 +14,7 @@ export { function get_file_handle(c: connection, is_orig: bool): string { if ( ! (c$smb_state?$current_file && - (c$smb_state$current_file?$name || + (c$smb_state$current_file?$name || c$smb_state$current_file?$path)) ) { # TODO - figure out what are the cases where this happens. diff --git a/scripts/base/protocols/smb/main.zeek b/scripts/base/protocols/smb/main.zeek index cfccde16ac..703a76903d 100644 --- a/scripts/base/protocols/smb/main.zeek +++ b/scripts/base/protocols/smb/main.zeek @@ -5,7 +5,7 @@ module SMB; export { - redef enum Log::ID += { + redef enum Log::ID += { AUTH_LOG, MAPPING_LOG, FILES_LOG @@ -13,7 +13,7 @@ export { global log_policy_files: Log::PolicyHook; global log_policy_mapping: Log::PolicyHook; - + ## Abstracted actions for SMB file actions. 
type Action: enum { FILE_READ, @@ -55,7 +55,7 @@ export { id : conn_id &log; ## Unique ID of the file. fuid : string &log &optional; - + ## Action this log record represents. action : Action &log &optional; ## Path pulled from the tree this file was transferred to or from. @@ -99,14 +99,14 @@ export { uid : string &log; ## ID of the connection the request was sent over. id : conn_id &log; - + ## The command sent by the client. command : string &log; ## The subcommand sent by the client, if present. sub_command : string &log &optional; ## Command argument sent by the client, if any. argument : string &log &optional; - + ## Server reply to the client's command. status : string &log &optional; ## Round trip time from the request to the response. @@ -116,13 +116,13 @@ export { ## Authenticated username, if available. username : string &log &optional; - + ## If this is related to a tree, this is the tree ## that was used for the current command. tree : string &log &optional; ## The type of tree (disk share, printer share, named pipe, etc.). tree_service : string &log &optional; - + ## If the command referenced a file, store it here. referenced_file : FileInfo &log &optional; ## If the command referenced a tree, store it here. @@ -138,7 +138,7 @@ export { current_file : FileInfo &optional; ## A reference to the current tree. current_tree : TreeInfo &optional; - + ## Indexed on MID to map responses to requests. pending_cmds : table[count] of CmdInfo &optional; ## File map to retrieve file information based on the file ID. @@ -161,7 +161,7 @@ export { redef record connection += { smb_state : State &optional; }; - + ## This is an internally used function. 
const set_current_file: function(smb_state: State, file_id: count) &redef; @@ -195,7 +195,7 @@ function set_current_file(smb_state: State, file_id: count) smb_state$fid_map[file_id] = smb_state$current_cmd$referenced_file; smb_state$fid_map[file_id]$fid = file_id; } - + smb_state$current_cmd$referenced_file = smb_state$fid_map[file_id]; smb_state$current_file = smb_state$current_cmd$referenced_file; } @@ -203,7 +203,7 @@ function set_current_file(smb_state: State, file_id: count) function write_file_log(state: State) { local f = state$current_file; - if ( f?$name && + if ( f?$name && f$action in logged_file_actions ) { # Everything in this if statement is to avoid overlogging @@ -225,7 +225,7 @@ function write_file_log(state: State) else add state$recent_files[file_ident]; } - + Log::write(FILES_LOG, f); } } @@ -240,7 +240,7 @@ event file_state_remove(f: fa_file) &priority=-5 { if ( f$source != "SMB" ) return; - + for ( id, c in f$conns ) { if ( c?$smb_state && c$smb_state?$current_file) diff --git a/scripts/base/protocols/smb/smb1-main.zeek b/scripts/base/protocols/smb/smb1-main.zeek index 9dabdd1c36..a6bfcc8035 100644 --- a/scripts/base/protocols/smb/smb1-main.zeek +++ b/scripts/base/protocols/smb/smb1-main.zeek @@ -39,12 +39,12 @@ event smb1_message(c: connection, hdr: SMB1::Header, is_orig: bool) &priority=5 { smb_state$current_cmd$tree = smb_state$current_tree$path; } - + if ( smb_state$current_tree?$service ) { smb_state$current_cmd$tree_service = smb_state$current_tree$service; } - + if ( mid !in smb_state$pending_cmds ) { local tmp_cmd = SMB::CmdInfo($uid=c$uid, $id=c$id, $version="SMB1", $command = SMB1::commands[hdr$command]); @@ -52,10 +52,10 @@ event smb1_message(c: connection, hdr: SMB1::Header, is_orig: bool) &priority=5 local tmp_file = SMB::FileInfo($uid=c$uid, $id=c$id); tmp_cmd$referenced_file = tmp_file; tmp_cmd$referenced_tree = smb_state$current_tree; - + smb_state$pending_cmds[mid] = tmp_cmd; } - + smb_state$current_cmd = 
smb_state$pending_cmds[mid]; if ( !is_orig ) @@ -97,11 +97,11 @@ event smb1_negotiate_response(c: connection, hdr: SMB1::Header, response: SMB1:: delete c$smb_state$current_cmd$smb1_offered_dialects; } } - + event smb1_negotiate_response(c: connection, hdr: SMB1::Header, response: SMB1::NegotiateResponse) &priority=-5 { } - + event smb1_tree_connect_andx_request(c: connection, hdr: SMB1::Header, path: string, service: string) &priority=5 { local tmp_tree = SMB::TreeInfo($uid=c$uid, $id=c$id, $path=path, $service=service); @@ -117,7 +117,7 @@ event smb1_tree_connect_andx_response(c: connection, hdr: SMB1::Header, service: c$smb_state$current_cmd$referenced_tree$share_type = "PIPE"; c$smb_state$current_cmd$tree_service = service; - + if ( native_file_system != "" ) c$smb_state$current_cmd$referenced_tree$native_file_system = native_file_system; @@ -150,13 +150,13 @@ event smb1_nt_create_andx_response(c: connection, hdr: SMB1::Header, file_id: co # I'm seeing negative data from IPC tree transfers if ( time_to_double(times$modified) > 0.0 ) c$smb_state$current_cmd$referenced_file$times = times; - - # We can identify the file by its file id now so let's stick it + + # We can identify the file by its file id now so let's stick it # in the file map. 
c$smb_state$fid_map[file_id] = c$smb_state$current_cmd$referenced_file; - + c$smb_state$current_file = c$smb_state$fid_map[file_id]; - + SMB::write_file_log(c$smb_state); } @@ -167,7 +167,7 @@ event smb1_read_andx_request(c: connection, hdr: SMB1::Header, file_id: count, o if ( c$smb_state$current_file?$name ) c$smb_state$current_cmd$argument = c$smb_state$current_file$name; } - + event smb1_read_andx_request(c: connection, hdr: SMB1::Header, file_id: count, offset: count, length: count) &priority=-5 { if ( c$smb_state$current_tree?$path && !c$smb_state$current_file?$path ) @@ -180,12 +180,12 @@ event smb1_write_andx_request(c: connection, hdr: SMB1::Header, file_id: count, { SMB::set_current_file(c$smb_state, file_id); c$smb_state$current_file$action = SMB::FILE_WRITE; - if ( !c$smb_state$current_cmd?$argument && + if ( !c$smb_state$current_cmd?$argument && # TODO: figure out why name isn't getting set sometimes. c$smb_state$current_file?$name ) c$smb_state$current_cmd$argument = c$smb_state$current_file$name; } - + event smb1_write_andx_request(c: connection, hdr: SMB1::Header, file_id: count, offset: count, data_len: count) &priority=-5 { if ( c$smb_state$current_tree?$path && !c$smb_state$current_file?$path ) @@ -217,7 +217,7 @@ event smb1_close_request(c: connection, hdr: SMB1::Header, file_id: count) &prio if ( fl?$name ) c$smb_state$current_cmd$argument = fl$name; - + delete c$smb_state$fid_map[file_id]; SMB::write_file_log(c$smb_state); @@ -254,7 +254,7 @@ event smb1_session_setup_andx_response(c: connection, hdr: SMB1::Header, respons { # No behavior yet. } - + event smb1_transaction_request(c: connection, hdr: SMB1::Header, name: string, sub_cmd: count, parameters: string, data: string) { c$smb_state$current_cmd$sub_command = SMB1::trans_sub_commands[sub_cmd]; @@ -267,7 +267,7 @@ event smb1_write_andx_request(c: connection, hdr: SMB1::Header, file_id: count, # TODO: figure out why the uuid isn't getting set sometimes. 
return; } - + c$smb_state$pipe_map[file_id] = c$smb_state$current_file$uuid; } @@ -278,11 +278,11 @@ event smb_pipe_bind_ack_response(c: connection, hdr: SMB1::Header) # TODO: figure out why the uuid isn't getting set sometimes. return; } - + c$smb_state$current_cmd$sub_command = "RPC_BIND_ACK"; c$smb_state$current_cmd$argument = SMB::rpc_uuids[c$smb_state$current_file$uuid]; } - + event smb_pipe_bind_request(c: connection, hdr: SMB1::Header, uuid: string, version: string) { if ( ! c$smb_state?$current_file || ! c$smb_state$current_file?$uuid ) diff --git a/scripts/base/protocols/smb/smb2-main.zeek b/scripts/base/protocols/smb/smb2-main.zeek index 59436a2c8c..c45a56a799 100644 --- a/scripts/base/protocols/smb/smb2-main.zeek +++ b/scripts/base/protocols/smb/smb2-main.zeek @@ -19,7 +19,7 @@ event smb2_message(c: connection, hdr: SMB2::Header, is_orig: bool) &priority=5 state$pipe_map = table(); c$smb_state = state; } - + local smb_state = c$smb_state; local tid = hdr$tree_id; local mid = hdr$message_id; @@ -159,10 +159,10 @@ event smb2_create_response(c: connection, hdr: SMB2::Header, response: SMB2::Cre if ( time_to_double(response$times$modified) > 0.0 ) c$smb_state$current_file$times = response$times; - # We can identify the file by its file id now so let's stick it + # We can identify the file by its file id now so let's stick it # in the file map. 
c$smb_state$fid_map[response$file_id$persistent+response$file_id$volatile] = c$smb_state$current_file; - + c$smb_state$current_file = c$smb_state$fid_map[response$file_id$persistent+response$file_id$volatile]; } @@ -193,7 +193,7 @@ event smb2_read_request(c: connection, hdr: SMB2::Header, file_id: SMB2::GUID, o } event smb2_read_request(c: connection, hdr: SMB2::Header, file_id: SMB2::GUID, offset: count, length: count) &priority=-5 - { + { SMB::write_file_log(c$smb_state); } @@ -249,7 +249,7 @@ event smb2_file_rename(c: connection, hdr: SMB2::Header, file_id: SMB2::GUID, ds if ( c$smb_state$current_file?$name ) c$smb_state$current_file$prev_name = c$smb_state$current_file$name; - + c$smb_state$current_file$name = dst_filename; switch ( c$smb_state$current_tree$share_type ) diff --git a/scripts/base/protocols/ssh/main.zeek b/scripts/base/protocols/ssh/main.zeek index 1dbc1bcfcc..1dcbe80328 100644 --- a/scripts/base/protocols/ssh/main.zeek +++ b/scripts/base/protocols/ssh/main.zeek @@ -355,7 +355,7 @@ event ssh_server_host_key(c: connection, hash: string) &priority=5 c$ssh$host_key = hash; } -event protocol_confirmation(c: connection, atype: Analyzer::Tag, aid: count) &priority=20 +event analyzer_confirmation(c: connection, atype: AllAnalyzers::Tag, aid: count) &priority=20 { if ( atype == Analyzer::ANALYZER_SSH ) { diff --git a/scripts/base/protocols/ssl/main.zeek b/scripts/base/protocols/ssl/main.zeek index e3944a0f1e..37a60a1aff 100644 --- a/scripts/base/protocols/ssl/main.zeek +++ b/scripts/base/protocols/ssl/main.zeek @@ -474,7 +474,7 @@ hook finalize_ssl(c: connection) finish(c, F); } -event protocol_confirmation(c: connection, atype: Analyzer::Tag, aid: count) &priority=5 +event analyzer_confirmation(c: connection, atype: AllAnalyzers::Tag, aid: count) &priority=5 { if ( atype == Analyzer::ANALYZER_SSL || atype == Analyzer::ANALYZER_DTLS ) { @@ -494,7 +494,7 @@ event ssl_plaintext_data(c: connection, is_orig: bool, record_version: count, co Weird::weird(wi); 
} -event protocol_violation(c: connection, atype: Analyzer::Tag, aid: count, +event analyzer_violation(c: connection, atype: AllAnalyzers::Tag, aid: count, reason: string) &priority=5 { if ( c?$ssl && ( atype == Analyzer::ANALYZER_SSL || atype == Analyzer::ANALYZER_DTLS ) ) diff --git a/scripts/base/protocols/syslog/consts.zeek b/scripts/base/protocols/syslog/consts.zeek index c68cbda658..9ec7c4ce12 100644 --- a/scripts/base/protocols/syslog/consts.zeek +++ b/scripts/base/protocols/syslog/consts.zeek @@ -31,7 +31,7 @@ export { [23] = "LOCAL7", [999] = "UNSPECIFIED", } &default=function(c: count): string { return fmt("?-%d", c); }; - + ## Mapping between the constants and string values for syslog severities. const severity_codes: table[count] of string = { [0] = "EMERG", diff --git a/scripts/base/protocols/syslog/main.zeek b/scripts/base/protocols/syslog/main.zeek index 612507c9c1..7a789a9400 100644 --- a/scripts/base/protocols/syslog/main.zeek +++ b/scripts/base/protocols/syslog/main.zeek @@ -1,4 +1,4 @@ -##! Core script support for logging syslog messages. This script represents +##! Core script support for logging syslog messages. This script represents ##! one syslog message as one logged record. @load ./consts @@ -52,7 +52,7 @@ event syslog_message(c: connection, facility: count, severity: count, msg: strin info$facility=facility_codes[facility]; info$severity=severity_codes[severity]; info$message=msg; - + c$syslog = info; } diff --git a/scripts/base/protocols/tunnels/dpd.sig b/scripts/base/protocols/tunnels/dpd.sig index 9c4bddeffd..d4d3b533bc 100644 --- a/scripts/base/protocols/tunnels/dpd.sig +++ b/scripts/base/protocols/tunnels/dpd.sig @@ -1,14 +1,2 @@ # Provide DPD signatures for tunneling protocols that otherwise # wouldn't be detected at all. 
- -signature dpd_ayiya { - ip-proto = udp - payload /^..\x11\x29/ - enable "ayiya" -} - -signature dpd_teredo { - ip-proto = udp - payload /^(\x00\x00)|(\x00\x01)|([\x60-\x6f].{7}((\x20\x01\x00\x00)).{28})|([\x60-\x6f].{23}((\x20\x01\x00\x00))).{12}/ - enable "teredo" -} diff --git a/scripts/base/utils/conn-ids.zeek b/scripts/base/utils/conn-ids.zeek index b5d7fffd77..5222d1ce82 100644 --- a/scripts/base/utils/conn-ids.zeek +++ b/scripts/base/utils/conn-ids.zeek @@ -3,16 +3,16 @@ module GLOBAL; export { - ## Takes a conn_id record and returns a string representation with the + ## Takes a conn_id record and returns a string representation with the ## general data flow appearing to be from the connection originator ## on the left to the responder on the right. global id_string: function(id: conn_id): string; - - ## Takes a conn_id record and returns a string representation with the + + ## Takes a conn_id record and returns a string representation with the ## general data flow appearing to be from the connection responder ## on the right to the originator on the left. global reverse_id_string: function(id: conn_id): string; - + ## Calls :zeek:id:`id_string` or :zeek:id:`reverse_id_string` if the ## second argument is T or F, respectively. 
global directed_id_string: function(id: conn_id, is_orig: bool): string; diff --git a/scripts/base/utils/directions-and-hosts.zeek b/scripts/base/utils/directions-and-hosts.zeek index 442b4d8454..70f1e9aa2e 100644 --- a/scripts/base/utils/directions-and-hosts.zeek +++ b/scripts/base/utils/directions-and-hosts.zeek @@ -58,7 +58,7 @@ type Host: enum { function addr_matches_host(ip: addr, h: Host): bool { if ( h == NO_HOSTS ) return F; - + return ( h == ALL_HOSTS || (h == LOCAL_HOSTS && Site::is_local_addr(ip)) || (h == REMOTE_HOSTS && !Site::is_local_addr(ip)) ); diff --git a/scripts/base/utils/numbers.zeek b/scripts/base/utils/numbers.zeek index d2adb49ea2..41b8e601bb 100644 --- a/scripts/base/utils/numbers.zeek +++ b/scripts/base/utils/numbers.zeek @@ -1,8 +1,7 @@ - ## Extract an integer from a string. -## +## ## s: The string to search for a number. -## +## ## get_first: Provide `F` if you would like the last number found. ## ## Returns: The request integer from the given string or 0 if diff --git a/scripts/base/utils/patterns.zeek b/scripts/base/utils/patterns.zeek index c52556e86a..0fb7e0b72a 100644 --- a/scripts/base/utils/patterns.zeek +++ b/scripts/base/utils/patterns.zeek @@ -27,7 +27,7 @@ function set_to_regex(ss: set[string], pat: string): pattern for ( s in ss ) { local tmp_pattern = convert_for_pattern(s); - return_pat = ( i == 0 ) ? + return_pat = ( i == 0 ) ? 
tmp_pattern : cat(tmp_pattern, "|", return_pat); ++i; } diff --git a/scripts/base/utils/strings.zeek b/scripts/base/utils/strings.zeek index 4fa002acd6..e50954309f 100644 --- a/scripts/base/utils/strings.zeek +++ b/scripts/base/utils/strings.zeek @@ -25,7 +25,7 @@ function join_string_set(ss: set[string], j: string): string { if ( i > 0 ) output = cat(output, j); - + output = cat(output, s); ++i; } diff --git a/scripts/base/utils/thresholds.zeek b/scripts/base/utils/thresholds.zeek index d30e9f2b0a..d095edf007 100644 --- a/scripts/base/utils/thresholds.zeek +++ b/scripts/base/utils/thresholds.zeek @@ -16,13 +16,13 @@ export { ## for. index: count &default=0; }; - - ## The thresholds you would like to use as defaults with the + + ## The thresholds you would like to use as defaults with the ## :zeek:id:`default_check_threshold` function. const default_notice_thresholds: vector of count = { 30, 100, 1000, 10000, 100000, 1000000, 10000000, } &redef; - + ## This will check if a :zeek:type:`TrackCount` variable has crossed any ## thresholds in a given set. ## @@ -33,7 +33,7 @@ export { ## ## Returns: T if a threshold has been crossed, else F. global check_threshold: function(v: vector of count, tracker: TrackCount): bool; - + ## This will use the :zeek:id:`default_notice_thresholds` variable to ## check a :zeek:type:`TrackCount` variable to see if it has crossed ## another threshold. 
diff --git a/scripts/policy/files/unified2/main.zeek b/scripts/policy/files/unified2/main.zeek index c1ce27baf7..2930f483a0 100644 --- a/scripts/policy/files/unified2/main.zeek +++ b/scripts/policy/files/unified2/main.zeek @@ -1,4 +1,3 @@ - @load base/utils/dir @load base/utils/paths @@ -255,7 +254,7 @@ event file_new(f: fa_file) if ( |parts| == 3 ) file_dir = parts[0]; - if ( (watch_file != "" && f$source == watch_file) || + if ( (watch_file != "" && f$source == watch_file) || (watch_dir != "" && compress_path(watch_dir) == file_dir) ) { Files::add_analyzer(f, Files::ANALYZER_UNIFIED2); diff --git a/scripts/policy/frameworks/cluster/agent/__load__.zeek b/scripts/policy/frameworks/cluster/agent/__load__.zeek index 1db332f544..f7f36173f3 100644 --- a/scripts/policy/frameworks/cluster/agent/__load__.zeek +++ b/scripts/policy/frameworks/cluster/agent/__load__.zeek @@ -1,5 +1,4 @@ -# The entry point for the cluster agent. It only runs bootstrap logic for -# launching via the Supervisor. If we're not running the Supervisor, this does -# nothing. +##! The entry point for the cluster agent. It runs bootstrap logic for launching +##! the agent process via Zeek's Supervisor. @load ./boot diff --git a/scripts/policy/frameworks/cluster/agent/api.zeek b/scripts/policy/frameworks/cluster/agent/api.zeek index a5334fbbef..7957677457 100644 --- a/scripts/policy/frameworks/cluster/agent/api.zeek +++ b/scripts/policy/frameworks/cluster/agent/api.zeek @@ -1,24 +1,108 @@ +##! The event API of cluster agents. Most endpoints consist of event pairs, +##! where the agent answers a request event with a corresponding response +##! event. Such event pairs share the same name prefix and end in "_request" and +##! "_response", respectively. + @load base/frameworks/supervisor/control @load policy/frameworks/cluster/controller/types module ClusterAgent::API; export { + ## A simple versioning scheme, used to track basic compatibility of + ## controller and agent. 
const version = 1; + # Agent API events + ## The controller sends this event to convey a new cluster configuration + ## to the agent. Once processed, the agent responds with the response + ## event. + ## + ## reqid: a request identifier string, echoed in the response event. + ## + ## config: a :zeek:see:`ClusterController::Types::Configuration` record + ## describing the cluster topology. Note that this contains the full + ## topology, not just the part pertaining to this agent. That's because + ## the cluster framework requires full cluster visibility to establish + ## the needed peerings. + ## global set_configuration_request: event(reqid: string, config: ClusterController::Types::Configuration); + + ## Response to a set_configuration_request event. The agent sends + ## this back to the controller. + ## + ## reqid: the request identifier used in the request event. + ## + ## result: the result record. + ## global set_configuration_response: event(reqid: string, result: ClusterController::Types::Result); + + ## The controller sends this event to confirm to the agent that it is + ## part of the current cluster topology. The agent acknowledges with the + ## corresponding response event. + ## + ## reqid: a request identifier string, echoed in the response event. + ## + global agent_welcome_request: event(reqid: string); + + ## Response to an agent_welcome_request event. The agent sends this + ## back to the controller. + ## + ## reqid: the request identifier used in the request event. + ## + ## result: the result record. + ## + global agent_welcome_response: event(reqid: string, + result: ClusterController::Types::Result); + + + ## The controller sends this event to convey that the agent is not + ## currently required. This status may later change, depending on + ## updates from the client, so the Broker-level peering can remain + ## active. 
The agent releases any cluster-related resources (including + ## shutdown of existing Zeek cluster nodes) when processing the request, + ## and confirms via the response event. Shutting down an agent at this + ## point has no operational impact on the running cluster. + ## + ## reqid: a request identifier string, echoed in the response event. + ## + global agent_standby_request: event(reqid: string); + + ## Response to an agent_standby_request event. The agent sends this + ## back to the controller. + ## + ## reqid: the request identifier used in the request event. + ## + ## result: the result record. + ## + global agent_standby_response: event(reqid: string, + result: ClusterController::Types::Result); + + # Notification events, agent -> controller - # Report agent being available. + ## The agent sends this event upon peering as a "check-in", informing + ## the controller that an agent of the given name is now available to + ## communicate with. It is a controller-level equivalent of + ## :zeek:see:`Broker::peer_added`. + ## + ## instance: an instance name, really the agent's name as per :zeek:see:`ClusterAgent::name`. + ## + ## host: the IP address of the agent. (This may change in the future.) + ## + ## api_version: the API version of this agent. + ## global notify_agent_hello: event(instance: string, host: addr, api_version: count); + + # The following are not yet implemented. + # Report node state changes. global notify_change: event(instance: string, n: ClusterController::Types::Node, @@ -30,4 +114,4 @@ export { # Report informational message. global notify_log: event(instance: string, msg: string, node: string &default=""); -} + } diff --git a/scripts/policy/frameworks/cluster/agent/boot.zeek b/scripts/policy/frameworks/cluster/agent/boot.zeek index 3eed5f6dd9..daff5b2d24 100644 --- a/scripts/policy/frameworks/cluster/agent/boot.zeek +++ b/scripts/policy/frameworks/cluster/agent/boot.zeek @@ -1,3 +1,9 @@ +##!
The cluster agent boot logic runs in Zeek's supervisor and instructs it to +##! launch an agent process. The agent's main logic resides in main.zeek, +##! similarly to other frameworks. The new process will execute that script. +##! +##! If the current process is not the Zeek supervisor, this does nothing. + @load ./config # The agent needs the supervisor to listen for node management requests. We diff --git a/scripts/policy/frameworks/cluster/agent/config.zeek b/scripts/policy/frameworks/cluster/agent/config.zeek index 2e836d08ab..732dc39450 100644 --- a/scripts/policy/frameworks/cluster/agent/config.zeek +++ b/scripts/policy/frameworks/cluster/agent/config.zeek @@ -1,51 +1,83 @@ +##! Configuration settings for a cluster agent. + @load policy/frameworks/cluster/controller/types module ClusterAgent; export { - # The name this agent uses to represent the cluster instance - # it manages. When the environment variable isn't set and there's, - # no redef, this falls back to "agent-". + ## The name this agent uses to represent the cluster instance it + ## manages. Defaults to the value of the ZEEK_AGENT_NAME environment + ## variable. When that is unset and you don't redef the value, + ## the implementation defaults to "agent-". const name = getenv("ZEEK_AGENT_NAME") &redef; - # Agent stdout/stderr log files to produce in Zeek's working - # directory. If empty, no such logs will result. The actual - # log files have the agent's name (as per above) dot-prefixed. + ## Agent stdout log configuration. If the string is non-empty, Zeek will + ## produce a free-form log (i.e., not one governed by Zeek's logging + ## framework) in Zeek's working directory. The final log's name is + ## "<name>.<suffix>", where the name is taken from :zeek:see:`ClusterAgent::name`, + ## and the suffix is defined by the following variable. If left empty, + ## no such log results. + ## + ## Note that the agent also establishes a "proper" Zeek log via the + ## :zeek:see:`ClusterController::Log` module.
const stdout_file_suffix = "agent.stdout" &redef; + + ## Agent stderr log configuration. Like :zeek:see:`ClusterAgent::stdout_file_suffix`, + ## but for the stderr stream. const stderr_file_suffix = "agent.stderr" &redef; - # The address and port the agent listens on. When - # undefined, falls back to configurable default values. + ## The network address the agent listens on. This only takes effect if + ## the agent isn't configured to connect to the controller (see + ## :zeek:see:`ClusterAgent::controller`). By default this uses the value of the + ## ZEEK_AGENT_ADDR environment variable, but you may also redef to + ## a specific value. When empty, the implementation falls back to + ## :zeek:see:`ClusterAgent::default_address`. const listen_address = getenv("ZEEK_AGENT_ADDR") &redef; + + ## The fallback listen address if :zeek:see:`ClusterAgent::listen_address` + ## remains empty. Unless redefined, this uses Broker's own default listen + ## address. const default_address = Broker::default_listen_address &redef; + ## The network port the agent listens on. Counterpart to + ## :zeek:see:`ClusterAgent::listen_address`, defaulting to the ZEEK_AGENT_PORT + ## environment variable. const listen_port = getenv("ZEEK_AGENT_PORT") &redef; + + ## The fallback listen port if :zeek:see:`ClusterAgent::listen_port` remains empty. const default_port = 2151/tcp &redef; - # The agent communicates under to following topic prefix, - # suffixed with "/" (see above): + ## The agent's Broker topic prefix. For its own communication, the agent + ## suffixes this with "/", based on :zeek:see:`ClusterAgent::name`. const topic_prefix = "zeek/cluster-control/agent" &redef; - # The coordinates of the controller. When defined, it means - # agents peer with (connect to) the controller; otherwise the - # controller knows all agents and peers with them. + ## The network coordinates of the controller. 
When defined, the agent + ## peers with (and connects to) the controller; otherwise the controller + ## will peer (and connect to) the agent, listening as defined by + ## :zeek:see:`ClusterAgent::listen_address` and :zeek:see:`ClusterAgent::listen_port`. const controller: Broker::NetworkInfo = [ $address="0.0.0.0", $bound_port=0/unknown] &redef; - # Agent and controller currently log only, not via the data cluster's - # logger. (This might get added later.) For now, this means that - # if both write to the same log file, it gets garbled. The following - # lets you specify the working directory specifically for the agent. + ## An optional custom output directory for the agent's stdout and stderr + ## logs. Agent and controller currently only log locally, not via the + ## data cluster's logger node. (This might change in the future.) This + ## means that if both write to the same log file, the output gets + ## garbled. const directory = "" &redef; - # Working directory for data cluster nodes. When relative, note - # that this will apply from the working directory of the agent, - # since it creates data cluster nodes. + ## The working directory for data cluster nodes created by this + ## agent. If you make this a relative path, note that the path is + ## relative to the agent's working directory, since it creates data + ## cluster nodes. const cluster_directory = "" &redef; - # The following functions return the effective network endpoint - # information for this agent, in two related forms. + ## Returns a :zeek:see:`ClusterController::Types::Instance` describing this + ## instance (its agent name plus listening address/port, as applicable). global instance: function(): ClusterController::Types::Instance; + + ## Returns a :zeek:see:`Broker::EndpointInfo` record for this instance. + ## Similar to :zeek:see:`ClusterAgent::instance`, but with slightly different + ## data format. 
global endpoint_info: function(): Broker::EndpointInfo; } @@ -53,8 +85,8 @@ function instance(): ClusterController::Types::Instance { local epi = endpoint_info(); return ClusterController::Types::Instance($name=epi$id, - $host=to_addr(epi$network$address), - $listen_port=epi$network$bound_port); + $host=to_addr(epi$network$address), + $listen_port=epi$network$bound_port); } function endpoint_info(): Broker::EndpointInfo diff --git a/scripts/policy/frameworks/cluster/agent/main.zeek b/scripts/policy/frameworks/cluster/agent/main.zeek index 1956d47d0c..f545186304 100644 --- a/scripts/policy/frameworks/cluster/agent/main.zeek +++ b/scripts/policy/frameworks/cluster/agent/main.zeek @@ -1,3 +1,8 @@ +##! This is the main "runtime" of a cluster agent. Zeek does not load this +##! directly; rather, the agent's bootstrapping module (in ./boot.zeek) +##! specifies it as the script to run in the node newly created via Zeek's +##! supervisor. + @load base/frameworks/broker @load policy/frameworks/cluster/controller/config @@ -6,21 +11,24 @@ @load ./api +module ClusterAgent::Runtime; + redef ClusterController::role = ClusterController::Types::AGENT; # The global configuration as passed to us by the controller -global global_config: ClusterController::Types::Configuration; +global g_config: ClusterController::Types::Configuration; # A map to make other instance info accessible -global instances: table[string] of ClusterController::Types::Instance; +global g_instances: table[string] of ClusterController::Types::Instance; # A map for the nodes we run on this instance, via this agent. -global nodes: table[string] of ClusterController::Types::Node; +global g_nodes: table[string] of ClusterController::Types::Node; # The node map employed by the supervisor to describe the cluster # topology to newly forked nodes. We refresh it when we receive # new configurations. 
-global data_cluster: table[string] of Supervisor::ClusterEndpoint; +global g_data_cluster: table[string] of Supervisor::ClusterEndpoint; + event SupervisorControl::create_response(reqid: string, result: string) { @@ -86,43 +94,43 @@ event ClusterAgent::API::set_configuration_request(reqid: string, config: Cluste # Adopt the global configuration provided. # XXX this can later handle validation and persistence # XXX should do this transactionally, only set when all else worked - global_config = config; + g_config = config; # Refresh the instances table: - instances = table(); + g_instances = table(); for ( inst in config$instances ) - instances[inst$name] = inst; + g_instances[inst$name] = inst; # Terminate existing nodes - for ( nodename in nodes ) + for ( nodename in g_nodes ) supervisor_destroy(nodename); - nodes = table(); + g_nodes = table(); # Refresh the data cluster and nodes tables - data_cluster = table(); + g_data_cluster = table(); for ( node in config$nodes ) { if ( node$instance == ClusterAgent::name ) - nodes[node$name] = node; + g_nodes[node$name] = node; local cep = Supervisor::ClusterEndpoint( $role = node$role, - $host = instances[node$instance]$host, + $host = g_instances[node$instance]$host, $p = node$p); if ( node?$interface ) cep$interface = node$interface; - data_cluster[node$name] = cep; + g_data_cluster[node$name] = cep; } # Apply the new configuration via the supervisor - for ( nodename in nodes ) + for ( nodename in g_nodes ) { - node = nodes[nodename]; + node = g_nodes[nodename]; nc = Supervisor::NodeConfig($name=nodename); if ( ClusterAgent::cluster_directory != "" ) @@ -140,7 +148,7 @@ event ClusterAgent::API::set_configuration_request(reqid: string, config: Cluste # XXX could use options to enable per-node overrides for # directory, stdout, stderr, others? 
- nc$cluster = data_cluster; + nc$cluster = g_data_cluster; supervisor_create(nc); } @@ -149,22 +157,59 @@ event ClusterAgent::API::set_configuration_request(reqid: string, config: Cluste # events asynchonously. The only indication of error will be # notification events to the controller. + if ( reqid != "" ) + { + local res = ClusterController::Types::Result( + $reqid = reqid, + $instance = ClusterAgent::name); + + ClusterController::Log::info(fmt("tx ClusterAgent::API::set_configuration_response %s", + ClusterController::Types::result_to_string(res))); + event ClusterAgent::API::set_configuration_response(reqid, res); + } + } + +event ClusterAgent::API::agent_welcome_request(reqid: string) + { + ClusterController::Log::info(fmt("rx ClusterAgent::API::agent_welcome_request %s", reqid)); + local res = ClusterController::Types::Result( $reqid = reqid, $instance = ClusterAgent::name); - ClusterController::Log::info(fmt("tx ClusterAgent::API::set_configuration_response %s", reqid)); - event ClusterAgent::API::set_configuration_response(reqid, res); + ClusterController::Log::info(fmt("tx ClusterAgent::API::agent_welcome_response %s", + ClusterController::Types::result_to_string(res))); + event ClusterAgent::API::agent_welcome_response(reqid, res); + } + +event ClusterAgent::API::agent_standby_request(reqid: string) + { + ClusterController::Log::info(fmt("rx ClusterAgent::API::agent_standby_request %s", reqid)); + + # We shut down any existing cluster nodes via an empty configuration, + # and fall silent. We do not unpeer/disconnect (assuming we earlier + # peered/connected -- otherwise there's nothing we can do here via + # Broker anyway), mainly to keep open the possibility of running + # cluster nodes again later. 
+ event ClusterAgent::API::set_configuration_request("", ClusterController::Types::Configuration()); + + local res = ClusterController::Types::Result( + $reqid = reqid, + $instance = ClusterAgent::name); + + ClusterController::Log::info(fmt("tx ClusterAgent::API::agent_standby_response %s", + ClusterController::Types::result_to_string(res))); + event ClusterAgent::API::agent_standby_response(reqid, res); } event Broker::peer_added(peer: Broker::EndpointInfo, msg: string) { # This does not (cannot?) immediately verify that the new peer - # is in fact a controller, so we might send this redundantly. - # Controllers handle the hello event accordingly. + # is in fact a controller, so we might send this in vain. + # Controllers register the agent upon receipt of the event. local epi = ClusterAgent::endpoint_info(); - # XXX deal with unexpected peers, unless we're okay with it + event ClusterAgent::API::notify_agent_hello(epi$id, to_addr(epi$network$address), ClusterAgent::API::version); } @@ -185,13 +230,16 @@ event zeek_init() Broker::peer(supervisor_addr, Broker::default_port, Broker::default_listen_retry); # Agents need receive communication targeted at it, and any responses - # from the supervisor. + # from the supervisor. Broker::subscribe(agent_topic); Broker::subscribe(SupervisorControl::topic_prefix); # Auto-publish a bunch of events. Glob patterns or module-level # auto-publish would be helpful here. Broker::auto_publish(agent_topic, ClusterAgent::API::set_configuration_response); + Broker::auto_publish(agent_topic, ClusterAgent::API::agent_welcome_response); + Broker::auto_publish(agent_topic, ClusterAgent::API::agent_standby_response); + Broker::auto_publish(agent_topic, ClusterAgent::API::notify_agent_hello); Broker::auto_publish(agent_topic, ClusterAgent::API::notify_change); Broker::auto_publish(agent_topic, ClusterAgent::API::notify_error); @@ -210,8 +258,8 @@ event zeek_init() { # We connect to the controller. 
Broker::peer(ClusterAgent::controller$address, - ClusterAgent::controller$bound_port, - ClusterController::connect_retry); + ClusterAgent::controller$bound_port, + ClusterController::connect_retry); } else { diff --git a/scripts/policy/frameworks/cluster/controller/__load__.zeek b/scripts/policy/frameworks/cluster/controller/__load__.zeek index c88fde804b..6cd1dc789d 100644 --- a/scripts/policy/frameworks/cluster/controller/__load__.zeek +++ b/scripts/policy/frameworks/cluster/controller/__load__.zeek @@ -1,5 +1,4 @@ -# The entry point for the cluster controller. It only runs bootstrap logic for -# launching via the Supervisor. If we're not running the Supervisor, this does -# nothing. +##! The entry point for the cluster controller. It runs bootstrap logic for +##! launching the controller process via Zeek's Supervisor. @load ./boot diff --git a/scripts/policy/frameworks/cluster/controller/api.zeek b/scripts/policy/frameworks/cluster/controller/api.zeek index 4d3e1ba70d..27c41d33ff 100644 --- a/scripts/policy/frameworks/cluster/controller/api.zeek +++ b/scripts/policy/frameworks/cluster/controller/api.zeek @@ -1,16 +1,96 @@ +##! The event API of cluster controllers. Most endpoints consist of event pairs, +##! where the controller answers a zeek-client request event with a +##! corresponding response event. Such event pairs share the same name prefix +##! and end in "_request" and "_response", respectively. + @load ./types module ClusterController::API; export { + ## A simple versioning scheme, used to track basic compatibility of + ## controller, agents, and zeek-client. const version = 1; - global get_instances_request: event(reqid: string); - global get_instances_response: event(reqid: string, - instances: vector of ClusterController::Types::Instance); + ## zeek-client sends this event to request a list of the currently + ## peered agents/instances. + ## + ## reqid: a request identifier string, echoed in the response event. 
+ ## + global get_instances_request: event(reqid: string); + + ## Response to a get_instances_request event. The controller sends + ## this back to the client. + ## + ## reqid: the request identifier used in the request event. + ## + ## result: the result record. Its data member is a + ## :zeek:see:`ClusterController::Types::Instance` record. + ## + global get_instances_response: event(reqid: string, + result: ClusterController::Types::Result); + + + ## zeek-client sends this event to establish a new cluster configuration, + ## including the full cluster topology. The controller processes the update + ## and relays it to the agents. Once each has responded (or a timeout occurs) + ## the controller sends a corresponding response event back to the client. + ## + ## reqid: a request identifier string, echoed in the response event. + ## + ## config: a :zeek:see:`ClusterController::Types::Configuration` record + ## specifying the cluster configuration. + ## global set_configuration_request: event(reqid: string, config: ClusterController::Types::Configuration); + + ## Response to a set_configuration_request event. The controller sends + ## this back to the client. + ## + ## reqid: the request identifier used in the request event. + ## + ## result: a vector of :zeek:see:`ClusterController::Types::Result` records. + ## Each member captures one agent's response. + ## global set_configuration_response: event(reqid: string, result: ClusterController::Types::ResultVec); -} + + + # Testing events. These don't provide operational value but expose + # internal functionality, triggered by test cases. + + ## This event causes no further action (other than getting logged) if + ## with_state is F. When T, the controller establishes request state, and + ## the controller only ever sends the response event when this state times + ## out. + ## + ## reqid: a request identifier string, echoed in the response event when + ## with_state is T. 
+ ## + ## with_state: flag indicating whether the controller should keep (and + ## time out) request state for this request. + ## + global test_timeout_request: event(reqid: string, with_state: bool); + + ## Response to a test_timeout_request event. The controller sends this + ## back to the client if the original request had the with_state flag. + ## + ## reqid: the request identifier used in the request event. + ## + global test_timeout_response: event(reqid: string, + result: ClusterController::Types::Result); + + + # Notification events, agent -> controller + + ## The controller triggers this event when the operational cluster + ## instances align with the ones desired by the cluster + ## configuration. It's essentially a cluster management readiness + ## event. This event is currently only used by the controller and not + ## published to other topics. + ## + ## instances: the set of instance names now ready. + ## + global notify_agents_ready: event(instances: set[string]); + } diff --git a/scripts/policy/frameworks/cluster/controller/boot.zeek b/scripts/policy/frameworks/cluster/controller/boot.zeek index 9d23731946..f06a560760 100644 --- a/scripts/policy/frameworks/cluster/controller/boot.zeek +++ b/scripts/policy/frameworks/cluster/controller/boot.zeek @@ -1,3 +1,10 @@ +##! The cluster controller's boot logic runs in Zeek's supervisor and instructs +##! it to launch the controller process. The controller's main logic resides in +##! main.zeek, similarly to other frameworks. The new process will execute that +##! script. +##! +##! If the current process is not the Zeek supervisor, this does nothing. + @load ./config event zeek_init() diff --git a/scripts/policy/frameworks/cluster/controller/config.zeek b/scripts/policy/frameworks/cluster/controller/config.zeek index 36c4a0b5bd..de4e570115 100644 --- a/scripts/policy/frameworks/cluster/controller/config.zeek +++ b/scripts/policy/frameworks/cluster/controller/config.zeek @@ -1,53 +1,78 @@ +##! 
Configuration settings for the cluster controller. + @load policy/frameworks/cluster/agent/config module ClusterController; export { - # The name of this controller in the cluster. - # Without the environment variable and no redef, this - # falls back to "controller-". + ## The name of this controller. Defaults to the value of the + ## ZEEK_CONTROLLER_NAME environment variable. When that is unset and the + ## user doesn't redef the value, the implementation defaults to + ## "controller-". const name = getenv("ZEEK_CONTROLLER_NAME") &redef; - # Controller stdout/stderr log files to produce in Zeek's - # working directory. If empty, no such logs will result. + ## The controller's stdout log name. If the string is non-empty, Zeek will + ## produce a free-form log (i.e., not one governed by Zeek's logging + ## framework) in Zeek's working directory. If left empty, no such log + ## results. + ## + ## Note that the controller also establishes a "proper" Zeek log via the + ## :zeek:see:`ClusterController::Log` module. const stdout_file = "controller.stdout" &redef; + + ## The controller's stderr log name. Like :zeek:see:`ClusterController::stdout_file`, + ## but for the stderr stream. const stderr_file = "controller.stderr" &redef; - # The address and port the controller listens on. When - # undefined, falls back to the default_address, which you can - # likewise customize. + ## The network address the controller listens on. By default this uses + ## the value of the ZEEK_CONTROLLER_ADDR environment variable, but you + ## may also redef to a specific value. When empty, the implementation + ## falls back to :zeek:see:`ClusterController::default_address`. const listen_address = getenv("ZEEK_CONTROLLER_ADDR") &redef; + + ## The fallback listen address if :zeek:see:`ClusterController::listen_address` + ## remains empty. Unless redefined, this uses Broker's own default + ## listen address. 
const default_address = Broker::default_listen_address &redef; + ## The network port the controller listens on. Counterpart to + ## :zeek:see:`ClusterController::listen_address`, defaulting to the + ## ZEEK_CONTROLLER_PORT environment variable. const listen_port = getenv("ZEEK_CONTROLLER_PORT") &redef; + + ## The fallback listen port if :zeek:see:`ClusterController::listen_port` + ## remains empty. const default_port = 2150/tcp &redef; - # A more aggressive default retry interval (vs default 30s) + ## The controller's connect retry interval. Defaults to a more + ## aggressive value compared to Broker's 30s. const connect_retry = 1sec &redef; - # The controller listens for messages on this topic: + ## The controller's Broker topic. Clients send requests to this topic. const topic = "zeek/cluster-control/controller" &redef; - # The set of agents to interact with. When this is non-empty - # at startup, the controller contacts the agents; when it is - # empty, it waits for agents to connect. They key is a name of - # each instance. This should match the $name member of the - # instance records. - const instances: table[string] of ClusterController::Types::Instance = { } &redef; - - # The role of this node in cluster management. Agent and - # controller both redef this. Used during logging. + ## The role of this process in cluster management. Agent and controller + ## both redefine this. Used during logging. const role = ClusterController::Types::NONE &redef; - # Agent and controller currently log only, not via the data cluster's - # logger. (This might get added later.) For now, this means that - # if both write to the same log file, it gets garbled. The following - # lets you specify the working directory specifically for the agent. + ## The timeout for request state. Such state (see the :zeek:see:`ClusterController::Request` + ## module) ties together request and response event pairs. The timeout causes + ## its cleanup in the absence of a timely response. 
It applies both to + ## state kept for client requests, as well as state in the agents for + ## requests to the supervisor. + const request_timeout = 10sec &redef; + + ## An optional custom output directory for the controller's stdout and + ## stderr logs. Agent and controller currently only log locally, not via + ## the data cluster's logger node. (This might change in the future.) + ## This means that if both write to the same log file, the output gets + ## garbled. const directory = "" &redef; - # The following functions return the effective network endpoint - # information for this controller, in two related forms. + ## Returns a :zeek:see:`Broker::NetworkInfo` record describing the controller. global network_info: function(): Broker::NetworkInfo; + + ## Returns a :zeek:see:`Broker::EndpointInfo` record describing the controller. global endpoint_info: function(): Broker::EndpointInfo; } diff --git a/scripts/policy/frameworks/cluster/controller/log.zeek b/scripts/policy/frameworks/cluster/controller/log.zeek index 49aeb9b282..a7525dec0c 100644 --- a/scripts/policy/frameworks/cluster/controller/log.zeek +++ b/scripts/policy/frameworks/cluster/controller/log.zeek @@ -1,3 +1,8 @@ +##! This module implements straightforward logging abilities for cluster +##! controller and agent. It uses Zeek's logging framework, and works only for +##! nodes managed by the supervisor. In this setting Zeek's logging framework +##! operates locally, i.e., this logging does not involve any logger nodes. + @load ./config module ClusterController::Log; @@ -9,6 +14,7 @@ export { ## A default logging policy hook for the stream. global log_policy: Log::PolicyHook; + ## The controller/agent log supports four different log levels. type Level: enum { DEBUG, INFO, @@ -16,7 +22,7 @@ export { ERROR, }; - ## The record type which contains the column fields of the cluster log. + ## The record type containing the column fields of the agent/controller log. 
type Info: record { ## The time at which a cluster message was generated. ts: time; @@ -30,10 +36,32 @@ export { message: string; } &log; + ## The log level in use for this node. global log_level = DEBUG &redef; + ## A debug-level log message writer. + ## + ## message: the message to log. + ## + global debug: function(message: string); + + ## An info-level log message writer. + ## + ## message: the message to log. + ## global info: function(message: string); + + ## A warning-level log message writer. + ## + ## message: the message to log. + ## global warning: function(message: string); + + ## An error-level log message writer. (This only logs a message, it does not + ## terminate Zeek or have other runtime effects.) + ## + ## message: the message to log. + ## global error: function(message: string); } diff --git a/scripts/policy/frameworks/cluster/controller/main.zeek b/scripts/policy/frameworks/cluster/controller/main.zeek index afc439b49d..33e0456049 100644 --- a/scripts/policy/frameworks/cluster/controller/main.zeek +++ b/scripts/policy/frameworks/cluster/controller/main.zeek @@ -1,3 +1,8 @@ +##! This is the main "runtime" of the cluster controller. Zeek does not load +##! this directly; rather, the controller's bootstrapping module (in ./boot.zeek) +##! specifies it as the script to run in the node newly created via Zeek's +##! supervisor. 
+ @load base/frameworks/broker @load policy/frameworks/cluster/agent/config @@ -6,55 +11,255 @@ @load ./api @load ./log @load ./request +@load ./util + +module ClusterController::Runtime; redef ClusterController::role = ClusterController::Types::CONTROLLER; +global check_instances_ready: function(); +global add_instance: function(inst: ClusterController::Types::Instance); +global drop_instance: function(inst: ClusterController::Types::Instance); + +global null_config: function(): ClusterController::Types::Configuration; +global is_null_config: function(config: ClusterController::Types::Configuration): bool; + +# Checks whether the given instance is one that we know with different +# communication settings: a different peering direction, a different listening +# port, etc. Used as a predicate to indicate when we need to drop the existing +# one from our internal state. +global is_instance_connectivity_change: function + (inst: ClusterController::Types::Instance): bool; + +# The set of agents the controller interacts with to manage the currently +# configured cluster. This may be a subset of all the agents known to the +# controller, as tracked by the g_instances_known set. The key is the instance +# name and should match the $name member of the corresponding instance record. +global g_instances: table[string] of ClusterController::Types::Instance = table(); + +# The set of instances that have checked in with the controller. This is a +# superset of g_instances, since it covers any agent that has sent us a +# notify_agent_hello event. +global g_instances_known: set[string] = set(); + +# A corresponding set of instances/agents that we track in order to understand +# when all of the above instances have sent agent_welcome_response events. (An +# alternative would be to use a record that adds a single state bit for each +# instance, and store that above.)
+global g_instances_ready: set[string] = set(); + +# The request ID of the most recent configuration update that's come in from +# a client. We track it here until we know we are ready to communicate with all +# agents required by the update. +global g_config_reqid_pending: string = ""; + +# The most recent configuration we have successfully deployed. This is also +# the one we send whenever the client requests it. +global g_config_current: ClusterController::Types::Configuration; + +function send_config_to_agents(req: ClusterController::Request::Request, + config: ClusterController::Types::Configuration) + { + for ( name in g_instances ) + { + if ( name !in g_instances_ready ) + next; + + local agent_topic = ClusterAgent::topic_prefix + "/" + name; + local areq = ClusterController::Request::create(); + areq$parent_id = req$id; + + # We track the requests sent off to each agent. As the + # responses come in, we can check them off as completed, + # and once all are, we respond back to the client. + req$set_configuration_state$requests += areq; + + # We could also broadcast just once on the agent prefix, but + # explicit request/response pairs for each agent seems cleaner. + ClusterController::Log::info(fmt("tx ClusterAgent::API::set_configuration_request %s to %s", areq$id, name)); + Broker::publish(agent_topic, ClusterAgent::API::set_configuration_request, areq$id, config); + } + } + +# This is the &on_change handler for the g_instances_ready set, meaning +# it runs whenever a required agent has confirmed it's ready. 
+function check_instances_ready() + { + local cur_instances: set[string]; + + for ( inst in g_instances ) + add cur_instances[inst]; + + if ( cur_instances == g_instances_ready ) + event ClusterController::API::notify_agents_ready(cur_instances); + } + +function add_instance(inst: ClusterController::Types::Instance) + { + g_instances[inst$name] = inst; + + if ( inst?$listen_port ) + Broker::peer(cat(inst$host), inst$listen_port, + ClusterController::connect_retry); + + if ( inst$name in g_instances_known ) + { + # The agent has already peered with us. Send welcome to indicate + # it's part of cluster management. Once it responds, we update + # the set of ready instances and proceed as feasible with config + # deployments. + + local req = ClusterController::Request::create(); + + ClusterController::Log::info(fmt("tx ClusterAgent::API::agent_welcome_request to %s", inst$name)); + Broker::publish(ClusterAgent::topic_prefix + "/" + inst$name, + ClusterAgent::API::agent_welcome_request, req$id); + } + } + +function drop_instance(inst: ClusterController::Types::Instance) + { + if ( inst$name !in g_instances ) + return; + + # Send the agent a standby so it shuts down its cluster nodes & state + ClusterController::Log::info(fmt("tx ClusterAgent::API::agent_standby_request to %s", inst$name)); + Broker::publish(ClusterAgent::topic_prefix + "/" + inst$name, + ClusterAgent::API::agent_standby_request, ""); + + delete g_instances[inst$name]; + + if ( inst$name in g_instances_ready ) + delete g_instances_ready[inst$name]; + + # The agent remains in g_instances_known, to track that we're able + # to communicate with it in case it's required again. 
+ + ClusterController::Log::info(fmt("dropped instance %s", inst$name)); + } + +function null_config(): ClusterController::Types::Configuration + { + return ClusterController::Types::Configuration($id=""); + } + +function is_null_config(config: ClusterController::Types::Configuration): bool + { + return config$id == ""; + } + +function is_instance_connectivity_change(inst: ClusterController::Types::Instance): bool + { + # If we're not tracking this instance as part of a cluster config, it's + # not a change. (More precisely: we cannot say whether it's changed.) + if ( inst$name !in g_instances ) + return F; + + # The agent has peered with us and now uses a different host. + # XXX 0.0.0.0 is a workaround until we've resolved how agents that peer + # with us obtain their identity. Broker ID? + if ( inst$host != 0.0.0.0 && inst$host != g_instances[inst$name]$host ) + return T; + + # The agent has a listening port and the one we know does not, or vice + # versa. I.e., this is a change in the intended peering direction. + if ( inst?$listen_port != g_instances[inst$name]?$listen_port ) + return T; + + # Both have listening ports, but they differ. + if ( inst?$listen_port && g_instances[inst$name]?$listen_port && + inst$listen_port != g_instances[inst$name]$listen_port ) + return T; + + return F; + } + +event ClusterController::API::notify_agents_ready(instances: set[string]) + { + local insts = ClusterController::Util::set_to_vector(instances); + + ClusterController::Log::info(fmt("rx ClusterController::API:notify_agents_ready %s", join_string_vec(insts, ","))); + + local req = ClusterController::Request::lookup(g_config_reqid_pending); + + # If there's no pending request, when it's no longer available, or it + # doesn't have config state, don't do anything else. + if ( ClusterController::Request::is_null(req) || ! req?$set_configuration_state ) + return; + + # All instances requested in the pending configuration update are now + # known to us. Send them the config. 
As they send their response events + # we update the client's request state and eventually send the response + # event to it. + send_config_to_agents(req, req$set_configuration_state$config); + } + event ClusterAgent::API::notify_agent_hello(instance: string, host: addr, api_version: count) { - # See if we already know about this agent; if not, register - # it. - # - # XXX protection against rogue agents? + ClusterController::Log::info(fmt("rx ClusterAgent::API::notify_agent_hello %s %s", instance, host)); - if ( instance in ClusterController::instances ) + # When an agent checks in with a mismatching API version, we log the + # fact and drop its state, if any. + if ( api_version != ClusterController::API::version ) { - # Do nothing, unless this known agent checks in with a mismatching - # API version, in which case we kick it out. - if ( api_version != ClusterController::API::version ) - { - local inst = ClusterController::instances[instance]; - if ( inst?$listen_port ) - { - # We peered with this instance, unpeer. - Broker::unpeer(cat(inst$host), inst$listen_port ); - # XXX what to do if they connected to us? - } - delete ClusterController::instances[instance]; - } + ClusterController::Log::warning( + fmt("instance %s/%s has checked in with incompatible API version %s", + instance, host, api_version)); - # Update the instance name in the pointed-to record, in case it - # was previously named otherwise. Not being too picky here allows - # the user some leeway in spelling out the original config.
- ClusterController::instances[instance]$name = instance; + if ( instance in g_instances ) + drop_instance(g_instances[instance]); + if ( instance in g_instances_known ) + delete g_instances_known[instance]; return; } - if ( api_version != ClusterController::API::version ) - { - ClusterController::Log::warning( - fmt("agent %s/%s speaks incompatible agent protocol (%s, need %s), unpeering", - instance, host, api_version, ClusterController::API::version)); - } + add g_instances_known[instance]; - ClusterController::instances[instance] = ClusterController::Types::Instance($name=instance, $host=host); - ClusterController::Log::info(fmt("instance %s/%s has checked in", instance, host)); + if ( instance in g_instances && instance !in g_instances_ready ) + { + # We need this instance for our cluster and have full context for + # it from the configuration. Tell agent. + local req = ClusterController::Request::create(); + + ClusterController::Log::info(fmt("tx ClusterAgent::API::agent_welcome_request to %s", instance)); + Broker::publish(ClusterAgent::topic_prefix + "/" + instance, + ClusterAgent::API::agent_welcome_request, req$id); + } } +event ClusterAgent::API::agent_welcome_response(reqid: string, result: ClusterController::Types::Result) + { + ClusterController::Log::info(fmt("rx ClusterAgent::API::agent_welcome_response %s", reqid)); + + local req = ClusterController::Request::lookup(reqid); + + if ( ClusterController::Request::is_null(req) ) + return; + + ClusterController::Request::finish(req$id); + + # An agent we've been waiting to hear back from is ready for cluster + # work. Double-check we still want it, otherwise drop it. + + if ( ! 
result$success || result$instance !in g_instances ) + { + ClusterController::Log::info(fmt( + "tx ClusterAgent::API::agent_standby_request to %s", result$instance)); + Broker::publish(ClusterAgent::topic_prefix + "/" + result$instance, + ClusterAgent::API::agent_standby_request, ""); + return; + } + + add g_instances_ready[result$instance]; + ClusterController::Log::info(fmt("instance %s ready", result$instance)); + + check_instances_ready(); + } event ClusterAgent::API::notify_change(instance: string, n: ClusterController::Types::Node, - old: ClusterController::Types::State, - new: ClusterController::Types::State) + old: ClusterController::Types::State, + new: ClusterController::Types::State) { # XXX TODO } @@ -96,10 +301,10 @@ event ClusterAgent::API::set_configuration_response(reqid: string, result: Clust return; # All set_configuration requests to instances are done, so respond - # back to client. We need to compose the result, aggregating - # the results we got from the requests to the agents. In the - # end we have one Result per instance requested in the - # original set_configuration_request. + # back to client. We need to compose the result, aggregating + # the results we got from the requests to the agents. In the + # end we have one Result per instance requested in the + # original set_configuration_request. # # XXX we can likely generalize result aggregation in the request module. for ( i in req$set_configuration_state$requests ) @@ -132,7 +337,13 @@ event ClusterAgent::API::set_configuration_response(reqid: string, result: Clust ClusterController::Request::finish(r$id); } - ClusterController::Log::info(fmt("tx ClusterController::API::set_configuration_response %s", req$id)); + # We're now done with the original set_configuration request. + # Adopt the configuration as the current one. 
+ g_config_current = req$set_configuration_state$config; + g_config_reqid_pending = ""; + + ClusterController::Log::info(fmt("tx ClusterController::API::set_configuration_response %s", + ClusterController::Request::to_string(req))); event ClusterController::API::set_configuration_response(req$id, req$results); ClusterController::Request::finish(req$id); } @@ -141,25 +352,24 @@ event ClusterController::API::set_configuration_request(reqid: string, config: C { ClusterController::Log::info(fmt("rx ClusterController::API::set_configuration_request %s", reqid)); + local res: ClusterController::Types::Result; local req = ClusterController::Request::create(reqid); - req$set_configuration_state = ClusterController::Request::SetConfigurationState(); - # Compare new configuration to the current one and send updates - # to the instances as needed. - if ( config?$instances ) + req$set_configuration_state = ClusterController::Request::SetConfigurationState($config = config); + + # At the moment there can only be one pending request. 
+ if ( g_config_reqid_pending != "" ) { - # XXX properly handle instance update: connect to new instances provided - # when they are listening, accept connections from new instances that are - # not - for ( inst in config$instances ) - { - if ( inst$name !in ClusterController::instances ) - { - local res = ClusterController::Types::Result($reqid=reqid, $instance=inst$name); - res$error = fmt("instance %s is unknown, skipping", inst$name); - req$results += res; - } - } + res = ClusterController::Types::Result($reqid=reqid); + res$success = F; + res$error = fmt("request %s still pending", g_config_reqid_pending); + req$results += res; + + ClusterController::Log::info(fmt("tx ClusterController::API::set_configuration_response %s", + ClusterController::Request::to_string(req))); + event ClusterController::API::set_configuration_response(req$id, req$results); + ClusterController::Request::finish(req$id); + return; } # XXX validate the configuration: @@ -169,82 +379,177 @@ event ClusterController::API::set_configuration_request(reqid: string, config: C # - Do node types with optional fields have required values? # ... - # Transmit the configuration on to the agents. They need to be aware of - # each other's location and nodes, so the data cluster nodes can connect - # (for example, so a worker on instance 1 can connect to a logger on - # instance 2). - for ( name in ClusterController::instances ) + # The incoming request is now the pending one. It gets cleared when all + # agents have processed their config updates successfully, or their + # responses time out. + g_config_reqid_pending = req$id; + + # Compare the instance configuration to our current one. If it matches, + # we can proceed to deploying the new data cluster topology. If it does + # not, we need to establish connectivity with agents we connect to, or + # wait until all instances that connect to us have done so. Either triggers + # a notify_agents_ready event, upon which we then deploy the data cluster. 
+ + # The current & new set of instance names. + local insts_current: set[string]; + local insts_new: set[string]; + + # A set of current instances not contained in the new config. + # Those will need to get dropped. + local insts_to_drop: set[string]; + + # The opposite: new instances not yet in our current set. Those we will need + # to establish contact with (or they with us). + local insts_to_add: set[string]; + + # The overlap: instances in both the current and new set. For those we verify + # that we're actually dealign with the same entities, and might need to re- + # connect if not. + local insts_to_keep: set[string]; + + # Alternative representation of insts_to_add, directly providing the instances. + local insts_to_peer: table[string] of ClusterController::Types::Instance; + + # Helpful locals. + local inst_name: string; + local inst: ClusterController::Types::Instance; + + for ( inst_name in g_instances ) + add insts_current[inst_name]; + for ( inst in config$instances ) + add insts_new[inst$name]; + + # Populate TODO lists for instances we need to drop, check, or add. + insts_to_drop = insts_current - insts_new; + insts_to_add = insts_new - insts_current; + insts_to_keep = insts_new & insts_current; + + for ( inst in config$instances ) { - local agent_topic = ClusterAgent::topic_prefix + "/" + name; - local areq = ClusterController::Request::create(); - areq$parent_id = reqid; + if ( inst$name in insts_to_add ) + { + insts_to_peer[inst$name] = inst; + next; + } - # We track the requests sent off to each agent. As the - # responses come in, we can check them off as completed, - # and once all are, we respond back to the client. - req$set_configuration_state$requests += areq; + # Focus on the keepers: check for change in identity/location. + if ( inst$name !in insts_to_keep ) + next; - # XXX could also broadcast just once on the agent prefix, but - # explicit request/response pairs for each agent seems cleaner. 
- ClusterController::Log::info(fmt("tx ClusterAgent::API::set_configuration_request %s to %s", - areq$id, name)); - Broker::publish(agent_topic, ClusterAgent::API::set_configuration_request, areq$id, config); + if ( is_instance_connectivity_change(inst) ) + { + # The endpoint looks different. We drop the current one + # and need to re-establish connectivity with the new + # one. + add insts_to_drop[inst$name]; + add insts_to_add[inst$name]; + } } - # Response event gets sent via the agents' reponse event. + # Process our TODO lists. Handle drops first, then additions, in + # case we need to re-establish connectivity with an agent. + + for ( inst_name in insts_to_drop ) + drop_instance(g_instances[inst_name]); + for ( inst_name in insts_to_peer ) + add_instance(insts_to_peer[inst_name]); + + # Updates to out instance tables are complete, now check if we're already + # able to send the config to the agents: + check_instances_ready(); } event ClusterController::API::get_instances_request(reqid: string) { ClusterController::Log::info(fmt("rx ClusterController::API::set_instances_request %s", reqid)); + local res = ClusterController::Types::Result($reqid = reqid); local insts: vector of ClusterController::Types::Instance; - for ( i in ClusterController::instances ) - insts += ClusterController::instances[i]; + for ( i in g_instances ) + insts += g_instances[i]; + + res$data = insts; ClusterController::Log::info(fmt("tx ClusterController::API::get_instances_response %s", reqid)); - event ClusterController::API::get_instances_response(reqid, insts); + event ClusterController::API::get_instances_response(reqid, res); + } + +event ClusterController::Request::request_expired(req: ClusterController::Request::Request) + { + # Various handlers for timed-out request state. We use the state members + # to identify how to respond. No need to clean up the request itself, + # since we're getting here via the request module's expiration + # mechanism that handles the cleanup. 
+ local res: ClusterController::Types::Result; + + if ( req?$set_configuration_state ) + { + # This timeout means we no longer have a pending request. + g_config_reqid_pending = ""; + + res = ClusterController::Types::Result($reqid=req$id); + res$success = F; + res$error = "request timed out"; + req$results += res; + + ClusterController::Log::info(fmt("tx ClusterController::API::set_configuration_response %s", + ClusterController::Request::to_string(req))); + event ClusterController::API::set_configuration_response(req$id, req$results); + } + + if ( req?$test_state ) + { + res = ClusterController::Types::Result($reqid=req$id); + res$success = F; + res$error = "request timed out"; + + ClusterController::Log::info(fmt("tx ClusterController::API::test_timeout_response %s", req$id)); + event ClusterController::API::test_timeout_response(req$id, res); + } + } + +event ClusterController::API::test_timeout_request(reqid: string, with_state: bool) + { + ClusterController::Log::info(fmt("rx ClusterController::API::test_timeout_request %s %s", reqid, with_state)); + + if ( with_state ) + { + # This state times out and triggers a timeout response in the + # above request_expired event handler. + local req = ClusterController::Request::create(reqid); + req$test_state = ClusterController::Request::TestState(); + } } event zeek_init() { - # Controller always listens -- it needs to be able to respond - # to the Zeek client. This port is also used by the agents - # if they connect to the client. + # Initialize null config at startup. We will replace it once we have + # persistence, and again whenever we complete a client's + # set_configuration request. + g_config_current = null_config(); + + # The controller always listens -- it needs to be able to respond to the + # Zeek client. This port is also used by the agents if they connect to + # the client. 
The client doesn't automatically establish or accept + # connectivity to agents: agents are defined and communicated with as + # defined via configurations defined by the client. + local cni = ClusterController::network_info(); + Broker::listen(cat(cni$address), cni$bound_port); Broker::subscribe(ClusterAgent::topic_prefix); Broker::subscribe(ClusterController::topic); + # Events sent to the client: + Broker::auto_publish(ClusterController::topic, ClusterController::API::get_instances_response); Broker::auto_publish(ClusterController::topic, ClusterController::API::set_configuration_response); - - if ( |ClusterController::instances| > 0 ) - { - # We peer with the agents -- otherwise, the agents peer - # with (i.e., connect to) us. - for ( i in ClusterController::instances ) - { - local inst = ClusterController::instances[i]; - - if ( ! inst?$listen_port ) - { - # XXX config error -- this must be there - next; - } - - Broker::peer(cat(inst$host), inst$listen_port, - ClusterController::connect_retry); - } - } - - # If ClusterController::instances is empty, agents peer with - # us and we do nothing. We'll build up state as the - # notify_agent_hello() events come int. + Broker::auto_publish(ClusterController::topic, + ClusterController::API::test_timeout_response); ClusterController::Log::info("controller is live"); } diff --git a/scripts/policy/frameworks/cluster/controller/request.zeek b/scripts/policy/frameworks/cluster/controller/request.zeek index 868b84d0f0..202a615e6b 100644 --- a/scripts/policy/frameworks/cluster/controller/request.zeek +++ b/scripts/policy/frameworks/cluster/controller/request.zeek @@ -1,23 +1,33 @@ +##! This module implements a request state abstraction that both cluster +##! controller and agent use to tie responses to received request events and be +##! able to time-out such requests. + @load ./types +@load ./config module ClusterController::Request; export { + ## Request records track each request's state. 
type Request: record { + ## Each request has a hopfully unique ID provided by the requester. id: string; + + ## For requests that result based upon another request (such as when + ## the controller sends requests to agents based on a request it + ## received by the client), this specifies that original, "parent" + ## request. parent_id: string &optional; }; - # API-specific state. XXX we may be able to generalize after this - # has settled a bit more. + # API-specific state. XXX we may be able to generalize after this has + # settled a bit more. It would also be nice to move request-specific + # state out of this module -- we could for example redef Request in + # main.zeek as needed. # State specific to the set_configuration request/response events type SetConfigurationState: record { - requests: vector of Request &default=vector(); - }; - - # State specific to the set_nodes request/response events - type SetNodesState: record { + config: ClusterController::Types::Configuration; requests: vector of Request &default=vector(); }; @@ -26,51 +36,105 @@ export { node: string; }; + # State for testing events + type TestState: record { + }; + # The redef is a workaround so we can use the Request type - # while it is still being defined + # while it is still being defined. redef record Request += { results: ClusterController::Types::ResultVec &default=vector(); finished: bool &default=F; set_configuration_state: SetConfigurationState &optional; - set_nodes_state: SetNodesState &optional; supervisor_state: SupervisorState &optional; + test_state: TestState &optional; }; + ## A token request that serves as a null/nonexistant request. global null_req = Request($id="", $finished=T); + ## This function establishes request state. + ## + ## reqid: the identifier to use for the request. + ## global create: function(reqid: string &default=unique_id("")): Request; + + ## This function looks up the request for a given request ID and returns + ## it. 
When no such request exists, returns ClusterController::Request::null_req. + ## + ## reqid: the ID of the request state to retrieve. + ## global lookup: function(reqid: string): Request; + + ## This function marks a request as complete and causes Zeek to release + ## its internal state. When the request does not exist, this does + ## nothing. + ## + ## reqid: the ID of the request state to releaase. + ## global finish: function(reqid: string): bool; + ## This event fires when a request times out (as per the + ## ClusterController::request_timeout) before it has been finished via + ## ClusterController::Request::finish(). + ## + ## req: the request state that is expiring. + ## + global request_expired: event(req: Request); + + ## This function is a helper predicate to indicate whether a given + ## request is null. + ## + ## request: a Request record to check. + ## + ## Returns: T if the given request matches the null_req instance, F otherwise. + ## global is_null: function(request: Request): bool; + + ## For troubleshooting, this function renders a request record to a string. + ## + ## request: the request to render. + ## + global to_string: function(request: Request): string; } -# XXX this needs a mechanism for expiring stale requests -global requests: table[string] of Request; +function requests_expire_func(reqs: table[string] of Request, reqid: string): interval + { + event ClusterController::Request::request_expired(reqs[reqid]); + return 0secs; + } + +# This is the global request-tracking table. The table maps from request ID +# strings to corresponding Request records. Entries time out after the +# ClusterController::request_timeout interval. Upon expiration, a +# request_expired event triggers that conveys the request state. 
+global g_requests: table[string] of Request + &create_expire=ClusterController::request_timeout + &expire_func=requests_expire_func; function create(reqid: string): Request { local ret = Request($id=reqid); - requests[reqid] = ret; + g_requests[reqid] = ret; return ret; } function lookup(reqid: string): Request { - if ( reqid in requests ) - return requests[reqid]; + if ( reqid in g_requests ) + return g_requests[reqid]; return null_req; } function finish(reqid: string): bool { - if ( reqid !in requests ) + if ( reqid !in g_requests ) return F; - local req = requests[reqid]; - delete requests[reqid]; + local req = g_requests[reqid]; + delete g_requests[reqid]; req$finished = T; @@ -84,3 +148,23 @@ function is_null(request: Request): bool return F; } + +function to_string(request: Request): string + { + local results: string_vec; + local res: ClusterController::Types::Result; + local parent_id = ""; + + if ( request?$parent_id ) + parent_id = fmt(" (via %s)", request$parent_id); + + for ( idx in request$results ) + { + res = request$results[idx]; + results[|results|] = ClusterController::Types::result_to_string(res); + } + + return fmt("[request %s%s %s, results: %s]", request$id, parent_id, + request$finished ? "finished" : "pending", + join_string_vec(results, ",")); + } diff --git a/scripts/policy/frameworks/cluster/controller/types.zeek b/scripts/policy/frameworks/cluster/controller/types.zeek index e2e0899a88..9d7bc82e3c 100644 --- a/scripts/policy/frameworks/cluster/controller/types.zeek +++ b/scripts/policy/frameworks/cluster/controller/types.zeek @@ -1,4 +1,6 @@ -# Types for the Cluster Controller framework. These are used by both agent and controller. +##! This module holds the basic types needed for the Cluster Controller +##! framework. These are used by both agent and controller, and several +##! have corresponding equals in the zeek-client implementation. module ClusterController::Types; @@ -14,67 +16,96 @@ export { ## A Zeek-side option with value. 
type Option: record { - name: string; # Name of option - value: string; # Value of option + name: string; ##< Name of option + value: string; ##< Value of option }; ## Configuration describing a Zeek instance running a Cluster ## Agent. Normally, there'll be one instance per cluster ## system: a single physical system. type Instance: record { - # Unique, human-readable instance name + ## Unique, human-readable instance name name: string; - # IP address of system + ## IP address of system host: addr; - # Agent listening port. Not needed if agents connect to controller. + ## Agent listening port. Not needed if agents connect to controller. listen_port: port &optional; }; + type InstanceVec: vector of Instance; + ## State that a Cluster Node can be in. State changes trigger an ## API notification (see notify_change()). type State: enum { - Running, # Running and operating normally - Stopped, # Explicitly stopped - Failed, # Failed to start; and permanently halted - Crashed, # Crashed, will be restarted, - Unknown, # State not known currently (e.g., because of lost connectivity) + Running, ##< Running and operating normally + Stopped, ##< Explicitly stopped + Failed, ##< Failed to start; and permanently halted + Crashed, ##< Crashed, will be restarted, + Unknown, ##< State not known currently (e.g., because of lost connectivity) }; ## Configuration describing a Cluster Node process. type Node: record { - name: string; # Cluster-unique, human-readable node name - instance: string; # Name of instance where node is to run - p: port; # Port on which this node will listen - role: Supervisor::ClusterRole; # Role of the node. - state: State; # Desired, or current, run state. 
- scripts: vector of string &optional; # Additional Zeek scripts for node - options: set[Option] &optional; # Zeek options for node - interface: string &optional; # Interface to sniff - cpu_affinity: int &optional; # CPU/core number to pin to - env: table[string] of string &default=table(); # Custom environment vars + name: string; ##< Cluster-unique, human-readable node name + instance: string; ##< Name of instance where node is to run + p: port; ##< Port on which this node will listen + role: Supervisor::ClusterRole; ##< Role of the node. + state: State; ##< Desired, or current, run state. + scripts: vector of string &optional; ##< Additional Zeek scripts for node + options: set[Option] &optional; ##< Zeek options for node + interface: string &optional; ##< Interface to sniff + cpu_affinity: int &optional; ##< CPU/core number to pin to + env: table[string] of string &default=table(); ##< Custom environment vars }; - # Data structure capturing a cluster's complete configuration. + ## Data structure capturing a cluster's complete configuration. type Configuration: record { - id: string &default=unique_id(""); # Unique identifier for a particular configuration + id: string &default=unique_id(""); ##< Unique identifier for a particular configuration ## The instances in the cluster. - ## XXX we may be able to make this optional - instances: set[Instance]; + instances: set[Instance] &default=set(); ## The set of nodes in the cluster, as distributed over the instances. 
- nodes: set[Node]; + nodes: set[Node] &default=set(); }; - # Return value for request-response API event pairs + ## Return value for request-response API event pairs type Result: record { - reqid: string; # Request ID of operation this result refers to - instance: string; # Name of associated instance (for context) - success: bool &default=T; # True if successful - data: any &optional; # Addl data returned for successful operation - error: string &default=""; # Descriptive error on failure - node: string &optional; # Name of associated node (for context) + reqid: string; ##< Request ID of operation this result refers to + instance: string &default=""; ##< Name of associated instance (for context) + success: bool &default=T; ##< True if successful + data: any &optional; ##< Addl data returned for successful operation + error: string &default=""; ##< Descriptive error on failure + node: string &optional; ##< Name of associated node (for context) }; type ResultVec: vector of Result; + + global result_to_string: function(res: Result): string; } + +function result_to_string(res: Result): string + { + local result = ""; + + if ( res$success ) + result = "success"; + else if ( res$error != "" ) + result = fmt("error (%s)", res$error); + else + result = "error"; + + local details: string_vec; + + if ( res$reqid != "" ) + details[|details|] = fmt("reqid %s", res$reqid); + if ( res$instance != "" ) + details[|details|] = fmt("instance %s", res$instance); + if ( res?$node && res$node != "" ) + details[|details|] = fmt("node %s", res$node); + + if ( |details| > 0 ) + result = fmt("%s (%s)", result, join_string_vec(details, ", ")); + + return result; + } diff --git a/scripts/policy/frameworks/cluster/controller/util.zeek b/scripts/policy/frameworks/cluster/controller/util.zeek new file mode 100644 index 0000000000..0329438f2f --- /dev/null +++ b/scripts/policy/frameworks/cluster/controller/util.zeek @@ -0,0 +1,25 @@ +##! 
Utility functions for the cluster controller framework, available to agent +##! and controller. + +module ClusterController::Util; + +export { + ## Renders a set of strings to an alphabetically sorted vector. + ## + ## ss: the string set to convert. + ## + ## Returns: the vector of all strings in ss. + global set_to_vector: function(ss: set[string]): vector of string; +} + +function set_to_vector(ss: set[string]): vector of string + { + local res: vector of string; + + for ( s in ss ) + res[|res|] = s; + + sort(res, strcmp); + + return res; + } diff --git a/scripts/policy/frameworks/control/controller.zeek b/scripts/policy/frameworks/control/controller.zeek index b68f89b345..91820b7828 100644 --- a/scripts/policy/frameworks/control/controller.zeek +++ b/scripts/policy/frameworks/control/controller.zeek @@ -41,7 +41,7 @@ event Control::net_stats_response(s: string) &priority=-10 { event terminate_event(); } - + event Control::configuration_update_response() &priority=-10 { event terminate_event(); @@ -68,7 +68,7 @@ function configurable_ids(): id_table # We don't want to update non-const globals because that's usually # where state is stored and those values will frequently be declared # with &redef so that attributes can be redefined. - # + # # NOTE: functions are currently not fully supported for serialization and hence # aren't sent. if ( t$constant && t$redefinable && t$type_name != "func" ) diff --git a/scripts/policy/frameworks/dpd/detect-protocols.zeek b/scripts/policy/frameworks/dpd/detect-protocols.zeek index 2bd69ba196..f721217147 100644 --- a/scripts/policy/frameworks/dpd/detect-protocols.zeek +++ b/scripts/policy/frameworks/dpd/detect-protocols.zeek @@ -22,7 +22,7 @@ export { type dir: enum { NONE, INCOMING, OUTGOING, BOTH }; - option valids: table[Analyzer::Tag, addr, port] of dir = { + option valids: table[AllAnalyzers::Tag, addr, port] of dir = { # A couple of ports commonly used for benign HTTP servers. # For now we want to see everything. 
@@ -45,7 +45,7 @@ export { # log files, this also saves memory because for these we don't # need to remember which servers we already have reported, which # for some can be a lot. - option suppress_servers: set [Analyzer::Tag] = { + option suppress_servers: set [AllAnalyzers::Tag] = { # Analyzer::ANALYZER_HTTP }; @@ -61,7 +61,7 @@ export { # Entry point for other analyzers to report that they recognized # a certain (sub-)protocol. - global found_protocol: function(c: connection, analyzer: Analyzer::Tag, + global found_protocol: function(c: connection, analyzer: AllAnalyzers::Tag, protocol: string); # Table keeping reported (server, port, analyzer) tuples (and their @@ -74,7 +74,7 @@ export { } # Table that tracks currently active dynamic analyzers per connection. -global conns: table[conn_id] of set[Analyzer::Tag]; +global conns: table[conn_id] of set[AllAnalyzers::Tag]; # Table of reports by other analyzers about the protocol used in a connection. global protocols: table[conn_id] of set[string]; @@ -84,7 +84,7 @@ type protocol : record { sub: string; # "sub-protocols" reported by other sources }; -function get_protocol(c: connection, a: Analyzer::Tag) : protocol +function get_protocol(c: connection, a: AllAnalyzers::Tag) : protocol { local str = ""; if ( c$id in protocols ) @@ -101,7 +101,7 @@ function fmt_protocol(p: protocol) : string return p$sub != "" ? fmt("%s (via %s)", p$sub, p$a) : p$a; } -function do_notice(c: connection, a: Analyzer::Tag, d: dir) +function do_notice(c: connection, a: AllAnalyzers::Tag, d: dir) { if ( d == BOTH ) return; @@ -198,7 +198,7 @@ hook finalize_protocol_detection(c: connection) report_protocols(c); } -event protocol_confirmation(c: connection, atype: Analyzer::Tag, aid: count) +event analyzer_confirmation(c: connection, atype: AllAnalyzers::Tag, aid: count) { # Don't report anything running on a well-known port. 
if ( c$id$resp_p in Analyzer::registered_ports(atype) ) @@ -219,7 +219,7 @@ event protocol_confirmation(c: connection, atype: Analyzer::Tag, aid: count) } } -function found_protocol(c: connection, atype: Analyzer::Tag, protocol: string) +function found_protocol(c: connection, atype: AllAnalyzers::Tag, protocol: string) { # Don't report anything running on a well-known port. if ( c$id$resp_p in Analyzer::registered_ports(atype) ) diff --git a/scripts/policy/frameworks/dpd/packet-segment-logging.zeek b/scripts/policy/frameworks/dpd/packet-segment-logging.zeek index 7dff2b07f8..c624d77bb0 100644 --- a/scripts/policy/frameworks/dpd/packet-segment-logging.zeek +++ b/scripts/policy/frameworks/dpd/packet-segment-logging.zeek @@ -11,7 +11,7 @@ module DPD; export { redef record Info += { ## A chunk of the payload that most likely resulted in the - ## protocol violation. + ## analyzer violation. packet_segment: string &optional &log; }; @@ -20,10 +20,10 @@ export { } -event protocol_violation(c: connection, atype: Analyzer::Tag, aid: count, +event analyzer_violation(c: connection, atype: AllAnalyzers::Tag, aid: count, reason: string) &priority=4 { if ( ! 
c?$dpd ) return; - + c$dpd$packet_segment=fmt("%s", sub_bytes(get_current_packet()$data, 0, packet_segment_size)); } diff --git a/scripts/policy/frameworks/files/detect-MHR.zeek b/scripts/policy/frameworks/files/detect-MHR.zeek index 0c95dadec4..52f8dd7355 100644 --- a/scripts/policy/frameworks/files/detect-MHR.zeek +++ b/scripts/policy/frameworks/files/detect-MHR.zeek @@ -66,7 +66,7 @@ function do_mhr_lookup(hash: string, fi: Notice::FileInfo) event file_hash(f: fa_file, kind: string, hash: string) { - if ( kind == "sha1" && f?$info && f$info?$mime_type && + if ( kind == "sha1" && f?$info && f$info?$mime_type && match_file_types in f$info$mime_type ) do_mhr_lookup(hash, Notice::create_file_info(f)); } diff --git a/scripts/policy/frameworks/files/entropy-test-all-files.zeek b/scripts/policy/frameworks/files/entropy-test-all-files.zeek index 9c704211f8..38c89e8c5d 100644 --- a/scripts/policy/frameworks/files/entropy-test-all-files.zeek +++ b/scripts/policy/frameworks/files/entropy-test-all-files.zeek @@ -1,10 +1,9 @@ - module Files; export { redef record Files::Info += { - ## The information density of the contents of the file, - ## expressed as a number of bits per character. + ## The information density of the contents of the file, + ## expressed as a number of bits per character. 
entropy: double &log &optional; }; } diff --git a/scripts/policy/frameworks/intel/seen/file-hashes.zeek b/scripts/policy/frameworks/intel/seen/file-hashes.zeek index 2e56ad3c48..e3295c5609 100644 --- a/scripts/policy/frameworks/intel/seen/file-hashes.zeek +++ b/scripts/policy/frameworks/intel/seen/file-hashes.zeek @@ -7,6 +7,6 @@ event file_hash(f: fa_file, kind: string, hash: string) $indicator_type=Intel::FILE_HASH, $f=f, $where=Files::IN_HASH); - + Intel::seen(seen); } \ No newline at end of file diff --git a/scripts/policy/frameworks/intel/whitelist.zeek b/scripts/policy/frameworks/intel/whitelist.zeek index 527d828881..e1aa150c7f 100644 --- a/scripts/policy/frameworks/intel/whitelist.zeek +++ b/scripts/policy/frameworks/intel/whitelist.zeek @@ -22,9 +22,8 @@ hook Intel::extend_match(info: Info, s: Seen, items: set[Item]) &priority=9 break; } } - + if ( whitelisted ) # Prevent logging break; } - diff --git a/scripts/policy/frameworks/software/version-changes.zeek b/scripts/policy/frameworks/software/version-changes.zeek index 865cc20447..060778584b 100644 --- a/scripts/policy/frameworks/software/version-changes.zeek +++ b/scripts/policy/frameworks/software/version-changes.zeek @@ -8,14 +8,14 @@ module Software; export { - redef enum Notice::Type += { + redef enum Notice::Type += { ## For certain software, a version changing may matter. In that ## case, this notice will be generated. Software that matters ## if the version changes can be configured with the ## :zeek:id:`Software::interesting_version_changes` variable. Software_Version_Change, }; - + ## Some software is more interesting when the version changes and this ## is a set of all software that should raise a notice when a different ## version is seen on a host. 
diff --git a/scripts/policy/integration/barnyard2/main.zeek b/scripts/policy/integration/barnyard2/main.zeek index 6e85db48d1..35f5a281ba 100644 --- a/scripts/policy/integration/barnyard2/main.zeek +++ b/scripts/policy/integration/barnyard2/main.zeek @@ -8,7 +8,7 @@ module Barnyard2; export { redef enum Log::ID += { LOG }; - + global log_policy: Log::PolicyHook; type Info: record { @@ -19,9 +19,9 @@ export { ## Associated alert data. alert: AlertData &log; }; - + ## This can convert a Barnyard :zeek:type:`Barnyard2::PacketID` value to - ## a :zeek:type:`conn_id` value in the case that you might need to index + ## a :zeek:type:`conn_id` value in the case that you might need to index ## into an existing data structure elsewhere within Zeek. global pid2cid: function(p: PacketID): conn_id; } @@ -40,22 +40,22 @@ function pid2cid(p: PacketID): conn_id event barnyard_alert(id: PacketID, alert: AlertData, msg: string, data: string) { Log::write(Barnyard2::LOG, [$ts=network_time(), $pid=id, $alert=alert]); - + #local proto_connection_string: string; #if ( id$src_p == 0/tcp ) # proto_connection_string = fmt("{PROTO:255} %s -> %s", id$src_ip, id$dst_ip); #else - # proto_connection_string = fmt("{%s} %s:%d -> %s:%d", + # proto_connection_string = fmt("{%s} %s:%d -> %s:%d", # to_upper(fmt("%s", get_port_transport_proto(id$dst_p))), # id$src_ip, id$src_p, id$dst_ip, id$dst_p); # - #local snort_alike_msg = fmt("%.6f [**] [%d:%d:%d] %s [**] [Classification: %s] [Priority: %d] %s", + #local snort_alike_msg = fmt("%.6f [**] [%d:%d:%d] %s [**] [Classification: %s] [Priority: %d] %s", # sad$ts, # sad$generator_id, # sad$signature_id, # sad$signature_revision, - # msg, - # sad$classification, - # sad$priority_id, + # msg, + # sad$classification, + # sad$priority_id, # proto_connection_string); } diff --git a/scripts/policy/integration/barnyard2/types.zeek b/scripts/policy/integration/barnyard2/types.zeek index da7015b302..ed8b35cf58 100644 --- 
a/scripts/policy/integration/barnyard2/types.zeek +++ b/scripts/policy/integration/barnyard2/types.zeek @@ -23,7 +23,7 @@ export { dst_p: port; } &log; - ## This is the event that Barnyard2 instances will send if they're + ## This is the event that Barnyard2 instances will send if they're ## configured with the bro_alert output plugin. global barnyard_alert: event(id: Barnyard2::PacketID, alert: Barnyard2::AlertData, diff --git a/scripts/policy/misc/trim-trace-file.zeek b/scripts/policy/misc/trim-trace-file.zeek index f702e9027c..4e31d5fc3e 100644 --- a/scripts/policy/misc/trim-trace-file.zeek +++ b/scripts/policy/misc/trim-trace-file.zeek @@ -6,7 +6,7 @@ module TrimTraceFile; export { ## The interval between times that the output tracefile is rotated. const trim_interval = 10 mins &redef; - + ## This event can be generated externally to this script if on-demand ## tracefile rotation is required with the caveat that the script ## doesn't currently attempt to get back on schedule automatically and @@ -19,14 +19,14 @@ event TrimTraceFile::go(first_trim: bool) { if ( zeek_is_terminating() || trace_output_file == "" ) return; - + if ( ! first_trim ) { local info = rotate_file_by_name(trace_output_file); if ( info$old_name != "" ) system(fmt("/bin/rm %s", safe_shell_quote(info$new_name))); } - + schedule trim_interval { TrimTraceFile::go(F) }; } @@ -35,4 +35,3 @@ event zeek_init() if ( trim_interval > 0 secs ) schedule trim_interval { TrimTraceFile::go(T) }; } - diff --git a/scripts/policy/protocols/conn/known-hosts.zeek b/scripts/policy/protocols/conn/known-hosts.zeek index b95c1176b2..279fa11917 100644 --- a/scripts/policy/protocols/conn/known-hosts.zeek +++ b/scripts/policy/protocols/conn/known-hosts.zeek @@ -1,5 +1,5 @@ ##! This script logs hosts that Zeek determines have performed complete TCP -##! handshakes and logs the address once per day (by default). The log that +##! handshakes and logs the address once per day (by default). The log that ##! 
is output provides an easy way to determine a count of the IP addresses in ##! use on a network per day. @@ -29,11 +29,11 @@ export { ## with keys uniformly distributed over proxy nodes in cluster ## operation. const use_host_store = T &redef; - + ## The hosts whose existence should be logged and tracked. ## See :zeek:type:`Host` for possible choices. option host_tracking = LOCAL_HOSTS; - + ## Holds the set of all known hosts. Keys in the store are addresses ## and their associated value will always be the "true" boolean. global host_store: Cluster::StoreInfo; @@ -49,8 +49,8 @@ export { ## :zeek:see:`Known::host_store`. option host_store_timeout = 15sec; - ## The set of all known addresses to store for preventing duplicate - ## logging of addresses. It can also be used from other scripts to + ## The set of all known addresses to store for preventing duplicate + ## logging of addresses. It can also be used from other scripts to ## inspect if an address has been seen in use. ## Maintain the list of known hosts for 24 hours so that the existence ## of each individual address is logged each day. diff --git a/scripts/policy/protocols/conn/known-services.zeek b/scripts/policy/protocols/conn/known-services.zeek index 7143c63547..313c49b940 100644 --- a/scripts/policy/protocols/conn/known-services.zeek +++ b/scripts/policy/protocols/conn/known-services.zeek @@ -84,7 +84,7 @@ export { } redef record connection += { - # This field is to indicate whether or not the processing for detecting + # This field is to indicate whether or not the processing for detecting # and logging the service for this connection is complete. known_services_done: bool &default=F; }; @@ -262,7 +262,7 @@ function known_services_done(c: connection) } if ( ! 
has_active_service(c) ) - # If we're here during a protocol_confirmation, it's still premature + # If we're here during a analyzer_confirmation, it's still premature # to declare there's an actual service, so wait for the connection # removal to check again (to get more timely reporting we'd have # schedule some recurring event to poll for handshake/activity). @@ -293,7 +293,7 @@ function known_services_done(c: connection) event service_info_commit(info); } -event protocol_confirmation(c: connection, atype: Analyzer::Tag, aid: count) &priority=-5 +event analyzer_confirmation(c: connection, atype: AllAnalyzers::Tag, aid: count) &priority=-5 { known_services_done(c); } @@ -314,4 +314,3 @@ event zeek_init() &priority=5 $path="known_services", $policy=log_policy_services]); } - diff --git a/scripts/policy/protocols/dns/detect-external-names.zeek b/scripts/policy/protocols/dns/detect-external-names.zeek index 9533f396a2..8798df6361 100644 --- a/scripts/policy/protocols/dns/detect-external-names.zeek +++ b/scripts/policy/protocols/dns/detect-external-names.zeek @@ -1,6 +1,6 @@ ##! This script detects names which are not within zones considered to be -##! local but resolving to addresses considered local. -##! The :zeek:id:`Site::local_zones` variable **must** be set appropriately for +##! local but resolving to addresses considered local. +##! The :zeek:id:`Site::local_zones` variable **must** be set appropriately for ##! this detection. @load base/frameworks/notice @@ -9,7 +9,7 @@ module DNS; export { - redef enum Notice::Type += { + redef enum Notice::Type += { ## Raised when a non-local name is found to be pointing at a ## local host. The :zeek:id:`Site::local_zones` variable ## **must** be set appropriately for this detection. 
@@ -21,7 +21,7 @@ event dns_A_reply(c: connection, msg: dns_msg, ans: dns_answer, a: addr) &priori { if ( |Site::local_zones| == 0 ) return; - + # Check for responses from remote hosts that point at local hosts # but the name is not considered to be within a "local" zone. if ( Site::is_local_addr(a) && # referring to a local host @@ -29,7 +29,7 @@ event dns_A_reply(c: connection, msg: dns_msg, ans: dns_answer, a: addr) &priori { NOTICE([$note=External_Name, $msg=fmt("%s is pointing to a local host - %s.", ans$query, a), - $conn=c, + $conn=c, $identifier=cat(a,ans$query)]); } } diff --git a/scripts/policy/protocols/ftp/detect.zeek b/scripts/policy/protocols/ftp/detect.zeek index e1bd627921..1b3128065a 100644 --- a/scripts/policy/protocols/ftp/detect.zeek +++ b/scripts/policy/protocols/ftp/detect.zeek @@ -7,7 +7,7 @@ module FTP; export { redef enum Notice::Type += { - ## Indicates that a successful response to a "SITE EXEC" + ## Indicates that a successful response to a "SITE EXEC" ## command/arg pair was seen. Site_Exec_Success, }; @@ -16,10 +16,10 @@ export { event ftp_reply(c: connection, code: count, msg: string, cont_resp: bool) &priority=3 { local response_xyz = parse_ftp_reply_code(code); - + # If a successful SITE EXEC command is executed, raise a notice. 
if ( response_xyz$x == 2 && - c$ftp$cmdarg$cmd == "SITE" && + c$ftp$cmdarg$cmd == "SITE" && /[Ee][Xx][Ee][Cc]/ in c$ftp$cmdarg$arg ) { NOTICE([$note=Site_Exec_Success, $conn=c, diff --git a/scripts/policy/protocols/http/detect-webapps.zeek b/scripts/policy/protocols/http/detect-webapps.zeek index 29adbc6580..8b405eae9f 100644 --- a/scripts/policy/protocols/http/detect-webapps.zeek +++ b/scripts/policy/protocols/http/detect-webapps.zeek @@ -26,7 +26,7 @@ export { event signature_match(state: signature_state, msg: string, data: string) &priority=5 { if ( /^webapp-/ !in state$sig_id ) return; - + local c = state$conn; local si: Software::Info; si = [$name=msg, $unparsed_version=msg, $host=c$id$resp_h, $host_p=c$id$resp_p, $software_type=WEB_APPLICATION]; diff --git a/scripts/policy/protocols/http/header-names.zeek b/scripts/policy/protocols/http/header-names.zeek index 9f4e83638c..8838b41a0a 100644 --- a/scripts/policy/protocols/http/header-names.zeek +++ b/scripts/policy/protocols/http/header-names.zeek @@ -11,15 +11,15 @@ export { ## The vector of HTTP header names sent by the client. No ## header values are included here, just the header names. client_header_names: vector of string &log &optional; - + ## The vector of HTTP header names sent by the server. No ## header values are included here, just the header names. server_header_names: vector of string &log &optional; }; - + ## A boolean value to determine if client header names are to be logged. option log_client_header_names = T; - + ## A boolean value to determine if server header names are to be logged. option log_server_header_names = F; } diff --git a/scripts/policy/protocols/http/var-extraction-uri.zeek b/scripts/policy/protocols/http/var-extraction-uri.zeek index 98eba48fed..776c659530 100644 --- a/scripts/policy/protocols/http/var-extraction-uri.zeek +++ b/scripts/policy/protocols/http/var-extraction-uri.zeek @@ -1,4 +1,4 @@ -##! Extracts and logs variables from the requested URI in the default HTTP +##! 
Extracts and logs variables from the requested URI in the default HTTP ##! logging stream. @load base/protocols/http diff --git a/scripts/policy/protocols/modbus/track-memmap.zeek b/scripts/policy/protocols/modbus/track-memmap.zeek index c725a27241..b92e90b891 100644 --- a/scripts/policy/protocols/modbus/track-memmap.zeek +++ b/scripts/policy/protocols/modbus/track-memmap.zeek @@ -82,10 +82,10 @@ event modbus_read_holding_registers_response(c: connection, headers: ModbusHeade if ( slave_regs[c$modbus$track_address]$value != registers[i] ) { local delta = network_time() - slave_regs[c$modbus$track_address]$last_set; - event Modbus::changed_register(c, c$modbus$track_address, + event Modbus::changed_register(c, c$modbus$track_address, slave_regs[c$modbus$track_address]$value, registers[i], delta); - + slave_regs[c$modbus$track_address]$last_set = network_time(); slave_regs[c$modbus$track_address]$value = registers[i]; } @@ -102,7 +102,7 @@ event modbus_read_holding_registers_response(c: connection, headers: ModbusHeade event Modbus::changed_register(c: connection, register: count, old_val: count, new_val: count, delta: interval) { - local rec: MemmapInfo = [$ts=network_time(), $uid=c$uid, $id=c$id, + local rec: MemmapInfo = [$ts=network_time(), $uid=c$uid, $id=c$id, $register=register, $old_val=old_val, $new_val=new_val, $delta=delta]; Log::write(REGISTER_CHANGE_LOG, rec); } diff --git a/scripts/policy/protocols/smb/log-cmds.zeek b/scripts/policy/protocols/smb/log-cmds.zeek index 569314e980..0d5e4acde3 100644 --- a/scripts/policy/protocols/smb/log-cmds.zeek +++ b/scripts/policy/protocols/smb/log-cmds.zeek @@ -39,7 +39,7 @@ event smb1_message(c: connection, hdr: SMB1::Header, is_orig: bool) &priority=-5 if ( c$smb_state$current_cmd$status in SMB::ignored_command_statuses ) return; - + if ( c$smb_state$current_cmd$command in SMB::deferred_logging_cmds ) return; diff --git a/scripts/policy/protocols/smtp/blocklists.zeek b/scripts/policy/protocols/smtp/blocklists.zeek 
index 4524a6dabb..16292c4390 100644 --- a/scripts/policy/protocols/smtp/blocklists.zeek +++ b/scripts/policy/protocols/smtp/blocklists.zeek @@ -6,7 +6,7 @@ module SMTP; export { - redef enum Notice::Type += { + redef enum Notice::Type += { ## An SMTP server sent a reply mentioning an SMTP block list. Blocklist_Error_Message, ## The originator's address is seen in the block list error message. @@ -21,19 +21,19 @@ export { /spamhaus\.org\// | /sophos\.com\/security\// | /spamcop\.net\/bl/ - | /cbl\.abuseat\.org\// - | /sorbs\.net\// + | /cbl\.abuseat\.org\// + | /sorbs\.net\// | /bsn\.borderware\.com\// | /mail-abuse\.com\// | /b\.barracudacentral\.com\// - | /psbl\.surriel\.com\// - | /antispam\.imp\.ch\// + | /psbl\.surriel\.com\// + | /antispam\.imp\.ch\// | /dyndns\.com\/.*spam/ | /rbl\.knology\.net\// | /intercept\.datapacket\.net\// | /uceprotect\.net\// | /hostkarma\.junkemailfilter\.com\//; - + } event smtp_reply(c: connection, is_orig: bool, code: count, cmd: string, @@ -55,8 +55,8 @@ event smtp_reply(c: connection, is_orig: bool, code: count, cmd: string, note = Blocklist_Blocked_Host; message = fmt("%s is on an SMTP block list", c$id$orig_h); } - - NOTICE([$note=note, $conn=c, $msg=message, $sub=msg, + + NOTICE([$note=note, $conn=c, $msg=message, $sub=msg, $identifier=cat(c$id$orig_h)]); } } diff --git a/scripts/policy/protocols/smtp/detect-suspicious-orig.zeek b/scripts/policy/protocols/smtp/detect-suspicious-orig.zeek index 12a9a0c312..94edd62f27 100644 --- a/scripts/policy/protocols/smtp/detect-suspicious-orig.zeek +++ b/scripts/policy/protocols/smtp/detect-suspicious-orig.zeek @@ -24,7 +24,7 @@ event log_smtp(rec: Info) { ip = rec$x_originating_ip; loc = lookup_location(ip); - + if ( (loc?$country_code && loc$country_code in suspicious_origination_countries) || ip in suspicious_origination_networks ) diff --git a/scripts/policy/protocols/smtp/software.zeek b/scripts/policy/protocols/smtp/software.zeek index 342beedae0..06b4ca6c27 100644 --- 
a/scripts/policy/protocols/smtp/software.zeek +++ b/scripts/policy/protocols/smtp/software.zeek @@ -1,10 +1,10 @@ ##! This script feeds software detected through email into the software -##! framework. Mail clients and webmail interfaces are the only thing +##! framework. Mail clients and webmail interfaces are the only thing ##! currently detected. -##! +##! ##! TODO: ##! -##! * Find some heuristic to determine if email was sent through +##! * Find some heuristic to determine if email was sent through ##! a MS Exchange webmail interface as opposed to a desktop client. @load base/frameworks/software/main @@ -18,13 +18,13 @@ export { MAIL_SERVER, WEBMAIL_SERVER }; - + redef record Info += { ## Boolean indicator of if the message was sent through a ## webmail interface. is_webmail: bool &log &default=F; }; - + ## Assuming that local mail servers are more trustworthy with the ## headers they insert into message envelopes, this default makes Zeek ## not attempt to detect software in inbound message bodies. If mail @@ -34,15 +34,15 @@ export { ## incoming messages (network traffic originating from a non-local ## address), set this variable to EXTERNAL_HOSTS or ALL_HOSTS. option detect_clients_in_messages_from = LOCAL_HOSTS; - - ## A regular expression to match USER-AGENT-like headers to find if a + + ## A regular expression to match USER-AGENT-like headers to find if a ## message was sent with a webmail interface. option webmail_user_agents = - /^iPlanet Messenger/ + /^iPlanet Messenger/ | /^Sun Java\(tm\) System Messenger Express/ | /\(IMP\)/ # Horde Internet Messaging Program | /^SquirrelMail/ - | /^NeoMail/ + | /^NeoMail/ | /ZimbraWebClient/; } @@ -66,12 +66,12 @@ event log_smtp(rec: Info) { s_type = WEBMAIL_SERVER; # If the earliest received header indicates that the connection - # was via HTTP, then that likely means the actual mail software + # was via HTTP, then that likely means the actual mail software # is installed on the second address in the path. 
if ( rec?$first_received && /via HTTP/ in rec$first_received ) client_ip = rec$path[|rec$path|-2]; } - + if ( addr_matches_host(rec$id$orig_h, detect_clients_in_messages_from) ) { @@ -79,4 +79,3 @@ event log_smtp(rec: Info) } } } - diff --git a/scripts/policy/protocols/ssh/interesting-hostnames.zeek b/scripts/policy/protocols/ssh/interesting-hostnames.zeek index 92f7bfc1dd..db80d7c6ac 100644 --- a/scripts/policy/protocols/ssh/interesting-hostnames.zeek +++ b/scripts/policy/protocols/ssh/interesting-hostnames.zeek @@ -1,7 +1,7 @@ -##! This script will generate a notice if an apparent SSH login originates -##! or heads to a host with a reverse hostname that looks suspicious. By -##! default, the regular expression to match "interesting" hostnames includes -##! names that are typically used for infrastructure hosts like nameservers, +##! This script will generate a notice if an apparent SSH login originates +##! or heads to a host with a reverse hostname that looks suspicious. By +##! default, the regular expression to match "interesting" hostnames includes +##! names that are typically used for infrastructure hosts like nameservers, ##! mail servers, web servers and ftp servers. @load base/frameworks/notice @@ -15,7 +15,7 @@ export { ## :zeek:id:`SSH::interesting_hostnames` regular expression. Interesting_Hostname_Login, }; - + ## Strange/bad host names to see successful SSH logins from or to. option interesting_hostnames = /^d?ns[0-9]*\./ | @@ -49,4 +49,3 @@ event ssh_auth_successful(c: connection, auth_method_none: bool) check_ssh_hostname(c$id, c$uid, host); } } - diff --git a/scripts/policy/protocols/ssh/software.zeek b/scripts/policy/protocols/ssh/software.zeek index ba03bed284..4c44636914 100644 --- a/scripts/policy/protocols/ssh/software.zeek +++ b/scripts/policy/protocols/ssh/software.zeek @@ -1,4 +1,4 @@ -##! Extracts SSH client and server information from SSH +##! Extracts SSH client and server information from SSH ##! 
connections and forwards it to the software framework. @load base/frameworks/software diff --git a/scripts/policy/protocols/ssl/expiring-certs.zeek b/scripts/policy/protocols/ssl/expiring-certs.zeek index 066ff0d690..a217c03db4 100644 --- a/scripts/policy/protocols/ssl/expiring-certs.zeek +++ b/scripts/policy/protocols/ssl/expiring-certs.zeek @@ -1,4 +1,4 @@ -##! Generate notices when X.509 certificates over SSL/TLS are expired or +##! Generate notices when X.509 certificates over SSL/TLS are expired or ##! going to expire soon based on the date and time values stored within the ##! certificate. diff --git a/scripts/policy/protocols/ssl/known-certs.zeek b/scripts/policy/protocols/ssl/known-certs.zeek index 03b583cc9d..35fbcf0f7b 100644 --- a/scripts/policy/protocols/ssl/known-certs.zeek +++ b/scripts/policy/protocols/ssl/known-certs.zeek @@ -12,13 +12,13 @@ export { redef enum Log::ID += { CERTS_LOG }; global log_policy_certs: Log::PolicyHook; - + type CertsInfo: record { ## The timestamp when the certificate was detected. ts: time &log; ## The address that offered the certificate. host: addr &log; - ## If the certificate was handed out by a server, this is the + ## If the certificate was handed out by a server, this is the ## port that the server was listening on. port_num: port &log &optional; ## Certificate subject. @@ -28,7 +28,7 @@ export { ## Serial number for the certificate. serial: string &log &optional; }; - + ## The certificates whose existence should be logged and tracked. ## Choices are: LOCAL_HOSTS, REMOTE_HOSTS, ALL_HOSTS, NO_HOSTS. option cert_tracking = LOCAL_HOSTS; @@ -38,7 +38,7 @@ export { ## with keys uniformly distributed over proxy nodes in cluster ## operation. const use_cert_store = T &redef; - + type AddrCertHashPair: record { host: addr; hash: string; @@ -60,15 +60,15 @@ export { ## :zeek:see:`Known::cert_store`. option cert_store_timeout = 15sec; - ## The set of all known certificates to store for preventing duplicate - ## logging. 
It can also be used from other scripts to - ## inspect if a certificate has been seen in use. The string value + ## The set of all known certificates to store for preventing duplicate + ## logging. It can also be used from other scripts to + ## inspect if a certificate has been seen in use. The string value ## in the set is for storing the DER formatted certificate' SHA1 hash. ## ## In cluster operation, this set is uniformly distributed across ## proxy nodes. global certs: set[addr, string] &create_expire=1day &redef; - + ## Event that can be handled to access the loggable record as it is sent ## on to the logging framework. global log_known_certs: event(rec: CertsInfo); diff --git a/scripts/policy/tuning/__load__.zeek b/scripts/policy/tuning/__load__.zeek index 03449882f8..db9fe9a572 100644 --- a/scripts/policy/tuning/__load__.zeek +++ b/scripts/policy/tuning/__load__.zeek @@ -1,2 +1,2 @@ -##! This loads the default tuning +##! This loads the default tuning @load ./defaults \ No newline at end of file diff --git a/scripts/policy/tuning/defaults/packet-fragments.zeek b/scripts/policy/tuning/defaults/packet-fragments.zeek index f95c826547..7ae0e4363c 100644 --- a/scripts/policy/tuning/defaults/packet-fragments.zeek +++ b/scripts/policy/tuning/defaults/packet-fragments.zeek @@ -1,7 +1,7 @@ # Capture TCP fragments, but not UDP (or ICMP), since those are a lot more # common due to high-volume, fragmenting protocols such as NFS :-(. -# This normally isn't used because of the default open packet filter +# This normally isn't used because of the default open packet filter # but we set it anyway in case the user is using a packet filter. # Note: This was removed because the default model now is to have a wide # open packet filter. 
diff --git a/scripts/policy/tuning/defaults/warnings.zeek b/scripts/policy/tuning/defaults/warnings.zeek index 6c31e82d4e..0220fc78de 100644 --- a/scripts/policy/tuning/defaults/warnings.zeek +++ b/scripts/policy/tuning/defaults/warnings.zeek @@ -1,5 +1,5 @@ ##! This file is meant to print messages on stdout for settings that would be -##! good to set in most cases or other things that could be done to achieve +##! good to set in most cases or other things that could be done to achieve ##! better detection. @load base/utils/site diff --git a/scripts/test-all-policy.zeek b/scripts/test-all-policy.zeek index fd58255d1d..10a7637422 100644 --- a/scripts/test-all-policy.zeek +++ b/scripts/test-all-policy.zeek @@ -24,6 +24,7 @@ # @load frameworks/cluster/controller/main.zeek @load frameworks/cluster/controller/request.zeek @load frameworks/cluster/controller/types.zeek +@load frameworks/cluster/controller/util.zeek @load frameworks/dpd/detect-protocols.zeek @load frameworks/dpd/packet-segment-logging.zeek @load frameworks/intel/do_notice.zeek diff --git a/src/3rdparty b/src/3rdparty index d31b51e6a0..cb626c94f6 160000 --- a/src/3rdparty +++ b/src/3rdparty @@ -1 +1 @@ -Subproject commit d31b51e6a06ad4c71db81981920eb753954abbf8 +Subproject commit cb626c94f67e0ac0437beba076da1184eb1f8ad7 diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index c0f520fbeb..d8a82b92bb 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -280,7 +280,7 @@ add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/ZAM-AssignFlavorsDefs.h WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} ) -set_source_files_properties(nb_dns.c PROPERTIES COMPILE_FLAGS +set_source_files_properties(3rdparty/nb_dns.c PROPERTIES COMPILE_FLAGS -fno-strict-aliasing) set(MAIN_SRCS @@ -297,7 +297,6 @@ set(MAIN_SRCS CCL.cc CompHash.cc Conn.cc - ConvertUTF.c DFA.cc DbgBreakpoint.cc DbgHelp.cc @@ -367,13 +366,6 @@ set(MAIN_SRCS ZeekArgs.cc ZeekString.cc ZVal.cc - bsd-getopt-long.c - bro_inet_ntop.c - in_cksum.cc - 
patricia.c - setsignal.c - strsep.c - modp_numtoa.c supervisor/Supervisor.cc @@ -387,7 +379,6 @@ set(MAIN_SRCS plugin/Component.cc plugin/ComponentManager.h - plugin/TaggedComponent.h plugin/Manager.cc plugin/Plugin.cc @@ -399,9 +390,10 @@ set(MAIN_SRCS script_opt/CPP/Exprs.cc script_opt/CPP/Func.cc script_opt/CPP/GenFunc.cc - script_opt/CPP/HashMgr.cc script_opt/CPP/Inits.cc - script_opt/CPP/RuntimeInit.cc + script_opt/CPP/InitsInfo.cc + script_opt/CPP/RuntimeInits.cc + script_opt/CPP/RuntimeInitSupport.cc script_opt/CPP/RuntimeOps.cc script_opt/CPP/RuntimeVec.cc script_opt/CPP/Stmts.cc @@ -437,12 +429,20 @@ set(MAIN_SRCS script_opt/ZAM/ZInst.cc script_opt/ZAM/ZOp.cc - nb_dns.c digest.h ) set(THIRD_PARTY_SRCS + 3rdparty/bro_inet_ntop.c + 3rdparty/bsd-getopt-long.c + 3rdparty/ConvertUTF.c + 3rdparty/in_cksum.cc + 3rdparty/modp_numtoa.c + 3rdparty/nb_dns.c + 3rdparty/patricia.c + 3rdparty/setsignal.c 3rdparty/sqlite3.c + 3rdparty/strsep.c ) set(GEN_ZAM_SRCS @@ -620,7 +620,15 @@ install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/ ) install(FILES + ${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/ConvertUTF.h + ${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/bro_inet_ntop.h + ${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/bsd-getopt-long.h + ${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/modp_numtoa.h + ${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/nb_dns.h + ${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/patricia.h + ${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/setsignal.h ${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/sqlite3.h + ${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/doctest.h DESTINATION include/zeek/3rdparty ) diff --git a/src/CompHash.cc b/src/CompHash.cc index 89b5201238..56b626fa7f 100644 --- a/src/CompHash.cc +++ b/src/CompHash.cc @@ -501,6 +501,11 @@ bool CompositeHash::SingleValHash(HashKey& hk, const Val* v, Type* bt, bool type return true; } + // All of the rest of the code here depends on v not being null, since it needs + // to get values from it. + if ( ! 
v ) + return false; + switch ( t ) { case TYPE_INTERNAL_INT: @@ -695,7 +700,7 @@ bool CompositeHash::SingleValHash(HashKey& hk, const Val* v, Type* bt, bool type } break; - case TYPE_INTERNAL_ERROR: + default: return false; } diff --git a/src/Conn.cc b/src/Conn.cc index 46d057a268..36d45dd3b2 100644 --- a/src/Conn.cc +++ b/src/Conn.cc @@ -286,7 +286,7 @@ analyzer::Analyzer* Connection::FindAnalyzer(analyzer::ID id) return adapter ? adapter->FindChild(id) : nullptr; } -analyzer::Analyzer* Connection::FindAnalyzer(const analyzer::Tag& tag) +analyzer::Analyzer* Connection::FindAnalyzer(const zeek::Tag& tag) { return adapter ? adapter->FindChild(tag) : nullptr; } diff --git a/src/Conn.h b/src/Conn.h index 4ce9c51a4c..960df18129 100644 --- a/src/Conn.h +++ b/src/Conn.h @@ -11,12 +11,12 @@ #include "zeek/IPAddr.h" #include "zeek/IntrusivePtr.h" #include "zeek/Rule.h" +#include "zeek/Tag.h" #include "zeek/Timer.h" #include "zeek/UID.h" #include "zeek/WeirdState.h" #include "zeek/ZeekArgs.h" #include "zeek/analyzer/Analyzer.h" -#include "zeek/analyzer/Tag.h" #include "zeek/iosource/Packet.h" #include "zeek/session/Session.h" @@ -136,7 +136,7 @@ public: void FlipRoles(); analyzer::Analyzer* FindAnalyzer(analyzer::ID id); - analyzer::Analyzer* FindAnalyzer(const analyzer::Tag& tag); // find first in tree. + analyzer::Analyzer* FindAnalyzer(const zeek::Tag& tag); // find first in tree. analyzer::Analyzer* FindAnalyzer(const char* name); // find first in tree. 
TransportProto ConnTransport() const { return proto; } diff --git a/src/ConvertUTF.c b/src/ConvertUTF.c deleted file mode 100644 index b8acb69d27..0000000000 --- a/src/ConvertUTF.c +++ /dev/null @@ -1,755 +0,0 @@ -/*===--- ConvertUTF.c - Universal Character Names conversions ---------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is distributed under the University of Illinois Open Source - * License: - * - * University of Illinois/NCSA - * Open Source License - * - * Copyright (c) 2003-2014 University of Illinois at Urbana-Champaign. - * All rights reserved. - * - * Developed by: - * - * LLVM Team - * - * University of Illinois at Urbana-Champaign - * - * http://llvm.org - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal with the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * * Redistributions of source code must retain the above - * copyright notice, this list of conditions and the - * following disclaimers. - * - * * Redistributions in binary form must reproduce the - * above copyright notice, this list of conditions and - * the following disclaimers in the documentation and/or - * other materials provided with the distribution. - * - * * Neither the names of the LLVM Team, University of - * Illinois at Urbana-Champaign, nor the names of its - * contributors may be used to endorse or promote - * products derived from this Software without specific - * prior written permission. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE CONTRIBUTORS OR - * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS WITH THE SOFTWARE. - * - *===------------------------------------------------------------------------=*/ -/* - * Copyright 2001-2004 Unicode, Inc. - * - * Disclaimer - * - * This source code is provided as is by Unicode, Inc. No claims are - * made as to fitness for any particular purpose. No warranties of any - * kind are expressed or implied. The recipient agrees to determine - * applicability of information provided. If this file has been - * purchased on magnetic or optical media from Unicode, Inc., the - * sole remedy for any claim will be exchange of defective media - * within 90 days of receipt. - * - * Limitations on Rights to Redistribute This Code - * - * Unicode, Inc. hereby grants the right to freely use the information - * supplied in this file in the creation of products supporting the - * Unicode Standard, and to make copies of this file in any form - * for internal or external distribution as long as this notice - * remains attached. - */ - -/* --------------------------------------------------------------------- - - Conversions between UTF32, UTF-16, and UTF-8. Source code file. - Author: Mark E. Davis, 1994. - Rev History: Rick McGowan, fixes & updates May 2001. - Sept 2001: fixed const & error conditions per - mods suggested by S. Parent & A. Lillich. - June 2002: Tim Dodd added detection and handling of incomplete - source sequences, enhanced error detection, added casts - to eliminate compiler warnings. - July 2003: slight mods to back out aggressive FFFE detection. 
- Jan 2004: updated switches in from-UTF8 conversions. - Oct 2004: updated to use UNI_MAX_LEGAL_UTF32 in UTF-32 conversions. - - See the header file "ConvertUTF.h" for complete documentation. - ------------------------------------------------------------------------- */ - - -#include "zeek/ConvertUTF.h" -#ifdef CVTUTF_DEBUG -#include -#endif -#include - -static const int halfShift = 10; /* used for shifting by 10 bits */ - -static const UTF32 halfBase = 0x0010000UL; -static const UTF32 halfMask = 0x3FFUL; - -#define UNI_SUR_HIGH_START (UTF32)0xD800 -#define UNI_SUR_HIGH_END (UTF32)0xDBFF -#define UNI_SUR_LOW_START (UTF32)0xDC00 -#define UNI_SUR_LOW_END (UTF32)0xDFFF -#define false 0 -#define true 1 - -/* --------------------------------------------------------------------- */ - -/* - * Index into the table below with the first byte of a UTF-8 sequence to - * get the number of trailing bytes that are supposed to follow it. - * Note that *legal* UTF-8 values can't have 4 or 5-bytes. The table is - * left as-is for anyone who may want to do such conversion, which was - * allowed in earlier algorithms. - */ -static const char trailingBytesForUTF8[256] = { - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 -}; - -/* - * Magic values subtracted from a buffer value during UTF8 conversion. - * This table contains as many values as there might be trailing bytes - * in a UTF-8 sequence. 
- */ -static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL, - 0x03C82080UL, 0xFA082080UL, 0x82082080UL }; - -/* - * Once the bits are split out into bytes of UTF-8, this is a mask OR-ed - * into the first byte, depending on how many bytes follow. There are - * as many entries in this table as there are UTF-8 sequence types. - * (I.e., one byte sequence, two byte... etc.). Remember that sequences - * for *legal* UTF-8 will be 4 or fewer bytes total. - */ -static const UTF8 firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC }; - -/* --------------------------------------------------------------------- */ - -/* The interface converts a whole buffer to avoid function-call overhead. - * Constants have been gathered. Loops & conditionals have been removed as - * much as possible for efficiency, in favor of drop-through switches. - * (See "Note A" at the bottom of the file for equivalent code.) - * If your compiler supports it, the "isLegalUTF8" call can be turned - * into an inline function. 
- */ - - -/* --------------------------------------------------------------------- */ - -ConversionResult ConvertUTF32toUTF16 ( - const UTF32** sourceStart, const UTF32* sourceEnd, - UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags) { - ConversionResult result = conversionOK; - const UTF32* source = *sourceStart; - UTF16* target = *targetStart; - while (source < sourceEnd) { - UTF32 ch; - if (target >= targetEnd) { - result = targetExhausted; break; - } - ch = *source++; - if (ch <= UNI_MAX_BMP) { /* Target is a character <= 0xFFFF */ - /* UTF-16 surrogate values are illegal in UTF-32; 0xffff or 0xfffe are both reserved values */ - if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) { - if (flags == strictConversion) { - --source; /* return to the illegal value itself */ - result = sourceIllegal; - break; - } else { - *target++ = UNI_REPLACEMENT_CHAR; - } - } else { - *target++ = (UTF16)ch; /* normal case */ - } - } else if (ch > UNI_MAX_LEGAL_UTF32) { - if (flags == strictConversion) { - result = sourceIllegal; - } else { - *target++ = UNI_REPLACEMENT_CHAR; - } - } else { - /* target is a character in range 0xFFFF - 0x10FFFF. */ - if (target + 1 >= targetEnd) { - --source; /* Back up source pointer! */ - result = targetExhausted; break; - } - ch -= halfBase; - *target++ = (UTF16)((ch >> halfShift) + UNI_SUR_HIGH_START); - *target++ = (UTF16)((ch & halfMask) + UNI_SUR_LOW_START); - } - } - *sourceStart = source; - *targetStart = target; - return result; -} - -/* --------------------------------------------------------------------- */ - -ConversionResult ConvertUTF16toUTF32 ( - const UTF16** sourceStart, const UTF16* sourceEnd, - UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags) { - ConversionResult result = conversionOK; - const UTF16* source = *sourceStart; - UTF32* target = *targetStart; - UTF32 ch, ch2; - while (source < sourceEnd) { - const UTF16* oldSource = source; /* In case we have to back up because of target overflow. 
*/ - ch = *source++; - /* If we have a surrogate pair, convert to UTF32 first. */ - if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) { - /* If the 16 bits following the high surrogate are in the source buffer... */ - if (source < sourceEnd) { - ch2 = *source; - /* If it's a low surrogate, convert to UTF32. */ - if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) { - ch = ((ch - UNI_SUR_HIGH_START) << halfShift) - + (ch2 - UNI_SUR_LOW_START) + halfBase; - ++source; - } else if (flags == strictConversion) { /* it's an unpaired high surrogate */ - --source; /* return to the illegal value itself */ - result = sourceIllegal; - break; - } - } else { /* We don't have the 16 bits following the high surrogate. */ - --source; /* return to the high surrogate */ - result = sourceExhausted; - break; - } - } else if (flags == strictConversion) { - /* UTF-16 surrogate values are illegal in UTF-32 */ - if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) { - --source; /* return to the illegal value itself */ - result = sourceIllegal; - break; - } - } - if (target >= targetEnd) { - source = oldSource; /* Back up source pointer! */ - result = targetExhausted; break; - } - *target++ = ch; - } - *sourceStart = source; - *targetStart = target; -#ifdef CVTUTF_DEBUG -if (result == sourceIllegal) { - fprintf(stderr, "ConvertUTF16toUTF32 illegal seq 0x%04x,%04x\n", ch, ch2); - fflush(stderr); -} -#endif - return result; -} -ConversionResult ConvertUTF16toUTF8 ( - const UTF16** sourceStart, const UTF16* sourceEnd, - UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags) { - ConversionResult result = conversionOK; - const UTF16* source = *sourceStart; - UTF8* target = *targetStart; - while (source < sourceEnd) { - UTF32 ch; - unsigned short bytesToWrite = 0; - const UTF32 byteMask = 0xBF; - const UTF32 byteMark = 0x80; - const UTF16* oldSource = source; /* In case we have to back up because of target overflow. 
*/ - ch = *source++; - /* If we have a surrogate pair, convert to UTF32 first. */ - if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) { - /* If the 16 bits following the high surrogate are in the source buffer... */ - if (source < sourceEnd) { - UTF32 ch2 = *source; - /* If it's a low surrogate, convert to UTF32. */ - if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) { - ch = ((ch - UNI_SUR_HIGH_START) << halfShift) - + (ch2 - UNI_SUR_LOW_START) + halfBase; - ++source; - } else if (flags == strictConversion) { /* it's an unpaired high surrogate */ - --source; /* return to the illegal value itself */ - result = sourceIllegal; - break; - } - } else { /* We don't have the 16 bits following the high surrogate. */ - --source; /* return to the high surrogate */ - result = sourceExhausted; - break; - } - } else if (flags == strictConversion) { - /* UTF-16 surrogate values are illegal in UTF-32 */ - if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) { - --source; /* return to the illegal value itself */ - result = sourceIllegal; - break; - } - } - /* Figure out how many bytes the result will require */ - if (ch < (UTF32)0x80) { bytesToWrite = 1; - } else if (ch < (UTF32)0x800) { bytesToWrite = 2; - } else if (ch < (UTF32)0x10000) { bytesToWrite = 3; - } else if (ch < (UTF32)0x110000) { bytesToWrite = 4; - } else { bytesToWrite = 3; - ch = UNI_REPLACEMENT_CHAR; - } - - target += bytesToWrite; - if (target > targetEnd) { - source = oldSource; /* Back up source pointer! */ - target -= bytesToWrite; result = targetExhausted; break; - } - switch (bytesToWrite) { /* note: everything falls through. 
*/ - case 4: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6; - case 3: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6; - case 2: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6; - case 1: *--target = (UTF8)(ch | firstByteMark[bytesToWrite]); - } - target += bytesToWrite; - } - *sourceStart = source; - *targetStart = target; - return result; -} - -/* --------------------------------------------------------------------- */ - -ConversionResult ConvertUTF32toUTF8 ( - const UTF32** sourceStart, const UTF32* sourceEnd, - UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags) { - ConversionResult result = conversionOK; - const UTF32* source = *sourceStart; - UTF8* target = *targetStart; - while (source < sourceEnd) { - UTF32 ch; - unsigned short bytesToWrite = 0; - const UTF32 byteMask = 0xBF; - const UTF32 byteMark = 0x80; - ch = *source++; - if (flags == strictConversion ) { - /* UTF-16 surrogate values are illegal in UTF-32 */ - if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) { - --source; /* return to the illegal value itself */ - result = sourceIllegal; - break; - } - } - /* - * Figure out how many bytes the result will require. Turn any - * illegally large UTF32 things (> Plane 17) into replacement chars. - */ - if (ch < (UTF32)0x80) { bytesToWrite = 1; - } else if (ch < (UTF32)0x800) { bytesToWrite = 2; - } else if (ch < (UTF32)0x10000) { bytesToWrite = 3; - } else if (ch <= UNI_MAX_LEGAL_UTF32) { bytesToWrite = 4; - } else { bytesToWrite = 3; - ch = UNI_REPLACEMENT_CHAR; - result = sourceIllegal; - } - - target += bytesToWrite; - if (target > targetEnd) { - --source; /* Back up source pointer! */ - target -= bytesToWrite; result = targetExhausted; break; - } - switch (bytesToWrite) { /* note: everything falls through. 
*/ - case 4: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6; - case 3: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6; - case 2: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6; - case 1: *--target = (UTF8) (ch | firstByteMark[bytesToWrite]); - } - target += bytesToWrite; - } - *sourceStart = source; - *targetStart = target; - return result; -} - -/* --------------------------------------------------------------------- */ - -/* - * Utility routine to tell whether a sequence of bytes is legal UTF-8. - * This must be called with the length pre-determined by the first byte. - * If not calling this from ConvertUTF8to*, then the length can be set by: - * length = trailingBytesForUTF8[*source]+1; - * and the sequence is illegal right away if there aren't that many bytes - * available. - * If presented with a length > 4, this returns false. The Unicode - * definition of UTF-8 goes up to 4-byte sequences. - */ - -static Boolean isLegalUTF8(const UTF8 *source, int length) { - UTF8 a; - const UTF8 *srcptr = source+length; - switch (length) { - default: return false; - /* Everything else falls through when "true"... */ - case 4: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false; - case 3: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false; - case 2: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false; - - switch (*source) { - /* no fall-through in this inner switch */ - case 0xE0: if (a < 0xA0) return false; break; - case 0xED: if (a > 0x9F) return false; break; - case 0xF0: if (a < 0x90) return false; break; - case 0xF4: if (a > 0x8F) return false; break; - default: if (a < 0x80) return false; - } - - case 1: if (*source >= 0x80 && *source < 0xC2) return false; - } - if (*source > 0xF4) return false; - return true; -} - -/* --------------------------------------------------------------------- */ - -/* - * Exported function to return whether a UTF-8 sequence is legal or not. - * This is not used here; it's just exported. 
- */ -Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd) { - int length = trailingBytesForUTF8[*source]+1; - if (length > sourceEnd - source) { - return false; - } - return isLegalUTF8(source, length); -} - -/* --------------------------------------------------------------------- */ - -static unsigned -findMaximalSubpartOfIllFormedUTF8Sequence(const UTF8 *source, - const UTF8 *sourceEnd) { - UTF8 b1, b2, b3; - - assert(!isLegalUTF8Sequence(source, sourceEnd)); - - /* - * Unicode 6.3.0, D93b: - * - * Maximal subpart of an ill-formed subsequence: The longest code unit - * subsequence starting at an unconvertible offset that is either: - * a. the initial subsequence of a well-formed code unit sequence, or - * b. a subsequence of length one. - */ - - if (source == sourceEnd) - return 0; - - /* - * Perform case analysis. See Unicode 6.3.0, Table 3-7. Well-Formed UTF-8 - * Byte Sequences. - */ - - b1 = *source; - ++source; - if (b1 >= 0xC2 && b1 <= 0xDF) { - /* - * First byte is valid, but we know that this code unit sequence is - * invalid, so the maximal subpart has to end after the first byte. - */ - return 1; - } - - if (source == sourceEnd) - return 1; - - b2 = *source; - ++source; - - if (b1 == 0xE0) { - return (b2 >= 0xA0 && b2 <= 0xBF) ? 2 : 1; - } - if (b1 >= 0xE1 && b1 <= 0xEC) { - return (b2 >= 0x80 && b2 <= 0xBF) ? 2 : 1; - } - if (b1 == 0xED) { - return (b2 >= 0x80 && b2 <= 0x9F) ? 2 : 1; - } - if (b1 >= 0xEE && b1 <= 0xEF) { - return (b2 >= 0x80 && b2 <= 0xBF) ? 2 : 1; - } - if (b1 == 0xF0) { - if (b2 >= 0x90 && b2 <= 0xBF) { - if (source == sourceEnd) - return 2; - - b3 = *source; - return (b3 >= 0x80 && b3 <= 0xBF) ? 3 : 2; - } - return 1; - } - if (b1 >= 0xF1 && b1 <= 0xF3) { - if (b2 >= 0x80 && b2 <= 0xBF) { - if (source == sourceEnd) - return 2; - - b3 = *source; - return (b3 >= 0x80 && b3 <= 0xBF) ? 
3 : 2; - } - return 1; - } - if (b1 == 0xF4) { - if (b2 >= 0x80 && b2 <= 0x8F) { - if (source == sourceEnd) - return 2; - - b3 = *source; - return (b3 >= 0x80 && b3 <= 0xBF) ? 3 : 2; - } - return 1; - } - - assert((b1 >= 0x80 && b1 <= 0xC1) || b1 >= 0xF5); - /* - * There are no valid sequences that start with these bytes. Maximal subpart - * is defined to have length 1 in these cases. - */ - return 1; -} - -/* --------------------------------------------------------------------- */ - -/* - * Exported function to return the total number of bytes in a codepoint - * represented in UTF-8, given the value of the first byte. - */ -unsigned getNumBytesForUTF8(UTF8 first) { - return trailingBytesForUTF8[first] + 1; -} - -/* --------------------------------------------------------------------- */ - -/* - * Exported function to return whether a UTF-8 string is legal or not. - * This is not used here; it's just exported. - */ -Boolean isLegalUTF8String(const UTF8 **source, const UTF8 *sourceEnd) { - while (*source != sourceEnd) { - int length = trailingBytesForUTF8[**source] + 1; - if (length > sourceEnd - *source || !isLegalUTF8(*source, length)) - return false; - *source += length; - } - return true; -} - -/* --------------------------------------------------------------------- */ - -ConversionResult ConvertUTF8toUTF16 ( - const UTF8** sourceStart, const UTF8* sourceEnd, - UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags) { - ConversionResult result = conversionOK; - const UTF8* source = *sourceStart; - UTF16* target = *targetStart; - while (source < sourceEnd) { - UTF32 ch = 0; - unsigned short extraBytesToRead = trailingBytesForUTF8[*source]; - if (extraBytesToRead >= sourceEnd - source) { - result = sourceExhausted; break; - } - /* Do this check whether lenient or strict */ - if (!isLegalUTF8(source, extraBytesToRead+1)) { - result = sourceIllegal; - break; - } - /* - * The cases all fall through. See "Note A" below. 
- */ - switch (extraBytesToRead) { - case 5: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */ - case 4: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */ - case 3: ch += *source++; ch <<= 6; - case 2: ch += *source++; ch <<= 6; - case 1: ch += *source++; ch <<= 6; - case 0: ch += *source++; - } - ch -= offsetsFromUTF8[extraBytesToRead]; - - if (target >= targetEnd) { - source -= (extraBytesToRead+1); /* Back up source pointer! */ - result = targetExhausted; break; - } - if (ch <= UNI_MAX_BMP) { /* Target is a character <= 0xFFFF */ - /* UTF-16 surrogate values are illegal in UTF-32 */ - if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) { - if (flags == strictConversion) { - source -= (extraBytesToRead+1); /* return to the illegal value itself */ - result = sourceIllegal; - break; - } else { - *target++ = UNI_REPLACEMENT_CHAR; - } - } else { - *target++ = (UTF16)ch; /* normal case */ - } - } else if (ch > UNI_MAX_UTF16) { - if (flags == strictConversion) { - result = sourceIllegal; - source -= (extraBytesToRead+1); /* return to the start */ - break; /* Bail out; shouldn't continue */ - } else { - *target++ = UNI_REPLACEMENT_CHAR; - } - } else { - /* target is a character in range 0xFFFF - 0x10FFFF. */ - if (target + 1 >= targetEnd) { - source -= (extraBytesToRead+1); /* Back up source pointer! 
*/ - result = targetExhausted; break; - } - ch -= halfBase; - *target++ = (UTF16)((ch >> halfShift) + UNI_SUR_HIGH_START); - *target++ = (UTF16)((ch & halfMask) + UNI_SUR_LOW_START); - } - } - *sourceStart = source; - *targetStart = target; - return result; -} - -/* --------------------------------------------------------------------- */ - -static ConversionResult ConvertUTF8toUTF32Impl( - const UTF8** sourceStart, const UTF8* sourceEnd, - UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags, - Boolean InputIsPartial) { - ConversionResult result = conversionOK; - const UTF8* source = *sourceStart; - UTF32* target = *targetStart; - while (source < sourceEnd) { - UTF32 ch = 0; - unsigned short extraBytesToRead = trailingBytesForUTF8[*source]; - if (extraBytesToRead >= sourceEnd - source) { - if (flags == strictConversion || InputIsPartial) { - result = sourceExhausted; - break; - } else { - result = sourceIllegal; - - /* - * Replace the maximal subpart of ill-formed sequence with - * replacement character. - */ - source += findMaximalSubpartOfIllFormedUTF8Sequence(source, - sourceEnd); - *target++ = UNI_REPLACEMENT_CHAR; - continue; - } - } - if (target >= targetEnd) { - result = targetExhausted; break; - } - - /* Do this check whether lenient or strict */ - if (!isLegalUTF8(source, extraBytesToRead+1)) { - result = sourceIllegal; - if (flags == strictConversion) { - /* Abort conversion. */ - break; - } else { - /* - * Replace the maximal subpart of ill-formed sequence with - * replacement character. - */ - source += findMaximalSubpartOfIllFormedUTF8Sequence(source, - sourceEnd); - *target++ = UNI_REPLACEMENT_CHAR; - continue; - } - } - /* - * The cases all fall through. See "Note A" below. 
- */ - switch (extraBytesToRead) { - case 5: ch += *source++; ch <<= 6; - case 4: ch += *source++; ch <<= 6; - case 3: ch += *source++; ch <<= 6; - case 2: ch += *source++; ch <<= 6; - case 1: ch += *source++; ch <<= 6; - case 0: ch += *source++; - } - ch -= offsetsFromUTF8[extraBytesToRead]; - - if (ch <= UNI_MAX_LEGAL_UTF32) { - /* - * UTF-16 surrogate values are illegal in UTF-32, and anything - * over Plane 17 (> 0x10FFFF) is illegal. - */ - if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) { - if (flags == strictConversion) { - source -= (extraBytesToRead+1); /* return to the illegal value itself */ - result = sourceIllegal; - break; - } else { - *target++ = UNI_REPLACEMENT_CHAR; - } - } else { - *target++ = ch; - } - } else { /* i.e., ch > UNI_MAX_LEGAL_UTF32 */ - result = sourceIllegal; - *target++ = UNI_REPLACEMENT_CHAR; - } - } - *sourceStart = source; - *targetStart = target; - return result; -} - -ConversionResult ConvertUTF8toUTF32Partial(const UTF8 **sourceStart, - const UTF8 *sourceEnd, - UTF32 **targetStart, - UTF32 *targetEnd, - ConversionFlags flags) { - return ConvertUTF8toUTF32Impl(sourceStart, sourceEnd, targetStart, targetEnd, - flags, /*InputIsPartial=*/true); -} - -ConversionResult ConvertUTF8toUTF32(const UTF8 **sourceStart, - const UTF8 *sourceEnd, UTF32 **targetStart, - UTF32 *targetEnd, ConversionFlags flags) { - return ConvertUTF8toUTF32Impl(sourceStart, sourceEnd, targetStart, targetEnd, - flags, /*InputIsPartial=*/false); -} - -/* --------------------------------------------------------------------- - - Note A. - The fall-through switches in UTF-8 reading code save a - temp variable, some decrements & conditionals. The switches - are equivalent to the following loop: - { - int tmpBytesToRead = extraBytesToRead+1; - do { - ch += *source++; - --tmpBytesToRead; - if (tmpBytesToRead) ch <<= 6; - } while (tmpBytesToRead > 0); - } - In UTF-8 writing code, the switches on "bytesToWrite" are - similarly unrolled loops. 
- - --------------------------------------------------------------------- */ diff --git a/src/ConvertUTF.h b/src/ConvertUTF.h deleted file mode 100644 index fe7939914e..0000000000 --- a/src/ConvertUTF.h +++ /dev/null @@ -1,233 +0,0 @@ -/*===--- ConvertUTF.h - Universal Character Names conversions ---------------=== - * - * The LLVM Compiler Infrastructure - * - * This file is distributed under the University of Illinois Open Source - * License: - * - * University of Illinois/NCSA - * Open Source License - * - * Copyright (c) 2003-2014 University of Illinois at Urbana-Champaign. - * All rights reserved. - * - * Developed by: - * - * LLVM Team - * - * University of Illinois at Urbana-Champaign - * - * http://llvm.org - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal with the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * * Redistributions of source code must retain the above - * copyright notice, this list of conditions and the - * following disclaimers. - * - * * Redistributions in binary form must reproduce the - * above copyright notice, this list of conditions and - * the following disclaimers in the documentation and/or - * other materials provided with the distribution. - * - * * Neither the names of the LLVM Team, University of - * Illinois at Urbana-Champaign, nor the names of its - * contributors may be used to endorse or promote - * products derived from this Software without specific - * prior written permission. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE CONTRIBUTORS OR - * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS WITH THE SOFTWARE. - * - *==------------------------------------------------------------------------==*/ -/* - * Copyright 2001-2004 Unicode, Inc. - * - * Disclaimer - * - * This source code is provided as is by Unicode, Inc. No claims are - * made as to fitness for any particular purpose. No warranties of any - * kind are expressed or implied. The recipient agrees to determine - * applicability of information provided. If this file has been - * purchased on magnetic or optical media from Unicode, Inc., the - * sole remedy for any claim will be exchange of defective media - * within 90 days of receipt. - * - * Limitations on Rights to Redistribute This Code - * - * Unicode, Inc. hereby grants the right to freely use the information - * supplied in this file in the creation of products supporting the - * Unicode Standard, and to make copies of this file in any form - * for internal or external distribution as long as this notice - * remains attached. - */ - -/* --------------------------------------------------------------------- - - Conversions between UTF32, UTF-16, and UTF-8. Header file. - - Several functions are included here, forming a complete set of - conversions between the three formats. UTF-7 is not included - here, but is handled in a separate source file. - - Each of these routines takes pointers to input buffers and output - buffers. The input buffers are const. 
- - Each routine converts the text between *sourceStart and sourceEnd, - putting the result into the buffer between *targetStart and - targetEnd. Note: the end pointers are *after* the last item: e.g. - *(sourceEnd - 1) is the last item. - - !!! NOTE: The source and end pointers must be aligned properly !!! - - The return result indicates whether the conversion was successful, - and if not, whether the problem was in the source or target buffers. - (Only the first encountered problem is indicated.) - - After the conversion, *sourceStart and *targetStart are both - updated to point to the end of the last text successfully converted in - the respective buffers. - - Input parameters: - sourceStart - pointer to a pointer to the source buffer. - The contents of this are modified on return so that - it points at the next thing to be converted. - targetStart - similarly, pointer to pointer to the target buffer. - sourceEnd, targetEnd - respectively pointers to the ends of the - two buffers, for overflow checking only. - - These conversion functions take a ConversionFlags argument. When this - flag is set to strict, both irregular sequences and isolated surrogates - will cause an error. When the flag is set to lenient, both irregular - sequences and isolated surrogates are converted. - - Whether the flag is strict or lenient, all illegal sequences will cause - an error return. This includes sequences such as: <F4 90 80 80>, <C0 80>, - or <A0> in UTF-8, and values above 0x10FFFF in UTF-32. Conformant code - must check for illegal sequences. - - When the flag is set to lenient, characters over 0x10FFFF are converted - to the replacement character; otherwise (when the flag is set to strict) - they constitute an error. - - Output parameters: - The value "sourceIllegal" is returned from some routines if the input - sequence is malformed. When "sourceIllegal" is returned, the source - value will point to the illegal value that caused the problem. 
E.g., - in UTF-8 when a sequence is malformed, it points to the start of the - malformed sequence. - - Author: Mark E. Davis, 1994. - Rev History: Rick McGowan, fixes & updates May 2001. - Fixes & updates, Sept 2001. - ------------------------------------------------------------------------- */ - -#pragma once - -/* --------------------------------------------------------------------- - The following 4 definitions are compiler-specific. - The C standard does not guarantee that wchar_t has at least - 16 bits, so wchar_t is no less portable than unsigned short! - All should be unsigned values to avoid sign extension during - bit mask & shift operations. ------------------------------------------------------------------------- */ - -typedef unsigned int UTF32; /* at least 32 bits */ -typedef unsigned short UTF16; /* at least 16 bits */ -typedef unsigned char UTF8; /* typically 8 bits */ -typedef unsigned char Boolean; /* 0 or 1 */ - -/* Some fundamental constants */ -#define UNI_REPLACEMENT_CHAR (UTF32)0x0000FFFD -#define UNI_MAX_BMP (UTF32)0x0000FFFF -#define UNI_MAX_UTF16 (UTF32)0x0010FFFF -#define UNI_MAX_UTF32 (UTF32)0x7FFFFFFF -#define UNI_MAX_LEGAL_UTF32 (UTF32)0x0010FFFF - -#define UNI_MAX_UTF8_BYTES_PER_CODE_POINT 4 - -#define UNI_UTF16_BYTE_ORDER_MARK_NATIVE 0xFEFF -#define UNI_UTF16_BYTE_ORDER_MARK_SWAPPED 0xFFFE - -typedef enum { - conversionOK, /* conversion successful */ - sourceExhausted, /* partial character in source, but hit end */ - targetExhausted, /* insuff. room in target for conversion */ - sourceIllegal /* source sequence is illegal/malformed */ -} ConversionResult; - -typedef enum { - strictConversion = 0, - lenientConversion -} ConversionFlags; - -/* This is for C++ and does no harm in C */ -#ifdef __cplusplus -extern "C" { -#endif - -ConversionResult ConvertUTF8toUTF16 ( - const UTF8** sourceStart, const UTF8* sourceEnd, - UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags); - -/** - * Convert a partial UTF8 sequence to UTF32. 
If the sequence ends in an - * incomplete code unit sequence, returns \c sourceExhausted. - */ -ConversionResult ConvertUTF8toUTF32Partial( - const UTF8** sourceStart, const UTF8* sourceEnd, - UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags); - -/** - * Convert a complete UTF8 sequence to UTF32. If the sequence ends in an - * incomplete code unit sequence, returns \c sourceExhausted under strict - * conversion and \c sourceIllegal under lenient conversion. - */ -ConversionResult ConvertUTF8toUTF32( - const UTF8** sourceStart, const UTF8* sourceEnd, - UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags); - -/* NOTE: The source and end pointers must be aligned properly. */ -ConversionResult ConvertUTF16toUTF8 ( - const UTF16** sourceStart, const UTF16* sourceEnd, - UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags); - -/* NOTE: The source and end pointers must be aligned properly. */ -ConversionResult ConvertUTF32toUTF8 ( - const UTF32** sourceStart, const UTF32* sourceEnd, - UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags); - -/* NOTE: The source and end pointers must be aligned properly. */ -ConversionResult ConvertUTF16toUTF32 ( - const UTF16** sourceStart, const UTF16* sourceEnd, - UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags); - -/* NOTE: The source and end pointers must be aligned properly. 
*/ -ConversionResult ConvertUTF32toUTF16 ( - const UTF32** sourceStart, const UTF32* sourceEnd, - UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags); - -Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd); - -Boolean isLegalUTF8String(const UTF8 **source, const UTF8 *sourceEnd); - -unsigned getNumBytesForUTF8(UTF8 firstByte); - -#ifdef __cplusplus -} -#endif - -/* --------------------------------------------------------------------- */ diff --git a/src/DNS_Mgr.cc b/src/DNS_Mgr.cc index 827f17ecfe..8d776b7351 100644 --- a/src/DNS_Mgr.cc +++ b/src/DNS_Mgr.cc @@ -46,7 +46,7 @@ extern "C" #include -#include "zeek/nb_dns.h" +#include "zeek/3rdparty/nb_dns.h" } using namespace std; diff --git a/src/Debug.cc b/src/Debug.cc index eb08b54207..469d016f2d 100644 --- a/src/Debug.cc +++ b/src/Debug.cc @@ -34,7 +34,7 @@ extern "C" { -#include "zeek/setsignal.h" +#include "zeek/3rdparty/setsignal.h" } using namespace std; diff --git a/src/DebugCmdInfoConstants.cc b/src/DebugCmdInfoConstants.cc deleted file mode 100644 index f1043613d8..0000000000 --- a/src/DebugCmdInfoConstants.cc +++ /dev/null @@ -1,286 +0,0 @@ - -// -// This file was automatically generated from DebugCmdInfoConstants.in -// DO NOT EDIT. 
-// - -#include "zeek/util.h" -namespace zeek::detail { - -void init_global_dbg_constants () { - - { - DebugCmdInfo* info; - const char * const names[] = { }; - - info = new DebugCmdInfo(dcInvalid, names, 0, false, "This function should not be called", - false); - g_DebugCmdInfos.push_back(info); - } - - { - DebugCmdInfo* info; - const char * const names[] = { - "help" - }; - - info = new DebugCmdInfo(dcHelp, names, 1, false, "Get help with debugger commands", - false); - g_DebugCmdInfos.push_back(info); - } - - { - DebugCmdInfo* info; - const char * const names[] = { - "quit" - }; - - info = new DebugCmdInfo(dcQuit, names, 1, false, "Exit Zeek", - false); - g_DebugCmdInfos.push_back(info); - } - - { - DebugCmdInfo* info; - const char * const names[] = { - "next" - }; - - info = new DebugCmdInfo(dcNext, names, 1, true, "Step to the following statement, skipping function calls", - true); - g_DebugCmdInfos.push_back(info); - } - - { - DebugCmdInfo* info; - const char * const names[] = { - "step", - "s" - }; - - info = new DebugCmdInfo(dcStep, names, 2, true, "Step to following statements, stepping in to function calls", - true); - g_DebugCmdInfos.push_back(info); - } - - { - DebugCmdInfo* info; - const char * const names[] = { - "continue", - "c" - }; - - info = new DebugCmdInfo(dcContinue, names, 2, true, "Resume execution of the policy script", - true); - g_DebugCmdInfos.push_back(info); - } - - { - DebugCmdInfo* info; - const char * const names[] = { - "finish" - }; - - info = new DebugCmdInfo(dcFinish, names, 1, true, "Run until the currently-executing function completes", - true); - g_DebugCmdInfos.push_back(info); - } - - { - DebugCmdInfo* info; - const char * const names[] = { - "break", - "b" - }; - - info = new DebugCmdInfo(dcBreak, names, 2, false, "Set a breakpoint", - false); - g_DebugCmdInfos.push_back(info); - } - - { - DebugCmdInfo* info; - const char * const names[] = { - "cond" - }; - - info = new DebugCmdInfo(dcBreakCondition, names, 1, false, "", - 
false); - g_DebugCmdInfos.push_back(info); - } - - { - DebugCmdInfo* info; - const char * const names[] = { - "delete", - "d" - }; - - info = new DebugCmdInfo(dcDeleteBreak, names, 2, false, "Delete the specified breakpoints; delete all if no arguments", - false); - g_DebugCmdInfos.push_back(info); - } - - { - DebugCmdInfo* info; - const char * const names[] = { - "clear" - }; - - info = new DebugCmdInfo(dcClearBreak, names, 1, false, "", - false); - g_DebugCmdInfos.push_back(info); - } - - { - DebugCmdInfo* info; - const char * const names[] = { - "disable", - "dis" - }; - - info = new DebugCmdInfo(dcDisableBreak, names, 2, false, "", - false); - g_DebugCmdInfos.push_back(info); - } - - { - DebugCmdInfo* info; - const char * const names[] = { - "enable" - }; - - info = new DebugCmdInfo(dcEnableBreak, names, 1, false, "", - false); - g_DebugCmdInfos.push_back(info); - } - - { - DebugCmdInfo* info; - const char * const names[] = { - "ignore" - }; - - info = new DebugCmdInfo(dcIgnoreBreak, names, 1, false, "", - false); - g_DebugCmdInfos.push_back(info); - } - - { - DebugCmdInfo* info; - const char * const names[] = { - "print", - "p", - "set" - }; - - info = new DebugCmdInfo(dcPrint, names, 3, false, "Evaluate an expression and print the result (also aliased as 'set')", - true); - g_DebugCmdInfos.push_back(info); - } - - { - DebugCmdInfo* info; - const char * const names[] = { - "backtrace", - "bt", - "where" - }; - - info = new DebugCmdInfo(dcBacktrace, names, 3, false, "Print a stack trace (with +- N argument, inner/outer N frames only)", - false); - g_DebugCmdInfos.push_back(info); - } - - { - DebugCmdInfo* info; - const char * const names[] = { - "frame" - }; - - info = new DebugCmdInfo(dcFrame, names, 1, false, "Select frame number N", - false); - g_DebugCmdInfos.push_back(info); - } - - { - DebugCmdInfo* info; - const char * const names[] = { - "up" - }; - - info = new DebugCmdInfo(dcUp, names, 1, false, "Select the stack frame one level up", - false); - 
g_DebugCmdInfos.push_back(info); - } - - { - DebugCmdInfo* info; - const char * const names[] = { - "down" - }; - - info = new DebugCmdInfo(dcDown, names, 1, false, "Select the stack frame one level down", - false); - g_DebugCmdInfos.push_back(info); - } - - { - DebugCmdInfo* info; - const char * const names[] = { - "info" - }; - - info = new DebugCmdInfo(dcInfo, names, 1, false, "Get information about the debugging environment", - false); - g_DebugCmdInfos.push_back(info); - } - - { - DebugCmdInfo* info; - const char * const names[] = { - "list", - "l" - }; - - info = new DebugCmdInfo(dcList, names, 2, false, "Print source lines surrounding specified context", - true); - g_DebugCmdInfos.push_back(info); - } - - { - DebugCmdInfo* info; - const char * const names[] = { - "display" - }; - - info = new DebugCmdInfo(dcDisplay, names, 1, false, "", - false); - g_DebugCmdInfos.push_back(info); - } - - { - DebugCmdInfo* info; - const char * const names[] = { - "undisplay" - }; - - info = new DebugCmdInfo(dcUndisplay, names, 1, false, "", - false); - g_DebugCmdInfos.push_back(info); - } - - { - DebugCmdInfo* info; - const char * const names[] = { - "trace" - }; - - info = new DebugCmdInfo(dcTrace, names, 1, false, "Turn on or off execution tracing (with no arguments, prints current state.)", - false); - g_DebugCmdInfos.push_back(info); - } - -} - -} // namespace zeek::detail diff --git a/src/DebugCmdInfoConstants.h b/src/DebugCmdInfoConstants.h deleted file mode 100644 index 44330482d3..0000000000 --- a/src/DebugCmdInfoConstants.h +++ /dev/null @@ -1,205 +0,0 @@ -void InitGlobalDbgConstants () { - { - - { - DebugCmdInfo* info; - const char * const names[] = { - "help" - }; - - info = new DebugCmdInfo (dcHelp, names, 1, false, "Get help with debugger commands"); - g_DebugCmdInfos.push_back(info); - } - - { - DebugCmdInfo* info; - const char * const names[] = { - "quit" - }; - - info = new DebugCmdInfo (dcQuit, names, 1, false, ""); - g_DebugCmdInfos.push_back(info); - } - - 
{ - DebugCmdInfo* info; - const char * const names[] = { - "next" - }; - - info = new DebugCmdInfo (dcNext, names, 1, true, ""); - g_DebugCmdInfos.push_back(info); - } - - { - DebugCmdInfo* info; - const char * const names[] = { - "step" - }; - - info = new DebugCmdInfo (dcStep, names, 1, true, ""); - g_DebugCmdInfos.push_back(info); - } - - { - DebugCmdInfo* info; - const char * const names[] = { - "continue" - }; - - info = new DebugCmdInfo (dcContinue, names, 1, true, ""); - g_DebugCmdInfos.push_back(info); - } - - { - DebugCmdInfo* info; - const char * const names[] = { - "finish" - }; - - info = new DebugCmdInfo (dcFinish, names, 1, true, ""); - g_DebugCmdInfos.push_back(info); - } - - { - DebugCmdInfo* info; - const char * const names[] = { - "break" - }; - - info = new DebugCmdInfo (dcBreak, names, 1, false, ""); - g_DebugCmdInfos.push_back(info); - } - - { - DebugCmdInfo* info; - const char * const names[] = { - "cond" - }; - - info = new DebugCmdInfo (dcBreakCondition, names, 1, false, ""); - g_DebugCmdInfos.push_back(info); - } - - { - DebugCmdInfo* info; - const char * const names[] = { - "delete" - }; - - info = new DebugCmdInfo (dcDeleteBreak, names, 1, false, ""); - g_DebugCmdInfos.push_back(info); - } - - { - DebugCmdInfo* info; - const char * const names[] = { - "clear" - }; - - info = new DebugCmdInfo (dcClearBreak, names, 1, false, ""); - g_DebugCmdInfos.push_back(info); - } - - { - DebugCmdInfo* info; - const char * const names[] = { - "disable" - }; - - info = new DebugCmdInfo (dcDisableBreak, names, 1, false, ""); - g_DebugCmdInfos.push_back(info); - } - - { - DebugCmdInfo* info; - const char * const names[] = { - "enable" - }; - - info = new DebugCmdInfo (dcEnableBreak, names, 1, false, ""); - g_DebugCmdInfos.push_back(info); - } - - { - DebugCmdInfo* info; - const char * const names[] = { - "ignore" - }; - - info = new DebugCmdInfo (dcIgnoreBreak, names, 1, false, ""); - g_DebugCmdInfos.push_back(info); - } - - { - DebugCmdInfo* info; - const 
char * const names[] = { - "print" - }; - - info = new DebugCmdInfo (dcPrint, names, 1, false, ""); - g_DebugCmdInfos.push_back(info); - } - - { - DebugCmdInfo* info; - const char * const names[] = { - "backtrace", - "bt" - }; - - info = new DebugCmdInfo (dcBacktrace, names, 2, false, ""); - g_DebugCmdInfos.push_back(info); - } - - { - DebugCmdInfo* info; - const char * const names[] = { - "frame" - }; - - info = new DebugCmdInfo (dcFrame, names, 1, false, ""); - g_DebugCmdInfos.push_back(info); - } - - { - DebugCmdInfo* info; - const char * const names[] = { - "info" - }; - - info = new DebugCmdInfo (dcInfo, names, 1, false, ""); - g_DebugCmdInfos.push_back(info); - } - - { - DebugCmdInfo* info; - const char * const names[] = { - "list" - }; - - info = new DebugCmdInfo (dcList, names, 1, false, ""); - g_DebugCmdInfos.push_back(info); - } - - { - DebugCmdInfo* info; - const char * const names[] = { - "display" - }; - - info = new DebugCmdInfo (dcDisplay, names, 1, false, ""); - g_DebugCmdInfos.push_back(info); - } - - { - DebugCmdInfo* info; - const char * const names[] = { - "undisplay" - }; - - info = new DebugCmdInfo (dcUndisplay, names, 1, false, ""); - g_DebugCmdInfos.push_back(info); - } - -} diff --git a/src/Desc.cc b/src/Desc.cc index 7eac71491d..1712cea05c 100644 --- a/src/Desc.cc +++ b/src/Desc.cc @@ -9,7 +9,7 @@ #include #include -#include "zeek/ConvertUTF.h" +#include "zeek/3rdparty/ConvertUTF.h" #include "zeek/File.h" #include "zeek/IPAddr.h" #include "zeek/Reporter.h" diff --git a/src/Discard.cc b/src/Discard.cc index 6f3d8b94ca..4ccf71f1df 100644 --- a/src/Discard.cc +++ b/src/Discard.cc @@ -34,7 +34,7 @@ bool Discarder::IsActive() return check_ip || check_tcp || check_udp || check_icmp; } -bool Discarder::NextPacket(const std::unique_ptr& ip, int len, int caplen) +bool Discarder::NextPacket(const std::shared_ptr& ip, int len, int caplen) { bool discard_packet = false; diff --git a/src/Discard.h b/src/Discard.h index 79222bd6f8..9f082bdccc 100644 --- 
a/src/Discard.h +++ b/src/Discard.h @@ -26,7 +26,7 @@ public: bool IsActive(); - bool NextPacket(const std::unique_ptr& ip, int len, int caplen); + bool NextPacket(const std::shared_ptr& ip, int len, int caplen); protected: Val* BuildData(const u_char* data, int hdrlen, int len, int caplen); diff --git a/src/Expr.cc b/src/Expr.cc index 0c9c866b6c..9271cf7e22 100644 --- a/src/Expr.cc +++ b/src/Expr.cc @@ -2626,7 +2626,7 @@ TypePtr AssignExpr::InitType() const { if ( op1->Tag() != EXPR_LIST ) { - Error("bad initializer"); + Error("bad initializer, first operand should be a list"); return nullptr; } @@ -3405,12 +3405,21 @@ ValPtr RecordConstructorExpr::Eval(Frame* f) const if ( ! map && exprs.length() != rt->NumFields() ) RuntimeErrorWithCallStack("inconsistency evaluating record constructor"); - auto rv = make_intrusive(std::move(rt)); + auto rv = make_intrusive(rt); for ( int i = 0; i < exprs.length(); ++i ) { + auto v_i = exprs[i]->Eval(f); int ind = map ? (*map)[i] : i; - rv->Assign(ind, exprs[i]->Eval(f)); + + if ( v_i && v_i->GetType()->Tag() == TYPE_VECTOR && + v_i->GetType()->IsUnspecifiedVector() ) + { + const auto& t_ind = rt->GetFieldType(ind); + v_i->AsVectorVal()->Concretize(t_ind->Yield()); + } + + rv->Assign(ind, v_i); } return rv; @@ -3481,7 +3490,10 @@ TableConstructorExpr::TableConstructorExpr(ListExprPtr constructor_list, SetType(init_type(op.get())); if ( ! type ) + { SetError(); + return; + } else if ( type->Tag() != TYPE_TABLE || type->AsTableType()->IsSet() ) SetError("values in table(...) constructor do not specify a table"); @@ -4105,6 +4117,13 @@ RecordValPtr coerce_to_record(RecordTypePtr rt, Val* v, const std::vector& cast_intrusive(field_type)) ) rhs = std::move(new_val); } + else if ( rhs_type->Tag() == TYPE_VECTOR && field_type->Tag() == TYPE_VECTOR && + rhs_type->AsVectorType()->IsUnspecifiedVector() ) + { + auto rhs_v = rhs->AsVectorVal(); + if ( ! 
rhs_v->Concretize(field_type->Yield()) ) + reporter->InternalError("could not concretize empty vector"); + } else if ( BothArithmetic(rhs_type->Tag(), field_type->Tag()) && ! same_type(rhs_type, field_type) ) { @@ -4328,8 +4347,7 @@ InExpr::InExpr(ExprPtr arg_op1, ExprPtr arg_op2) return; } - if ( op2->GetType()->Tag() == TYPE_TABLE && - op2->GetType()->AsTableType()->IsSubNetIndex() ) + if ( op2->GetType()->Tag() == TYPE_TABLE && op2->GetType()->AsTableType()->IsSubNetIndex() ) { SetType(base_type(TYPE_BOOL)); return; @@ -5050,7 +5068,7 @@ ValPtr ListExpr::InitVal(const zeek::Type* t, ValPtr aggr) const ValPtr ListExpr::AddSetInit(const zeek::Type* t, ValPtr aggr) const { if ( aggr->GetType()->Tag() != TYPE_TABLE ) - Internal("bad aggregate in ListExpr::InitVal"); + Internal("bad aggregate in ListExpr::AddSetInit"); TableVal* tv = aggr->AsTableVal(); const TableType* tt = tv->GetType()->AsTableType(); diff --git a/src/Frag.cc b/src/Frag.cc index 06a940008e..17159fee25 100644 --- a/src/Frag.cc +++ b/src/Frag.cc @@ -31,7 +31,7 @@ void FragTimer::Dispatch(double t, bool /* is_expire */) reporter->InternalWarning("fragment timer dispatched w/o reassembler"); } -FragReassembler::FragReassembler(session::Manager* arg_s, const std::unique_ptr& ip, +FragReassembler::FragReassembler(session::Manager* arg_s, const std::shared_ptr& ip, const u_char* pkt, const FragReassemblerKey& k, double t) : Reassembler(0, REASSEM_FRAG) { @@ -74,7 +74,7 @@ FragReassembler::~FragReassembler() delete[] proto_hdr; } -void FragReassembler::AddFragment(double t, const std::unique_ptr& ip, const u_char* pkt) +void FragReassembler::AddFragment(double t, const std::shared_ptr& ip, const u_char* pkt) { const struct ip* ip4 = ip->IP4_Hdr(); @@ -294,7 +294,7 @@ void FragReassembler::BlockInserted(DataBlockMap::const_iterator /* it */) { struct ip* reassem4 = (struct ip*)pkt_start; reassem4->ip_len = htons(frag_size + proto_hdr_len); - reassembled_pkt = std::make_unique(reassem4, true, true); + 
reassembled_pkt = std::make_shared(reassem4, true, true); DeleteTimer(); } @@ -303,7 +303,7 @@ void FragReassembler::BlockInserted(DataBlockMap::const_iterator /* it */) struct ip6_hdr* reassem6 = (struct ip6_hdr*)pkt_start; reassem6->ip6_plen = htons(frag_size + proto_hdr_len - 40); const IPv6_Hdr_Chain* chain = new IPv6_Hdr_Chain(reassem6, next_proto, n); - reassembled_pkt = std::make_unique(reassem6, true, n, chain, true); + reassembled_pkt = std::make_shared(reassem6, true, n, chain, true); DeleteTimer(); } @@ -338,7 +338,7 @@ FragmentManager::~FragmentManager() Clear(); } -FragReassembler* FragmentManager::NextFragment(double t, const std::unique_ptr& ip, +FragReassembler* FragmentManager::NextFragment(double t, const std::shared_ptr& ip, const u_char* pkt) { uint32_t frag_id = ip->ID(); diff --git a/src/Frag.h b/src/Frag.h index c76989d418..eff680011c 100644 --- a/src/Frag.h +++ b/src/Frag.h @@ -31,17 +31,17 @@ using FragReassemblerKey = std::tuple; class FragReassembler : public Reassembler { public: - FragReassembler(session::Manager* s, const std::unique_ptr& ip, const u_char* pkt, + FragReassembler(session::Manager* s, const std::shared_ptr& ip, const u_char* pkt, const FragReassemblerKey& k, double t); ~FragReassembler() override; - void AddFragment(double t, const std::unique_ptr& ip, const u_char* pkt); + void AddFragment(double t, const std::shared_ptr& ip, const u_char* pkt); void Expire(double t); void DeleteTimer(); void ClearTimer() { expire_timer = nullptr; } - std::unique_ptr ReassembledPkt() { return std::move(reassembled_pkt); } + std::shared_ptr ReassembledPkt() { return std::move(reassembled_pkt); } const FragReassemblerKey& Key() const { return key; } protected: @@ -50,7 +50,7 @@ protected: void Weird(const char* name) const; u_char* proto_hdr; - std::unique_ptr reassembled_pkt; + std::shared_ptr reassembled_pkt; session::Manager* s; uint64_t frag_size; // size of fully reassembled fragment FragReassemblerKey key; @@ -81,7 +81,7 @@ public: 
FragmentManager() = default; ~FragmentManager(); - FragReassembler* NextFragment(double t, const std::unique_ptr& ip, const u_char* pkt); + FragReassembler* NextFragment(double t, const std::shared_ptr& ip, const u_char* pkt); void Clear(); void Remove(detail::FragReassembler* f); diff --git a/src/IP.cc b/src/IP.cc index 04ac1b6676..330f2b5a85 100644 --- a/src/IP.cc +++ b/src/IP.cc @@ -384,7 +384,13 @@ RecordValPtr IP_Hdr::ToPktHdrVal(RecordValPtr pkt_hdr, int sindex) const auto tcp_hdr = make_intrusive(tcp_hdr_type); int tcp_hdr_len = tp->th_off * 4; - int data_len = PayloadLen() - tcp_hdr_len; + + // account for cases in which the payload length in the TCP header is not set, + // or is set to an impossible value. In these cases, return 0. + int data_len = 0; + auto payload_len = PayloadLen(); + if ( payload_len >= tcp_hdr_len ) + data_len = payload_len - tcp_hdr_len; tcp_hdr->Assign(0, val_mgr->Port(ntohs(tp->th_sport), TRANSPORT_TCP)); tcp_hdr->Assign(1, val_mgr->Port(ntohs(tp->th_dport), TRANSPORT_TCP)); diff --git a/src/IP.h b/src/IP.h index 3c2fbef813..0488290b00 100644 --- a/src/IP.h +++ b/src/IP.h @@ -411,11 +411,18 @@ public: /** * Returns the length of the IP packet's payload (length of packet minus * header length or, for IPv6, also minus length of all extension headers). + * + * Also returns 0 if the IPv4 length field is set to zero - which is, e.g., + * the case when TCP segment offloading is enabled. */ uint16_t PayloadLen() const { if ( ip4 ) - return ntohs(ip4->ip_len) - ip4->ip_hl * 4; + { + // prevent overflow in case of segment offloading/zeroed header length. + auto total_len = ntohs(ip4->ip_len); + return total_len ? 
total_len - ip4->ip_hl * 4 : 0; + } return ntohs(ip6->ip6_plen) + 40 - ip6_hdrs->TotalLength(); } diff --git a/src/IPAddr.cc b/src/IPAddr.cc index 7c88b82ed9..5cb3f4a998 100644 --- a/src/IPAddr.cc +++ b/src/IPAddr.cc @@ -6,12 +6,12 @@ #include #include +#include "zeek/3rdparty/bro_inet_ntop.h" #include "zeek/Conn.h" #include "zeek/Hash.h" #include "zeek/Reporter.h" #include "zeek/ZeekString.h" #include "zeek/analyzer/Manager.h" -#include "zeek/bro_inet_ntop.h" namespace zeek { @@ -24,8 +24,93 @@ namespace detail ConnKey::ConnKey(const IPAddr& src, const IPAddr& dst, uint16_t src_port, uint16_t dst_port, TransportProto t, bool one_way) - : transport(t) { + Init(src, dst, src_port, dst_port, t, one_way); + } + +ConnKey::ConnKey(const ConnTuple& id) + { + Init(id.src_addr, id.dst_addr, id.src_port, id.dst_port, id.proto, id.is_one_way); + } + +ConnKey& ConnKey::operator=(const ConnKey& rhs) + { + if ( this == &rhs ) + return *this; + + // Because of padding in the object, this needs to memset to clear out + // the extra memory used by padding. Otherwise, the session key stuff + // doesn't work quite right. + memset(this, 0, sizeof(ConnKey)); + + memcpy(&ip1, &rhs.ip1, sizeof(in6_addr)); + memcpy(&ip2, &rhs.ip2, sizeof(in6_addr)); + port1 = rhs.port1; + port2 = rhs.port2; + transport = rhs.transport; + valid = rhs.valid; + + return *this; + } + +ConnKey::ConnKey(Val* v) + { + const auto& vt = v->GetType(); + if ( ! IsRecord(vt->Tag()) ) + { + valid = false; + return; + } + + RecordType* vr = vt->AsRecordType(); + auto vl = v->As(); + + int orig_h, orig_p; // indices into record's value list + int resp_h, resp_p; + + if ( vr == id::conn_id ) + { + orig_h = 0; + orig_p = 1; + resp_h = 2; + resp_p = 3; + } + else + { + // While it's not a conn_id, it may have equivalent fields. 
+ orig_h = vr->FieldOffset("orig_h"); + resp_h = vr->FieldOffset("resp_h"); + orig_p = vr->FieldOffset("orig_p"); + resp_p = vr->FieldOffset("resp_p"); + + if ( orig_h < 0 || resp_h < 0 || orig_p < 0 || resp_p < 0 ) + { + valid = false; + return; + } + + // ### we ought to check that the fields have the right + // types, too. + } + + const IPAddr& orig_addr = vl->GetFieldAs(orig_h); + const IPAddr& resp_addr = vl->GetFieldAs(resp_h); + + auto orig_portv = vl->GetFieldAs(orig_p); + auto resp_portv = vl->GetFieldAs(resp_p); + + Init(orig_addr, resp_addr, htons((unsigned short)orig_portv->Port()), + htons((unsigned short)resp_portv->Port()), orig_portv->PortType(), false); + } + +void ConnKey::Init(const IPAddr& src, const IPAddr& dst, uint16_t src_port, uint16_t dst_port, + TransportProto t, bool one_way) + { + // Because of padding in the object, this needs to memset to clear out + // the extra memory used by padding. Otherwise, the session key stuff + // doesn't work quite right. + memset(this, 0, sizeof(ConnKey)); + // Lookup up connection based on canonical ordering, which is // the smaller of and // followed by the other. 
@@ -43,25 +128,9 @@ ConnKey::ConnKey(const IPAddr& src, const IPAddr& dst, uint16_t src_port, uint16 port1 = dst_port; port2 = src_port; } - } -ConnKey::ConnKey(const ConnTuple& id) - : ConnKey(id.src_addr, id.dst_addr, id.src_port, id.dst_port, id.proto, id.is_one_way) - { - } - -ConnKey& ConnKey::operator=(const ConnKey& rhs) - { - if ( this == &rhs ) - return *this; - - memcpy(&ip1, &rhs.ip1, sizeof(in6_addr)); - memcpy(&ip2, &rhs.ip2, sizeof(in6_addr)); - port1 = rhs.port1; - port2 = rhs.port2; - transport = rhs.transport; - - return *this; + transport = t; + valid = true; } } // namespace detail diff --git a/src/IPAddr.h b/src/IPAddr.h index 6295e7af1d..ea4ed9ac08 100644 --- a/src/IPAddr.h +++ b/src/IPAddr.h @@ -17,24 +17,28 @@ namespace zeek class String; struct ConnTuple; +class Val; namespace detail { class HashKey; -struct ConnKey +class ConnKey { +public: in6_addr ip1; in6_addr ip2; - uint16_t port1; - uint16_t port2; - TransportProto transport; + uint16_t port1 = 0; + uint16_t port2 = 0; + TransportProto transport = TRANSPORT_UNKNOWN; + bool valid = true; ConnKey(const IPAddr& src, const IPAddr& dst, uint16_t src_port, uint16_t dst_port, TransportProto t, bool one_way); ConnKey(const ConnTuple& conn); ConnKey(const ConnKey& rhs) { *this = rhs; } + ConnKey(Val* v); bool operator<(const ConnKey& rhs) const { return memcmp(this, &rhs, sizeof(ConnKey)) < 0; } bool operator<=(const ConnKey& rhs) const { return memcmp(this, &rhs, sizeof(ConnKey)) <= 0; } @@ -44,6 +48,10 @@ struct ConnKey bool operator>(const ConnKey& rhs) const { return memcmp(this, &rhs, sizeof(ConnKey)) > 0; } ConnKey& operator=(const ConnKey& rhs); + +private: + void Init(const IPAddr& src, const IPAddr& dst, uint16_t src_port, uint16_t dst_port, + TransportProto t, bool one_way); }; using ConnIDKey [[deprecated("Remove in v5.1. 
Use zeek::detail::ConnKey.")]] = ConnKey; @@ -430,7 +438,7 @@ public: static const IPAddr v6_unspecified; private: - friend struct detail::ConnKey; + friend class detail::ConnKey; friend class IPPrefix; /** diff --git a/src/OpaqueVal.cc b/src/OpaqueVal.cc index 66a5aa1d31..e18d6e7e28 100644 --- a/src/OpaqueVal.cc +++ b/src/OpaqueVal.cc @@ -1,5 +1,12 @@ // See the file "COPYING" in the main distribution directory for copyright. +// We use deprecated APIs for MD5, SHA1 and SHA256. The reason is that, as of OpenSSL 3.0, there is +// no API anymore that lets you store the internal state of hashing functions. For more information, +// see https://github.com/zeek/zeek/issues/1379 and https://github.com/openssl/openssl/issues/14222 +// Since I don't feel like getting warnings every time we compile this file - let's silence them. + +#define OPENSSL_SUPPRESS_DEPRECATED + #include "zeek/OpaqueVal.h" #include @@ -210,11 +217,7 @@ HashVal::HashVal(OpaqueTypePtr t) : OpaqueVal(std::move(t)) MD5Val::MD5Val() : HashVal(md5_type) { } -MD5Val::~MD5Val() - { - if ( IsValid() ) - EVP_MD_CTX_free(ctx); - } +MD5Val::~MD5Val() { } void HashVal::digest_one(EVP_MD_CTX* h, const Val* v) { @@ -245,7 +248,7 @@ ValPtr MD5Val::DoClone(CloneState* state) if ( ! out->Init() ) return nullptr; - EVP_MD_CTX_copy_ex(out->ctx, ctx); + out->ctx = ctx; } return state->NewClone(this, std::move(out)); @@ -254,7 +257,7 @@ ValPtr MD5Val::DoClone(CloneState* state) bool MD5Val::DoInit() { assert(! IsValid()); - ctx = detail::hash_init(detail::Hash_MD5); + MD5_Init(&ctx); return true; } @@ -263,7 +266,7 @@ bool MD5Val::DoFeed(const void* data, size_t size) if ( ! 
IsValid() ) return false; - detail::hash_update(ctx, data, size); + MD5_Update(&ctx, data, size); return true; } @@ -273,7 +276,7 @@ StringValPtr MD5Val::DoGet() return val_mgr->EmptyString(); u_char digest[MD5_DIGEST_LENGTH]; - detail::hash_final(ctx, digest); + MD5_Final(digest, &ctx); return make_intrusive(detail::md5_digest_print(digest)); } @@ -284,20 +287,9 @@ broker::expected MD5Val::DoSerialize() const if ( ! IsValid() ) return {broker::vector{false}}; - MD5_CTX* md = (MD5_CTX*)EVP_MD_CTX_md_data(ctx); - - broker::vector d = {true, - static_cast(md->A), - static_cast(md->B), - static_cast(md->C), - static_cast(md->D), - static_cast(md->Nl), - static_cast(md->Nh), - static_cast(md->num)}; - - for ( int i = 0; i < MD5_LBLOCK; ++i ) - d.emplace_back(static_cast(md->data[i])); + auto data = std::string(reinterpret_cast(&ctx), sizeof(ctx)); + broker::vector d = {true, data}; return {std::move(d)}; } @@ -317,40 +309,24 @@ bool MD5Val::DoUnserialize(const broker::data& data) return true; } + if ( (*d).size() != 2 ) + return false; + + auto s = caf::get_if(&(*d)[1]); + if ( ! s ) + return false; + + if ( sizeof(ctx) != s->size() ) + return false; + Init(); - MD5_CTX* md = (MD5_CTX*)EVP_MD_CTX_md_data(ctx); - - if ( ! get_vector_idx(*d, 1, &md->A) ) - return false; - if ( ! get_vector_idx(*d, 2, &md->B) ) - return false; - if ( ! get_vector_idx(*d, 3, &md->C) ) - return false; - if ( ! get_vector_idx(*d, 4, &md->D) ) - return false; - if ( ! get_vector_idx(*d, 5, &md->Nl) ) - return false; - if ( ! get_vector_idx(*d, 6, &md->Nh) ) - return false; - if ( ! get_vector_idx(*d, 7, &md->num) ) - return false; - - for ( int i = 0; i < MD5_LBLOCK; ++i ) - { - if ( ! 
get_vector_idx(*d, 8 + i, &md->data[i]) ) - return false; - } - + memcpy(&ctx, s->data(), s->size()); return true; } SHA1Val::SHA1Val() : HashVal(sha1_type) { } -SHA1Val::~SHA1Val() - { - if ( IsValid() ) - EVP_MD_CTX_free(ctx); - } +SHA1Val::~SHA1Val() { } ValPtr SHA1Val::DoClone(CloneState* state) { @@ -361,7 +337,7 @@ ValPtr SHA1Val::DoClone(CloneState* state) if ( ! out->Init() ) return nullptr; - EVP_MD_CTX_copy_ex(out->ctx, ctx); + out->ctx = ctx; } return state->NewClone(this, std::move(out)); @@ -370,7 +346,7 @@ ValPtr SHA1Val::DoClone(CloneState* state) bool SHA1Val::DoInit() { assert(! IsValid()); - ctx = detail::hash_init(detail::Hash_SHA1); + SHA1_Init(&ctx); return true; } @@ -379,7 +355,7 @@ bool SHA1Val::DoFeed(const void* data, size_t size) if ( ! IsValid() ) return false; - detail::hash_update(ctx, data, size); + SHA1_Update(&ctx, data, size); return true; } @@ -389,7 +365,7 @@ StringValPtr SHA1Val::DoGet() return val_mgr->EmptyString(); u_char digest[SHA_DIGEST_LENGTH]; - detail::hash_final(ctx, digest); + SHA1_Final(digest, &ctx); return make_intrusive(detail::sha1_digest_print(digest)); } @@ -400,20 +376,9 @@ broker::expected SHA1Val::DoSerialize() const if ( ! IsValid() ) return {broker::vector{false}}; - SHA_CTX* md = (SHA_CTX*)EVP_MD_CTX_md_data(ctx); + auto data = std::string(reinterpret_cast(&ctx), sizeof(ctx)); - broker::vector d = {true, - static_cast(md->h0), - static_cast(md->h1), - static_cast(md->h2), - static_cast(md->h3), - static_cast(md->h4), - static_cast(md->Nl), - static_cast(md->Nh), - static_cast(md->num)}; - - for ( int i = 0; i < SHA_LBLOCK; ++i ) - d.emplace_back(static_cast(md->data[i])); + broker::vector d = {true, data}; return {std::move(d)}; } @@ -434,42 +399,24 @@ bool SHA1Val::DoUnserialize(const broker::data& data) return true; } + if ( (*d).size() != 2 ) + return false; + + auto s = caf::get_if(&(*d)[1]); + if ( ! 
s ) + return false; + + if ( sizeof(ctx) != s->size() ) + return false; + Init(); - SHA_CTX* md = (SHA_CTX*)EVP_MD_CTX_md_data(ctx); - - if ( ! get_vector_idx(*d, 1, &md->h0) ) - return false; - if ( ! get_vector_idx(*d, 2, &md->h1) ) - return false; - if ( ! get_vector_idx(*d, 3, &md->h2) ) - return false; - if ( ! get_vector_idx(*d, 4, &md->h3) ) - return false; - if ( ! get_vector_idx(*d, 5, &md->h4) ) - return false; - if ( ! get_vector_idx(*d, 6, &md->Nl) ) - return false; - if ( ! get_vector_idx(*d, 7, &md->Nh) ) - return false; - if ( ! get_vector_idx(*d, 8, &md->num) ) - return false; - - for ( int i = 0; i < SHA_LBLOCK; ++i ) - { - if ( ! get_vector_idx(*d, 9 + i, &md->data[i]) ) - return false; - } - + memcpy(&ctx, s->data(), s->size()); return true; } SHA256Val::SHA256Val() : HashVal(sha256_type) { } -SHA256Val::~SHA256Val() - { - if ( IsValid() ) - EVP_MD_CTX_free(ctx); - } +SHA256Val::~SHA256Val() { } ValPtr SHA256Val::DoClone(CloneState* state) { @@ -480,7 +427,7 @@ ValPtr SHA256Val::DoClone(CloneState* state) if ( ! out->Init() ) return nullptr; - EVP_MD_CTX_copy_ex(out->ctx, ctx); + out->ctx = ctx; } return state->NewClone(this, std::move(out)); @@ -489,7 +436,7 @@ ValPtr SHA256Val::DoClone(CloneState* state) bool SHA256Val::DoInit() { assert(! IsValid()); - ctx = detail::hash_init(detail::Hash_SHA256); + SHA256_Init(&ctx); return true; } @@ -498,7 +445,7 @@ bool SHA256Val::DoFeed(const void* data, size_t size) if ( ! IsValid() ) return false; - detail::hash_update(ctx, data, size); + SHA256_Update(&ctx, data, size); return true; } @@ -508,7 +455,7 @@ StringValPtr SHA256Val::DoGet() return val_mgr->EmptyString(); u_char digest[SHA256_DIGEST_LENGTH]; - detail::hash_final(ctx, digest); + SHA256_Final(digest, &ctx); return make_intrusive(detail::sha256_digest_print(digest)); } @@ -519,16 +466,9 @@ broker::expected SHA256Val::DoSerialize() const if ( ! 
IsValid() ) return {broker::vector{false}}; - SHA256_CTX* md = (SHA256_CTX*)EVP_MD_CTX_md_data(ctx); + auto data = std::string(reinterpret_cast(&ctx), sizeof(ctx)); - broker::vector d = {true, static_cast(md->Nl), static_cast(md->Nh), - static_cast(md->num), static_cast(md->md_len)}; - - for ( int i = 0; i < 8; ++i ) - d.emplace_back(static_cast(md->h[i])); - - for ( int i = 0; i < SHA_LBLOCK; ++i ) - d.emplace_back(static_cast(md->data[i])); + broker::vector d = {true, data}; return {std::move(d)}; } @@ -549,30 +489,18 @@ bool SHA256Val::DoUnserialize(const broker::data& data) return true; } + if ( (*d).size() != 2 ) + return false; + + auto s = caf::get_if(&(*d)[1]); + if ( ! s ) + return false; + + if ( sizeof(ctx) != s->size() ) + return false; + Init(); - SHA256_CTX* md = (SHA256_CTX*)EVP_MD_CTX_md_data(ctx); - - if ( ! get_vector_idx(*d, 1, &md->Nl) ) - return false; - if ( ! get_vector_idx(*d, 2, &md->Nh) ) - return false; - if ( ! get_vector_idx(*d, 3, &md->num) ) - return false; - if ( ! get_vector_idx(*d, 4, &md->md_len) ) - return false; - - for ( int i = 0; i < 8; ++i ) - { - if ( ! get_vector_idx(*d, 5 + i, &md->h[i]) ) - return false; - } - - for ( int i = 0; i < SHA_LBLOCK; ++i ) - { - if ( ! 
get_vector_idx(*d, 13 + i, &md->data[i]) ) - return false; - } - + memcpy(&ctx, s->data(), s->size()); return true; } diff --git a/src/OpaqueVal.h b/src/OpaqueVal.h index e790a31533..c02dce0691 100644 --- a/src/OpaqueVal.h +++ b/src/OpaqueVal.h @@ -3,6 +3,7 @@ #pragma once #include +#include #include #include // for u_char @@ -245,7 +246,7 @@ protected: DECLARE_OPAQUE_VALUE(MD5Val) private: - EVP_MD_CTX* ctx; + MD5_CTX ctx; }; class SHA1Val : public HashVal @@ -270,7 +271,7 @@ protected: DECLARE_OPAQUE_VALUE(SHA1Val) private: - EVP_MD_CTX* ctx; + SHA_CTX ctx; }; class SHA256Val : public HashVal @@ -295,7 +296,7 @@ protected: DECLARE_OPAQUE_VALUE(SHA256Val) private: - EVP_MD_CTX* ctx; + SHA256_CTX ctx; }; class EntropyVal : public OpaqueVal diff --git a/src/Options.cc b/src/Options.cc index 9bc235cbcc..9ddeed5b0c 100644 --- a/src/Options.cc +++ b/src/Options.cc @@ -17,7 +17,7 @@ #include #include -#include "zeek/bsd-getopt-long.h" +#include "zeek/3rdparty/bsd-getopt-long.h" #include "zeek/logging/writers/ascii/Ascii.h" namespace zeek @@ -85,89 +85,92 @@ void usage(const char* prog, int code) fprintf(stderr, "usage: %s [options] [file ...]\n", prog); fprintf(stderr, "usage: %s --test [doctest-options] -- [options] [file ...]\n", prog); - fprintf(stderr, " | Zeek script file, or read stdin\n"); + fprintf(stderr, " | Zeek script file, or read stdin\n"); fprintf(stderr, - " -a|--parse-only | exit immediately after parsing scripts\n"); + " -a|--parse-only | exit immediately after parsing scripts\n"); fprintf(stderr, - " -b|--bare-mode | don't load scripts from the base/ directory\n"); - fprintf(stderr, " -d|--debug-script | activate Zeek script debugging\n"); - fprintf(stderr, " -e|--exec | augment loaded scripts by given code\n"); - fprintf(stderr, " -f|--filter | tcpdump filter\n"); - fprintf(stderr, " -h|--help | command line help\n"); + " -b|--bare-mode | don't load scripts from the base/ directory\n"); fprintf(stderr, - " -i|--iface | read from given interface (only 
one allowed)\n"); + " -c|--capture-unprocessed | write unprocessed packets to a tcpdump file\n"); + fprintf(stderr, " -d|--debug-script | activate Zeek script debugging\n"); + fprintf(stderr, " -e|--exec | augment loaded scripts by given code\n"); + fprintf(stderr, " -f|--filter | tcpdump filter\n"); + fprintf(stderr, " -h|--help | command line help\n"); + fprintf(stderr, + " -i|--iface | read from given interface (only one allowed)\n"); fprintf( stderr, - " -p|--prefix | add given prefix to Zeek script file resolution\n"); - fprintf(stderr, " -r|--readfile | read from given tcpdump file (only one " + " -p|--prefix | add given prefix to Zeek script file resolution\n"); + fprintf(stderr, " -r|--readfile | read from given tcpdump file (only one " "allowed, pass '-' as the filename to read from stdin)\n"); - fprintf(stderr, " -s|--rulefile | read rules from given file\n"); - fprintf(stderr, " -t|--tracefile | activate execution tracing\n"); - fprintf(stderr, " -u|--usage-issues | find variable usage issues and exit; use " - "-uu for deeper/more expensive analysis\n"); - fprintf(stderr, " -v|--version | print version and exit\n"); - fprintf(stderr, " -w|--writefile | write to given tcpdump file\n"); + fprintf(stderr, " -s|--rulefile | read rules from given file\n"); + fprintf(stderr, " -t|--tracefile | activate execution tracing\n"); + fprintf(stderr, " -u|--usage-issues | find variable usage issues and exit\n"); + fprintf(stderr, " -v|--version | print version and exit\n"); + fprintf(stderr, " -w|--writefile | write to given tcpdump file\n"); #ifdef DEBUG - fprintf(stderr, " -B|--debug | Enable debugging output for selected " + fprintf(stderr, " -B|--debug | Enable debugging output for selected " "streams ('-B help' for help)\n"); #endif - fprintf(stderr, " -C|--no-checksums | ignore checksums\n"); - fprintf(stderr, " -D|--deterministic | initialize random seeds to zero\n"); - fprintf(stderr, " -F|--force-dns | force DNS\n"); - fprintf(stderr, " -G|--load-seeds | load 
seeds from given file\n"); - fprintf(stderr, " -H|--save-seeds | save seeds to given file\n"); - fprintf(stderr, " -I|--print-id | print out given ID\n"); - fprintf(stderr, " -N|--print-plugins | print available plugins and exit (-NN " + fprintf(stderr, " -C|--no-checksums | ignore checksums\n"); + fprintf(stderr, " -D|--deterministic | initialize random seeds to zero\n"); + fprintf(stderr, " -F|--force-dns | force DNS\n"); + fprintf(stderr, " -G|--load-seeds | load seeds from given file\n"); + fprintf(stderr, " -H|--save-seeds | save seeds to given file\n"); + fprintf(stderr, " -I|--print-id | print out given ID\n"); + fprintf(stderr, " -N|--print-plugins | print available plugins and exit (-NN " "for verbose)\n"); - fprintf(stderr, " -O|--optimize[=