diff --git a/.cirrus.yml b/.cirrus.yml index e91b0091b7..bfc053f8db 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -8,11 +8,13 @@ cpus: &CPUS 4 btest_jobs: &BTEST_JOBS 4 btest_retries: &BTEST_RETRIES 2 -memory: &MEMORY 4GB +memory: &MEMORY 8GB config: &CONFIG --build-type=release --disable-broker-tests --prefix=$CIRRUS_WORKING_DIR/install static_config: &STATIC_CONFIG --build-type=release --disable-broker-tests --enable-static-broker --enable-static-binpac --prefix=$CIRRUS_WORKING_DIR/install -sanitizer_config: &SANITIZER_CONFIG --build-type=debug --disable-broker-tests --sanitizers=address,undefined --enable-fuzzers --enable-coverage +asan_sanitizer_config: &ASAN_SANITIZER_CONFIG --build-type=debug --disable-broker-tests --sanitizers=address --enable-fuzzers --enable-coverage +ubsan_sanitizer_config: &UBSAN_SANITIZER_CONFIG --build-type=debug --disable-broker-tests --sanitizers=undefined --enable-fuzzers +tsan_sanitizer_config: &TSAN_SANITIZER_CONFIG --build-type=debug --disable-broker-tests --sanitizers=thread --enable-fuzzers mobile_ipv6_config: &MOBILE_IPV6_CONFIG --build-type=release --enable-mobile-ipv6 --disable-broker-tests --prefix=$CIRRUS_WORKING_DIR/install openssl30_config: &OPENSSL30_CONFIG --build-type=release --disable-broker-tests --with-openssl=/opt/openssl --prefix=$CIRRUS_WORKING_DIR/install @@ -108,9 +110,16 @@ fedora34_task: << : *RESOURCES_TEMPLATE << : *CI_TEMPLATE +centosstream9_task: + container: + # Stream 9 EOL: Around Dec 2027 + dockerfile: ci/centos-stream-9/Dockerfile + << : *RESOURCES_TEMPLATE + << : *CI_TEMPLATE + centosstream8_task: container: - # Stream 8 support should be 5 years, so until 2024. 
but I cannot find a concrete timeline --cpk + # Stream 8 EOL: May 31, 2024 dockerfile: ci/centos-stream-8/Dockerfile << : *RESOURCES_TEMPLATE << : *CI_TEMPLATE @@ -160,13 +169,6 @@ debian9_32bit_task: << : *RESOURCES_TEMPLATE << : *CI_TEMPLATE -opensuse_leap_15_2_task: - container: - # Opensuse Leap 15.2 EOL: Dec 2021 - dockerfile: ci/opensuse-leap-15.2/Dockerfile - << : *RESOURCES_TEMPLATE - << : *CI_TEMPLATE - opensuse_leap_15_3_task: container: # Opensuse Leap 15.3 EOL: TBD @@ -174,6 +176,13 @@ opensuse_leap_15_3_task: << : *RESOURCES_TEMPLATE << : *CI_TEMPLATE +ubuntu21_task: + container: + # Ubuntu 21.10 EOL: July 2022 + dockerfile: ci/ubuntu-21.10/Dockerfile + << : *RESOURCES_TEMPLATE + << : *CI_TEMPLATE + ubuntu20_task: container: # Ubuntu 20.04 EOL: April 2025 @@ -261,7 +270,7 @@ openssl30_task: env: ZEEK_CI_CONFIGURE_FLAGS: *OPENSSL30_CONFIG -sanitizer_task: +asan_sanitizer_task: container: # Just uses a recent/common distro to run memory error/leak checks. dockerfile: ci/ubuntu-20.04/Dockerfile @@ -270,10 +279,38 @@ sanitizer_task: memory: 12GB << : *CI_TEMPLATE test_fuzzers_script: ./ci/test-fuzzers.sh - coverage_script: ./ci/upload-coverage.sh env: CXXFLAGS: -DZEEK_DICT_DEBUG - ZEEK_CI_CONFIGURE_FLAGS: *SANITIZER_CONFIG - ZEEK_TAILORED_UB_CHECKS: 1 + ZEEK_CI_CONFIGURE_FLAGS: *ASAN_SANITIZER_CONFIG ZEEK_CI_DISABLE_SCRIPT_PROFILING: 1 + ASAN_OPTIONS: detect_leaks=1 + +ubsan_sanitizer_task: + container: + # Just uses a recent/common distro to run undefined behavior checks. + dockerfile: ci/ubuntu-20.04/Dockerfile + cpu: 4 + # Sanitizer builds use more memory than a typical config. 
+ memory: 12GB + << : *CI_TEMPLATE + test_fuzzers_script: ./ci/test-fuzzers.sh + env: + CXXFLAGS: -DZEEK_DICT_DEBUG + ZEEK_CI_CONFIGURE_FLAGS: *UBSAN_SANITIZER_CONFIG + ZEEK_CI_DISABLE_SCRIPT_PROFILING: 1 + ZEEK_TAILORED_UB_CHECKS: 1 UBSAN_OPTIONS: print_stacktrace=1 + +# tsan_sanitizer_task: +# container: +# # Just uses a recent/common distro to run memory error/leak checks. +# dockerfile: ci/ubuntu-20.04/Dockerfile +# cpu: 4 +# # AddressSanitizer uses a lot more memory than a typical config. +# memory: 12GB +# << : *CI_TEMPLATE +# test_fuzzers_script: ./ci/test-fuzzers.sh +# env: +# CXXFLAGS: -DZEEK_DICT_DEBUG +# ZEEK_CI_CONFIGURE_FLAGS: *TSAN_SANITIZER_CONFIG +# ZEEK_CI_DISABLE_SCRIPT_PROFILING: 1 diff --git a/.github/workflows/coverity-scan.yml b/.github/workflows/coverity-scan.yml index 2347ed1e07..2b4655b5c7 100644 --- a/.github/workflows/coverity-scan.yml +++ b/.github/workflows/coverity-scan.yml @@ -11,14 +11,8 @@ jobs: steps: - uses: actions/checkout@v2 - - - name: Update Submodules - shell: bash - run: | - auth_header="$(git config --local --get http.https://github.com/.extraheader)" - git submodule sync --recursive - git -c "http.extraheader=$auth_header" -c protocol.version=2 \ - submodule update --init --force --recursive --depth=1 + with: + submodules: "recursive" - name: Fetch Dependencies run: | @@ -45,7 +39,7 @@ jobs: wget - name: Install CAF - run: ( cd auxil/broker/caf && ./configure --prefix=`pwd`/build/install-root && cd build && make -j 3 install ) + run: cd auxil/broker/caf && ./configure --prefix=`pwd`/build/install-root && cd build && make -j $(nproc) install - name: Configure run: ./configure --build-type=debug --with-caf=`pwd`/auxil/broker/caf/build/install-root --disable-broker-tests @@ -65,7 +59,7 @@ jobs: - name: Build run: | export PATH=`pwd`/coverity-tools/bin:$PATH - ( cd build && cov-build --dir cov-int make -j 3 ) + ( cd build && cov-build --dir cov-int make -j $(nproc) ) cat build/cov-int/build-log.txt - name: Submit diff --git 
a/.github/workflows/generate-docs.yml b/.github/workflows/generate-docs.yml index a940523edf..6159ddc528 100644 --- a/.github/workflows/generate-docs.yml +++ b/.github/workflows/generate-docs.yml @@ -1,27 +1,36 @@ name: Generate Documentation on: + pull_request: schedule: - cron: '0 0 * * *' +defaults: + run: + shell: bash + jobs: generate: if: github.repository == 'zeek/zeek' - runs-on: ubuntu-18.04 + runs-on: ubuntu-latest steps: + # We only perform a push if the action was triggered via a schedule + # event, so we only need to authenticate in that case. Use + # unauthenticated access otherwise so this action can e.g., also run from + # clones. - uses: actions/checkout@v2 + if: github.event_name == 'schedule' with: + submodules: "recursive" token: ${{ secrets.ZEEK_BOT_TOKEN }} + - uses: actions/checkout@v2 + if: github.event_name != 'schedule' + with: + submodules: "recursive" - - name: Sync Submodules - shell: bash - run: | - auth_header="$(git config --local --get http.https://github.com/.extraheader)" - git submodule sync --recursive - git -c "http.extraheader=$auth_header" -c protocol.version=2 \ - submodule update --init --force --recursive --depth=1 - ( cd doc && git checkout master ) + - name: Switch doc submodule to master + run: cd doc && git checkout master - name: Fetch Dependencies run: | @@ -51,14 +60,12 @@ jobs: sudo pip3 install -r doc/requirements.txt - name: Configure - run: ./configure + run: ./configure --disable-broker-tests --disable-cpp-tests - name: Build - run: | - ( cd build && make -j 3 ) + run: cd build && make -j $(nproc) - name: Generate Docs - shell: bash run: | git config --global user.name zeek-bot git config --global user.email info@zeek.org @@ -76,25 +83,31 @@ jobs: echo "*** Check for Sphinx Warnings ***" grep -q WARNING make.out && exit 1 rm make.out - echo "*** Pushing zeek-docs Changes ***" - git remote set-url origin "https://zeek-bot:${{ secrets.ZEEK_BOT_TOKEN }}@github.com/zeek/zeek-docs" + + - name: Push zeek-docs Changes 
+ if: github.event_name == 'schedule' + run: | + cd doc git add scripts/ script-reference/ git status - git commit -m "Generate docs" && git push || /bin/true - cd .. + # git commit errors when there's nothing to commit, so guard it + # with a check that detects whether there's anything to commit/push. + git diff-index --quiet HEAD || { git commit -m "Generate docs" && git push; } - name: Update zeek-docs Submodule + if: github.event_name == 'schedule' run: | - echo "*** Update zeek/doc Submodule ***" git config --global user.name zeek-bot git config --global user.email info@zeek.org - git remote add auth "https://zeek-bot:${{ secrets.ZEEK_BOT_TOKEN }}@github.com/zeek/zeek" git add doc git status - git commit -m 'Update doc submodule [nomail] [skip ci]' && git push auth master || /bin/true + # Similar logic here: proceed only if there's a change in the submodule. + git diff-index --quiet HEAD || { git commit -m 'Update doc submodule [nomail] [skip ci]' && git push; } - name: Send email - if: failure() + # Only send notifications for scheduled runs. Runs from pull requests + # show failures in the Github UI. 
+ if: failure() && github.event_name == 'schedule' uses: dawidd6/action-send-mail@v3.4.1 with: server_address: ${{secrets.SMTP_HOST}} diff --git a/.gitmodules b/.gitmodules index 4318212fe0..cb20683187 100644 --- a/.gitmodules +++ b/.gitmodules @@ -49,3 +49,12 @@ [submodule "auxil/zeek-client"] path = auxil/zeek-client url = https://github.com/zeek/zeek-client +[submodule "auxil/gen-zam"] + path = auxil/gen-zam + url = https://github.com/zeek/gen-zam +[submodule "auxil/c-ares"] + path = auxil/c-ares + url = https://github.com/c-ares/c-ares +[submodule "auxil/out_ptr"] + path = auxil/out_ptr + url = https://github.com/soasis/out_ptr.git diff --git a/CHANGES b/CHANGES index 872c6db0d2..19748c3725 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,477 @@ +5.0.0-dev.332 | 2022-04-28 19:52:04 +0000 + + * Initialize OpenSSL on startup (Dominik Charousset, Corelight) + + * Avoid double-initialization of OpenSSL (Dominik Charousset, Corelight) + + * Canonify intel.log in read-file-dist-cluster test (Dominik Charousset, Corelight) + + * Port Zeek to latest Broker API (Dominik Charousset) + +5.0.0-dev.322 | 2022-04-27 21:00:29 +0000 + + * Disable OpenSSL initialization starting with 1.1.0 (Johanna Amann, Corelight) + + Starting with OpenSSL 1.1.0, library initialization is no longer + required - and might even be harmful. + + See https://wiki.openssl.org/index.php/Library_Initialization for + details. + +5.0.0-dev.319 | 2022-04-27 17:42:42 +0000 + + * Wrap call to doctest's MESSAGE() method in Reporter in try/catch block (Tim Wojtulewicz, Corelight) + + Also check whether doctest is even enabled before trying to use it. + + * Pre-initialize c-ares channel object. 
Fixes Coverity 1488318 (Tim Wojtulewicz, Corelight) + + * Ask c-ares for the next timeout instead of passing a fixed value (Tim Wojtulewicz, Corelight) + + * Remove obsolete DNS_Mgr::asyncs_timeouts (Tim Wojtulewicz, Corelight) + +5.0.0-dev.314 | 2022-04-27 09:43:23 -0700 + + * Management framework: consistency fixes around event() vs Broker::publish() (Christian Kreibich, Corelight) + +5.0.0-dev.312 | 2022-04-26 09:52:34 -0700 + + * Rework FindCAres.cmake to not use ExternalProject, fixing OBS builds (Tim Wojtulewicz, Corelight) + +5.0.0-dev.309 | 2022-04-22 13:11:12 -0700 + + * Add DNS fuzzing corpus from c-ares (Tim Wojtulewicz, Corelight) + + * Set larger UDP buffer to avoid TCP fallback if possible (Tim Wojtulewicz, Corelight) + + This commit sets the UDP buffer to a larger size, as well as adds + an EDNS block to the DNS request passing this size. This allows + DNS servers to return larger responses, and in turn allow c-ares + to avoid TCP fallback due to requests failing because of the lack + of buffer size. + + * Add new features to IOSource::Manager, used by DNS_Mgr (Tim Wojtulewicz, Corelight) + + - iosource_mgr can now track write events to file descriptors as well + as read events. This adds an argument to both RegisterFd() and + UnregisterFd() for setting the mode, defaulting to read. + - IOSources can now implement a ProcessFd() method that allows them to + handle events to single file descriptors instead of having to + loop through/track sets of them at processing time. + + * Add out_ptr, use for c-ares interface calls (Tim Wojtulewicz, Corelight) + + * Store all mappings in a single map instead of split by type (Tim Wojtulewicz, Corelight) + + This opens up the possibility of storing other request types outside + of T_A, T_PTR and T_TXT without requiring redoing the caching. 
It + also fixes the caching code in DNS_Mapping, adding a version number + to the start of the cache file so the cache structure can be modified + and old caches invalidated more easily. + + * Add merging to DNS_Mgr::AddResult() to support both ipv4 and ipv6 responses simultaneously (Tim Wojtulewicz, Corelight) + + * Rework DNS_Mgr API to be more consistent and to support more request types (Tim Wojtulewicz, Corelight) + + * Replace nb_dns library with C-Ares (Tim Wojtulewicz, Corelight) + + * Add unit testing for DNS_Mgr and related classes (Tim Wojtulewicz, Corelight) + + * Update doc gen VM to ubuntu-latest, output cmake version during configure (Tim Wojtulewicz, Corelight) + + * Use doctest macro to tie Reporter output to test cases (Tim Wojtulewicz, Corelight) + + * Add const versions of dereference operators for DictEntry (Tim Wojtulewicz, Corelight) + + * Add DNS fuzzer (Tim Wojtulewicz, Corelight) + +5.0.0-dev.288 | 2022-04-22 07:00:56 -0700 + + * Fix generate-docs action for running on forks. (Benjamin Bannier, Corelight) + + The generate-docs action previously always required secrets to run so + that it could possibly perform a push (if run from a schedule), and to + send out an email on failure. Since secrets are unavailable for forks + this meant that this action would always fail for PRs from forks. + + In this patch we use an unauthenticated clone unless running from a + schedule. This is fine as for PRs this action would just regenerate the + docs to check for errors, but not to actually update them (no push + performed). We also change the failure notification step to only execute + for scheduled runs. 
+ +5.0.0-dev.286 | 2022-04-21 13:34:34 -0700 + + * Suppress progress dots in zkg's output in Docker package-install check (Christian Kreibich, Corelight) + +5.0.0-dev.284 | 2022-04-21 09:17:28 -0700 + + * Enable vptr undefined behavior check (Tim Wojtulewicz, Corelight) + +5.0.0-dev.282 | 2022-04-20 17:17:55 -0700 + + * Update libkqueue for Coverity and build warning fixes (Tim Wojtulewicz, Corelight) + +5.0.0-dev.280 | 2022-04-19 09:42:28 -0700 + + * Escape special characters in paths before using them as regexes (Tim Wojtulewicz, Corelight) + +5.0.0-dev.277 | 2022-04-18 16:38:27 -0700 + + * Management framework updates (Christian Kreibich, Corelight) + + - bump external testsuite + - allow selecting cluster nodes in get_id_value + - minor tweaks to logging component + - bump zeek-client to pull in get-id-value command + - improve handling of node run states + - add get_id_value dispatch + - allow dispatching "actions" on cluster nodes. + - some renaming to avoid the term "data cluster" + - allow agents to communicate with cluster nodes + + * Avoid whitespace around function type strings in JSON rendering (Christian Kreibich, Corelight) + + * Disable TSan CI task temporarily while we sort out some intermittent test failures (Tim Wojtulewicz, Corelight) + +5.0.0-dev.265 | 2022-04-18 12:45:08 -0700 + + * state-holding fix: track unique identifiers for Func's in CompHash's, not Func's themselves (Vern Paxson, Corelight) + +5.0.0-dev.263 | 2022-04-18 09:22:30 -0700 + + * Add "Reporter" entry to fix plugin hook_name() vs HookType imbalance (Christian Kreibich, Corelight) + + The hook_name() list was missing an entry corresponding to HOOK_REPORTER. 
+ + Co-authored-by: Peter Cullen + +5.0.0-dev.259 | 2022-04-14 10:26:29 -0700 + + * GH-2038: Don't sleep when non-selectable PktSrc has data available (Anthony Coddington) + + PktSrc::GetNextTimeout always returned a fixed timeout of 20 microseconds for non-selectable packet sources regardless of whether they have packets available. This adds unnecessary delay every FindReadySources poll_interval when packets are available to be read. + + Instead, for non-selectable packet sources, check whether packets are available and return a timeout of 0 to indicate data is available. This is closer to the behaviour of the old capture loop. + + This was mitigated somewhat by the fact FindReadySources poll interval defaults to 100 packets, and live sources are added to the ready list regardless of whether they have packets available (unless it is time to force a poll). + +5.0.0-dev.257 | 2022-04-14 10:13:28 -0700 + + * Re-instantiate providing location information to `LoadFile` hooks. (Robin Sommer, Corelight) + + #1835 subtly changed the semantics of the `LoadFile` plugin hook to no + longer have the current script location available for signature files + being loaded through `@load-sigs`. This was undocumented behavior, so + it's technically not a regression, but since at least one external + plugin is depending on it, this change restores the old behavior. + +5.0.0-dev.255 | 2022-04-14 10:12:49 -0700 + + * Fix another crash during dictionary iteration. (Robin Sommer, Corelight) + + Closes #2017. + + * Fix assertions in dictionary that can trigger for benign reasons. (Robin Sommer, Corelight) + + These assertions were checking for a situation that I believe can + happen legitimately: a robust iterator pointing to an index that, + after some table resizing, happens to be inside the overflow area and + hence empty. We'll now move it to the end of the table in the case. + + * Fix robust iterators when modifying dictionary during iteration. 
(Robin Sommer, Corelight) + + When inserting/deleting elements, we now remove their `DictEntries` + from any robust iterators' bookkeeping. First, we don't need that + information anymore, and second the `DictEntries` contain pointers + that may become invalid. + + I don't know how to write a unit test for this unfortunately because + it depends on where exactly things land in the hash table. + + Btw, memory mgmt for DictEntries is pretty fragile: They contain + pointers to both memory they own (`key`) and memory they don't own + (`value`). The former type of pointers is shallow-copied on + assignment/copy-construction, meaning that there can be multiple + instances seemingly owning the same memory. That only works because + deletion is manual, and not part of standard destruction. The second + type of pointer has a similar problem, except that it's managed + externally. It's important to not end up with multiple `DictEntries` + pointing to the same value (which is actually what that iterator + bookkeeping did). + + Addresses #2032. + +5.0.0-dev.250 | 2022-04-14 09:51:23 -0700 + + * Split asan/ubsan CI builds, add tsan build (Tim Wojtulewicz, Corelight) + +5.0.0-dev.248 | 2022-04-14 08:59:34 -0700 + + * Disable object-size analysis if optimization set to -O0 (Tim Wojtulewicz, Corelight) + +5.0.0-dev.246 | 2022-04-14 10:48:19 +0200 + + * Allow analyzer violations to explicitly set tag. 
(Robin Sommer, Corelight) + +5.0.0-dev.244 | 2022-04-13 10:52:58 -0700 + + * Add test to ensure enum_to_int's return values are ordered (Yacin Nadji, Corelight) + +5.0.0-dev.242 | 2022-04-13 10:51:21 -0700 + + * Add unit test for other get_word() version (Tim Wojtulewicz, Corelight) + +5.0.0-dev.240 | 2022-04-11 12:46:51 -0700 + + * Mask our signal handlers' triggering signals around thread creation (Christian Kreibich, Corelight) + +5.0.0-dev.238 | 2022-04-11 12:40:02 -0700 + + * GH-2026: Ensure both protocol and analyzer confirmation and violation events can be called (Tim Wojtulewicz, Corelight) + +5.0.0-dev.235 | 2022-04-09 00:08:50 +0000 + + * Update libkqueue to 2.6.0 release [skip ci] [nomail] (Tim Wojtulewicz) + +5.0.0-dev.233 | 2022-04-08 11:30:52 -0700 + + * Bump submodules to pull in InstallSymlink fix (Christian Kreibich, Corelight) + +5.0.0-dev.231 | 2022-04-05 18:04:47 -0700 + + * fix for ill-formed (complex) &default function (Vern Paxson, Corelight) + + * type-checking for use of empty table constructors in expressions (Vern Paxson, Corelight) + + * catch empty constructors used for type inference (Vern Paxson, Corelight) + suppress repeated error messages + + * factoring to make checking of &default attributes externally accessible (Vern Paxson, Corelight) + + * bug fix for empty table constructors with &default attributes (plus a typo) (Vern Paxson, Corelight) + +5.0.0-dev.222 | 2022-04-05 18:04:15 -0700 + + * reduce interpreter frames for compiled function bodies (Vern Paxson, Corelight) + +5.0.0-dev.219 | 2022-04-05 16:07:48 -0700 + + * Correct origin documentation of the version field in the HTTP log. 
(Christian Kreibich, Corelight) + +5.0.0-dev.217 | 2022-04-04 13:27:32 -0700 + + * Move new TLS decryption capabilities up to Zeek 5 in NEWS file (Christian Kreibich, Corelight) + + * Update NEWS to reflect recent updates (Christian Kreibich, Corelight) + +5.0.0-dev.214 | 2022-04-04 10:52:41 -0700 + + * fix & btest for ZAM bug with inlined nested loop (Vern Paxson, Corelight) + +5.0.0-dev.212 | 2022-04-04 10:51:20 -0700 + + * GH-2009: Use auto to fix ZIP analyzer failure on some platforms (Tim Wojtulewicz, Corelight) + +5.0.0-dev.210 | 2022-03-28 17:04:51 -0700 + + * Add cmake-time reporting of bifcl, binpac, and gen-zam used for build (Christian Kreibich, Corelight) + + * Build Gen-ZAM from a submodule and support use of pre-existing executable (Christian Kreibich, Corelight) + +5.0.0-dev.204 | 2022-03-25 15:31:21 -0700 + + * --event-trace / -E option to generate event trace (Vern Paxson, Corelight) + + * hooks to support event tracing (Vern Paxson, Corelight) + + * classes providing event-tracing/dumping functionality (Vern Paxson, Corelight) + + * provide access to Val internals for event tracing purposes (Vern Paxson, Corelight) + + * set_network_time() BiF in support of event replaying (Vern Paxson, Corelight) + +5.0.0-dev.195 | 2022-03-24 11:01:28 -0700 + + * switch variable initialization over to being expression-based (Vern Paxson, Corelight) + + * simplification of Val classes now that they don't have to support initialization (Vern Paxson, Corelight) + + * rework type inference due to switch from separate initializers to expressions (Vern Paxson, Corelight) + + * avoid evaluating calls to determine whether an expression value is ignored (Vern Paxson, Corelight) + + * reworking of expressions to unify =/+=/-= with initialization (Vern Paxson, Corelight) + + * allow {} expression lists for =/+=/-= RHS (Vern Paxson, Corelight) + +5.0.0-dev.177 | 2022-03-23 13:05:51 +0100 + + * Improve the formatting of the SSL::Info::ssl_history documentation (Johanna Amann, 
Corelight) + +5.0.0-dev.173 | 2022-03-16 15:06:05 -0700 + + * Fix document generation (Christian Kreibich, Corelight) + +5.0.0-dev.169 | 2022-03-10 11:09:37 -0700 + + * add raw_bytes_to_v6_addr in docs when raw_bytes_to_v4_addr is present (Yacin Nadji, Corelight) + + * Zero out bytes by default for consistent return value on error (Yacin Nadji, Corelight) + + * Add tests for raw_bytes_to_v6_addr (Yacin Nadji, Corelight) + + * Add raw_bytes_to_v6_addr function (Yacin Nadji, Corelight) + +5.0.0-dev.164 | 2022-03-08 09:30:37 -0700 + + * Update 3rdparty submodule for bsd-getopt-long fix (Tim Wojtulewicz) + +5.0.0-dev.162 | 2022-03-07 12:36:37 +0100 + + * Improve error message when receiving unexpected record content via + Broker. (Robin Sommer, Corelight) + +5.0.0-dev.160 | 2022-03-02 13:48:07 +0000 + + * restored record constructor checking for missing-but-mandatory fields. This includes a new btest + as well as a fix to the base-scripts. (Vern Paxson, Corelight) + +5.0.0-dev.156 | 2022-03-02 08:23:50 +0000 + + * The is_num(), is_alpha(), and is_alnum() BiFs now return F on empty string. + The testcases for these functions, and for is_ascii() were expanded. The documentation of is_ascii() + concerning behavior of an empty string was clarified (Christian Kreibich, Corelight) + +5.0.0-dev.151 | 2022-03-02 08:09:28 +0000 + + * SSL: rudimentary decryption for TLS 1.2 (Florian Wilkens, Johanna Amann) + + With this version, we support rudimentary decryption of TLS 1.2 connections, if the key material + of the connection (in our case the pre-master secret) is available. Note that this functionality + only works for TLS 1.2 connections using the TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384 cipher suite. + No other combinations are currently supported. + + For more information, see the NEWS entry and the TLS Decryption documentation. 
+ +5.0.0-dev.121 | 2022-02-24 09:11:03 -0700 + + * GH-1980: Deprecate and return warning for zeek-config's caf-root option (Tim Wojtulewicz, Corelight) + +5.0.0-dev.118 | 2022-02-23 10:51:57 -0700 + + * GH-1949: Remove unused timer_mgr_inactivity_timeout global (Tim Wojtulewicz, Corelight) + +5.0.0-dev.116 | 2022-02-21 18:17:13 -0700 + + * remove deprecated union and timer types, addressing #1898 (Matthew Luckie) + +5.0.0-dev.114 | 2022-02-11 09:30:04 -0800 + + * Minor modernizations to Github workflows (Christian Kreibich, Corelight) + +5.0.0-dev.112 | 2022-02-10 17:56:27 -0800 + + * Reorg of the cluster controller to new "Management framework" layout (Christian Kreibich, Corelight) + + * Bump external cluster testsuite to reflect Management framework reorg (Christian Kreibich, Corelight) + + * Bump zeek-client to reflect Management framework reorg (Christian Kreibich, Corelight) + +5.0.0-dev.108 | 2022-02-10 10:35:02 -0700 + + * Fixing a big pile of Coverity issues (Tim Wojtulewicz, Corelight) + +5.0.0-dev.106 | 2022-02-09 15:15:21 -0800 + + * Expand generate-docs Github workflow to test docs build on PRs (Christian Kreibich, Corelight) + +5.0.0-dev.104 | 2022-02-09 13:14:04 -0800 + + * Updates to the cluster controller scripts to fix the docs build (Christian Kreibich, Corelight) + + * Bump zeek-client for Broker enum fix/workaround (Christian Kreibich, Corelight) + +5.0.0-dev.100 | 2022-02-07 14:18:50 -0800 + + * Add capture to a Sumstats when-statement to fix deprecation warning (Christian Kreibich, Corelight) + +5.0.0-dev.97 | 2022-02-07 16:24:06 +0100 + + * Update to latest Broker without public CAF dependencies. 
(Dominik + Charousset, Corelight) + + * Fix GCC builds and string output for Broker errors (Dominik + Charousset, Corelight) + +5.0.0-dev.94 | 2022-02-07 08:14:47 -0700 + + * String/StringVal: Replace char*/string constructors with string_view (Tim Wojtulewicz, Corelight) + +5.0.0-dev.92 | 2022-02-04 10:33:47 -0700 + + * fix existing checks for looking to use C++ when it's not available (Vern Paxson, Corelight) + +5.0.0-dev.90 | 2022-02-04 10:32:41 -0700 + + * fixes for ZAM profiling, which didn't get fully integrated originally (Vern Paxson, Corelight) + + * minor enhancements for ZAM inlining (Vern Paxson, Corelight) + +5.0.0-dev.87 | 2022-02-03 13:17:25 -0800 + + * Expansion of cluster controller functionality (Christian Kreibich, Corelight) + + - Bump external cluster testsuite + - Bump zeek-client for the get-nodes command + - Add ClusterController::API::get_nodes_request/response event pair + - Support optional listening ports for cluster nodes + - Don't auto-publish Supervisor response events in the cluster agent + - Make members of the ClusterController::Types::State enum all-caps + - Be more conservative with triggering request timeout events + - Move redefs of ClusterController::Request::Request to their places of use + - Simplify ClusterController::API::set_configuration_request/response + +5.0.0-dev.77 | 2022-02-03 11:20:16 +0000 + + * Match DPD TLS signature on one-sided connections. (Johanna Amann, Corelight) + + This commit changes DPD matching for TLS connections. A one-sided match + is enough to enable DPD now. + + This commit also removes DPD for SSLv2 connections. SSLv2 connections do + basically no longer happen in the wild. SSLv2 is also really finicky to + identify correctly - there is very little data required to match it, and + basically all matches today will be false positives. If DPD for SSLv2 is + still desired, the optional signature in policy/protocols/ssl/dpd-v2.sig + can be loaded. 
+ +5.0.0-dev.74 | 2022-02-02 09:46:00 +0100 + + * GH-1890: Consistently warn about mixing vector and scalar operand + deprecation (Zeke Medley, Corelight) + +5.0.0-dev.72 | 2022-02-02 09:36:30 +0100 + + * Let TCP-based application analyzers operate without any TCP parent + analyzer. (Robin Sommer, Corelight) + +5.0.0-dev.70 | 2022-01-25 13:52:00 -0700 + + * bug fix for vector slice assignment (Vern Paxson, Corelight) + +5.0.0-dev.67 | 2022-01-25 12:25:48 +0000 + + * updated Bro->Zeek in comments in the source tree (Vern Paxson, Corelight) + +5.0.0-dev.65 | 2022-01-24 13:41:25 -0800 + + * CI updates (Christian Kreibich, Corelight) + + - add Ubuntu 21.10 + - remove OpenSUSE Leap 15.2 (EOL) + - add CentOS Stream 9 + 5.0.0-dev.61 | 2022-01-17 10:35:15 +0000 * fix for adding a non-managed type to an empty vector (Vern Paxson, Corelight) diff --git a/CMakeLists.txt b/CMakeLists.txt index 2fc7f6d618..5f7df8ffdb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -178,7 +178,6 @@ if ( ZEEK_SANITIZERS ) # list(APPEND _check_list "nullability-assign") # Not normally part of "undefined" # list(APPEND _check_list "nullability-return") # Not normally part of "undefined" # list(APPEND _check_list "objc-cast") # Not truly UB - list(APPEND _check_list "object-size") # list(APPEND _check_list "pointer-overflow") # Not implemented in older GCCs list(APPEND _check_list "return") list(APPEND _check_list "returns-nonnull-attribute") @@ -188,7 +187,14 @@ if ( ZEEK_SANITIZERS ) list(APPEND _check_list "unreachable") # list(APPEND _check_list "unsigned-integer-overflow") # Not truly UB list(APPEND _check_list "vla-bound") - # list(APPEND _check_list "vptr") # TODO: fix associated errors + list(APPEND _check_list "vptr") + + # Clang complains if this one is defined and the optimizer is set to -O0. We + # only set that optimization level if NO_OPTIMIZATIONS is passed, so disable + # the option if that's set. 
+ if ( NOT DEFINED ENV{NO_OPTIMIZATIONS} ) + list(APPEND _check_list "object-size") + endif () string(REPLACE ";" "," _ub_checks "${_check_list}") set(ZEEK_SANITIZER_UB_CHECKS "${_ub_checks}" CACHE INTERNAL "" FORCE) @@ -294,6 +300,10 @@ if ( NOT BIFCL_EXE_PATH ) add_subdirectory(auxil/bifcl) endif () +if ( NOT GEN_ZAM_EXE_PATH ) + add_subdirectory(auxil/gen-zam) +endif () + if (ENABLE_JEMALLOC) if (${CMAKE_SYSTEM_NAME} MATCHES "FreeBSD") if (DEFINED JEMALLOC_ROOT_DIR) @@ -331,36 +341,15 @@ if ( PYTHON_VERSION_STRING VERSION_LESS ${ZEEK_PYTHON_MIN} ) message(FATAL_ERROR "Python ${ZEEK_PYTHON_MIN} or greater is required.") endif () -if ( CAF_ROOT OR BROKER_ROOT_DIR ) - # TODO: drop < 3.12 compatibility check when raising the minimum CMake version - if ( CAF_ROOT AND CMAKE_VERSION VERSION_LESS 3.12 ) - find_package(CAF ${CAF_VERSION_MIN_REQUIRED} REQUIRED - COMPONENTS openssl test io core - PATHS "${CAF_ROOT}") - else () - find_package(CAF ${CAF_VERSION_MIN_REQUIRED} REQUIRED - COMPONENTS openssl test io core) - endif () - message(STATUS "Using system CAF version ${CAF_VERSION}") - # TODO: drop these legacy variables and simply use the targets consistently - set(CAF_LIBRARIES CAF::core CAF::io CAF::openssl CACHE INTERNAL "") - set(caf_dirs "") - foreach (caf_lib IN LISTS CAF_LIBRARIES ITEMS CAF::test) - get_target_property(dirs ${caf_lib} INTERFACE_INCLUDE_DIRECTORIES) - if ( dirs ) - list(APPEND caf_dirs ${dirs}) - endif () - endforeach () - list(REMOVE_DUPLICATES caf_dirs) - list(GET caf_dirs 0 caf_dir) - set(CAF_INCLUDE_DIRS "${caf_dirs}" CACHE INTERNAL "") -endif () - add_subdirectory(auxil/paraglob) set(zeekdeps ${zeekdeps} paraglob) -if ( BROKER_ROOT_DIR ) - find_package(Broker REQUIRED) +if ( Broker_ROOT ) + find_package(Broker REQUIRED PATHS "${Broker_ROOT}") + set(zeekdeps ${zeekdeps} ${BROKER_LIBRARY}) + set(broker_includes ${BROKER_INCLUDE_DIR}) +elseif ( BROKER_ROOT_DIR ) + find_package(Broker REQUIRED PATHS "${BROKER_ROOT_DIR}") set(zeekdeps ${zeekdeps} 
${BROKER_LIBRARY}) set(broker_includes ${BROKER_INCLUDE_DIR}) else () @@ -385,11 +374,6 @@ else () set(broker_includes ${CMAKE_CURRENT_SOURCE_DIR}/auxil/broker/include ${CMAKE_CURRENT_BINARY_DIR}/auxil/broker/include) endif () -# CAF_LIBRARIES and CAF_INCLUDE_DIRS are defined either by calling -# find_package(CAF) or by calling add_subdirectory(auxil/broker). In either case, -# we have to care about CAF here because Broker headers can pull in CAF -# headers. -set(zeekdeps ${zeekdeps} ${CAF_LIBRARIES}) include_directories(BEFORE ${PCAP_INCLUDE_DIR} ${BIND_INCLUDE_DIR} @@ -462,7 +446,7 @@ endif () # Any headers that are possibly bundled in the Zeek source-tree and that are supposed # to have priority over any pre-existing/system-wide headers need to appear early in # compiler search path. -include_directories(BEFORE ${broker_includes} ${CAF_INCLUDE_DIRS}) +include_directories(BEFORE ${broker_includes}) include_directories(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}/auxil/highwayhash) include_directories(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}/auxil/paraglob/include) include_directories(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}/auxil/rapidjson/include) @@ -496,6 +480,8 @@ include(CheckNameserCompat) include(GetArchitecture) include(RequireCXX17) include(FindKqueue) +include(FindCAres) +include_directories(BEFORE "auxil/out_ptr/include") if ( (OPENSSL_VERSION VERSION_EQUAL "1.1.0") OR (OPENSSL_VERSION VERSION_GREATER "1.1.0") ) set(ZEEK_HAVE_OPENSSL_1_1 true CACHE INTERNAL "" FORCE) @@ -522,12 +508,6 @@ execute_process(COMMAND "${CMAKE_COMMAND}" -E create_symlink "." 
"${CMAKE_CURRENT_BINARY_DIR}/zeek") -if ( CAF_ROOT ) - set(ZEEK_CONFIG_CAF_ROOT_DIR ${CAF_ROOT}) -else () - set(ZEEK_CONFIG_CAF_ROOT_DIR ${ZEEK_ROOT_DIR}) -endif () - if ( BinPAC_ROOT_DIR ) set(ZEEK_CONFIG_BINPAC_ROOT_DIR ${BinPAC_ROOT_DIR}) else () @@ -665,6 +645,21 @@ else () set(_install_btest_tools_msg "no pcaps") endif () +set(_bifcl_exe_path "included") +if ( BIFCL_EXE_PATH ) + set(_bifcl_exe_path ${BIFCL_EXE_PATH}) +endif () + +set(_binpac_exe_path "included") +if ( BINPAC_EXE_PATH ) + set(_binpac_exe_path ${BINPAC_EXE_PATH}) +endif () + +set(_gen_zam_exe_path "included") +if ( GEN_ZAM_EXE_PATH ) + set(_gen_zam_exe_path ${GEN_ZAM_EXE_PATH}) +endif () + message( "\n====================| Zeek Build Summary |====================" "\n" @@ -686,8 +681,11 @@ message( "\n" "\nZeekControl: ${INSTALL_ZEEKCTL}" "\nAux. Tools: ${INSTALL_AUX_TOOLS}" + "\nBifCL: ${_bifcl_exe_path}" + "\nBinPAC: ${_binpac_exe_path}" "\nBTest: ${INSTALL_BTEST}" "\nBTest tooling: ${_install_btest_tools_msg}" + "\nGen-ZAM: ${_gen_zam_exe_path}" "\nzkg: ${INSTALL_ZKG}" "\n" "\nlibmaxminddb: ${USE_GEOIP}" diff --git a/NEWS b/NEWS index 9c5eec6d6f..f2e5f86a3f 100644 --- a/NEWS +++ b/NEWS @@ -12,9 +12,43 @@ Breaking Changes New Functionality ----------------- +- Zeek now supports generation and replay of event traces via the new + ``--event-trace`` / ``-E`` command-line options. For details, see: + https://docs.zeek.org/en/master/quickstart.html#tracing-events + +- Zeek now features limited TLS decryption capabilities. This feature is experimental + and only works for TLS 1.2 connections that use the TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384 + ciphersuite. Furthermore Zeek requires access to the pre-master secret of each TLS + connection. Typically this functionality will be most useful when analyzing trace-files + where the TLS client recorded the key material. 
For more details and examples how to + use this functionality, see the TLS Decryption documentation at + https://docs.zeek.org/en/master/frameworks/tls-decryption.html + +- The new --with-gen-zam configure flag and its corresponding GEN_ZAM_EXE_PATH + cmake variable allow reuse of a previously built Gen-ZAM code generator. This + aids cross-compilation: the Zeek build process normally compiles Gen-ZAM on + the fly, but when cross-compiling will do so for the target platform, breaking + its use on the host platform. Gen-ZAM is similar to binpac and bifcl in this + regard. Like binpac and bifcl, it's now also available as a standalone git + repository and hooked into the Zeek distribution as a submodule. + +- Zeek now uses the c-ares (https://c-ares.org) library for performing DNS + requests, replacing an old custom implementation of a DNS resolver. Switching + to this library simplifies the DNS code, adds support for IPv6 lookups, and + adds the ability to support more DNS request types in the future. + Changed Functionality --------------------- +- The behavior of the ``=``, ``+=``, and ``-=`` operators has been expanded and + unified. It now covers ``{ ... }`` initializer lists, supports cross-product + initialization, enables ``+=`` for table, set, vector and pattern values, + similarly allows ``-=`` for table and set values, and supports listing + multiple sets for ``+=`` initialization. For details, see: + https://docs.zeek.org/en/master/script-reference/operators.html#assignment-operators + +- The is_num(), is_alpha(), and is_alnum() BiFs now return F for the empty string. + Deprecated Functionality ------------------------ @@ -32,6 +66,15 @@ Breaking Changes changes to return types from a number of methods. With this change, any uses of the `zeek::*::Tag` types will need to be replaced by `zeek::Tag`. +- The DPD signature for SSL version 2 is no longer enabled by default. 
SSLv2 + is basically extinct nowadays - and the protocol has a relatively high probability + of matching with random traffic and being misidentified. If you want to enable + the SSLv2 dpd signature, you can load the signature from `policy/protocols/ssl/dpd-v2.sig` + + The DPD signature for SSL version 3 and up (including TLS 1.0 and above) now matches + for one-sided connections and does not require a reverse match anymore. This prevents + missed handshakes, where the client handshake contains a lot of data. + New Functionality ----------------- diff --git a/VERSION b/VERSION index ff7d3e34b0..50a1de96a8 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -5.0.0-dev.61 +5.0.0-dev.332 diff --git a/auxil/bifcl b/auxil/bifcl index eed5effea5..3b5efa59f1 160000 --- a/auxil/bifcl +++ b/auxil/bifcl @@ -1 +1 @@ -Subproject commit eed5effea5661e03b50d0436fecb620a05fb1250 +Subproject commit 3b5efa59f137fc5a15ace602d44e176f97bae083 diff --git a/auxil/broker b/auxil/broker index 41c524f172..9f2f16c1da 160000 --- a/auxil/broker +++ b/auxil/broker @@ -1 +1 @@ -Subproject commit 41c524f172aa357422f1ccf7e806b448d5def08e +Subproject commit 9f2f16c1da94b03790bc8b9f69bc2688e97b781f diff --git a/auxil/c-ares b/auxil/c-ares new file mode 160000 index 0000000000..2aa086f822 --- /dev/null +++ b/auxil/c-ares @@ -0,0 +1 @@ +Subproject commit 2aa086f822aad5017a6f2061ef656f237a62d0ed diff --git a/auxil/gen-zam b/auxil/gen-zam new file mode 160000 index 0000000000..f8c8fb36fb --- /dev/null +++ b/auxil/gen-zam @@ -0,0 +1 @@ +Subproject commit f8c8fb36fb07a2c1703c63dd8624cf1329c0c4d0 diff --git a/auxil/libkqueue b/auxil/libkqueue index aeaeed2119..374aeb5202 160000 --- a/auxil/libkqueue +++ b/auxil/libkqueue @@ -1 +1 @@ -Subproject commit aeaeed21198d6f41d0cf70bda63fe0f424922ac5 +Subproject commit 374aeb52020e289e8574f059f87a96010d9a46b9 diff --git a/auxil/out_ptr b/auxil/out_ptr new file mode 160000 index 0000000000..ea379b2f35 --- /dev/null +++ b/auxil/out_ptr @@ -0,0 +1 @@ +Subproject commit
ea379b2f35e28d6ee894e05ad4c26ed60a613d30 diff --git a/auxil/zeek-aux b/auxil/zeek-aux index 0f120aa00c..e76e84e175 160000 --- a/auxil/zeek-aux +++ b/auxil/zeek-aux @@ -1 +1 @@ -Subproject commit 0f120aa00c2b666ed5c430a6bcf1043b82f17e64 +Subproject commit e76e84e175463f9989b492b1d119e3d846fe696d diff --git a/auxil/zeek-client b/auxil/zeek-client index 553d897734..a08d9978ac 160000 --- a/auxil/zeek-client +++ b/auxil/zeek-client @@ -1 +1 @@ -Subproject commit 553d897734b6d9abbc2e4467fae89f68a2c7315d +Subproject commit a08d9978ac6ff6481ad1e6b18f0376568c08f8c1 diff --git a/auxil/zeekctl b/auxil/zeekctl index e7fd4d552e..9cab8f5c62 160000 --- a/auxil/zeekctl +++ b/auxil/zeekctl @@ -1 +1 @@ -Subproject commit e7fd4d552ec7c3e55cb556943bf8b499d2db17e1 +Subproject commit 9cab8f5c62577c54126e6c63343e024ad6f441bc diff --git a/ci/centos-stream-9/Dockerfile b/ci/centos-stream-9/Dockerfile new file mode 100644 index 0000000000..0ad201521d --- /dev/null +++ b/ci/centos-stream-9/Dockerfile @@ -0,0 +1,36 @@ +FROM quay.io/centos/centos:stream9 + +# dnf config-manager isn't available at first, and +# we need it to install the CRB repo below. +RUN dnf -y install 'dnf-command(config-manager)' + +# What used to be powertools is now called "CRB". +# We need it for some of the packages installed below. +# https://docs.fedoraproject.org/en-US/epel/ +RUN dnf config-manager --set-enabled crb +RUN dnf -y install \ + https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm \ + https://dl.fedoraproject.org/pub/epel/epel-next-release-latest-9.noarch.rpm + +# The --nobest flag is hopefully temporary. Without it we currently hit +# package versioning conflicts around OpenSSL. 
+RUN dnf -y --nobest install \ + bison \ + cmake \ + diffutils \ + flex \ + git \ + gcc \ + gcc-c++ \ + libpcap-devel \ + make \ + openssl-devel \ + python3-devel \ + python3-pip\ + sqlite \ + swig \ + which \ + zlib-devel \ + && dnf clean all && rm -rf /var/cache/dnf + +RUN pip3 install junit2html diff --git a/ci/opensuse-leap-15.2/Dockerfile b/ci/opensuse-leap-15.2/Dockerfile deleted file mode 100644 index 7b23204cde..0000000000 --- a/ci/opensuse-leap-15.2/Dockerfile +++ /dev/null @@ -1,25 +0,0 @@ -FROM opensuse/leap:15.2 - -RUN zypper in -y \ - cmake \ - make \ - gcc \ - gcc-c++ \ - python3 \ - python3-devel \ - flex \ - bison \ - libpcap-devel \ - libopenssl-devel \ - zlib-devel \ - swig \ - git \ - curl \ - python3-pip \ - which \ - gzip \ - tar \ - && rm -rf /var/cache/zypp - - -RUN pip3 install junit2html diff --git a/ci/ubuntu-21.10/Dockerfile b/ci/ubuntu-21.10/Dockerfile new file mode 100644 index 0000000000..4a51a122e9 --- /dev/null +++ b/ci/ubuntu-21.10/Dockerfile @@ -0,0 +1,33 @@ +FROM ubuntu:21.10 + +ENV DEBIAN_FRONTEND="noninteractive" TZ="America/Los_Angeles" + +RUN apt-get update && apt-get -y install \ + git \ + cmake \ + make \ + gcc \ + g++ \ + flex \ + bison \ + libpcap-dev \ + libssl-dev \ + python3 \ + python3-dev \ + python3-pip\ + swig \ + zlib1g-dev \ + libmaxminddb-dev \ + libkrb5-dev \ + bsdmainutils \ + sqlite3 \ + curl \ + wget \ + unzip \ + ruby \ + bc \ + lcov \ + && rm -rf /var/lib/apt/lists/* + +RUN pip3 install junit2html +RUN gem install coveralls-lcov diff --git a/cmake b/cmake index 105f6c9df6..588e6da051 160000 --- a/cmake +++ b/cmake @@ -1 +1 @@ -Subproject commit 105f6c9df616a4c2286d5ef38c2b31a718192301 +Subproject commit 588e6da051cb82a1cd24400f94b86338957d646a diff --git a/configure b/configure index 8f0fb27e41..0565fd1dff 100755 --- a/configure +++ b/configure @@ -85,6 +85,8 @@ Usage: $0 [OPTION]... [VAR=VALUE]... 
(Zeek uses an embedded version by default) --with-caf=PATH path to C++ Actor Framework install root (a Broker dependency that is embedded by default) + --with-gen-zam=PATH path to Gen-ZAM code generator + (Zeek uses an embedded version by default) --with-flex=PATH path to flex executable --with-libkqueue=PATH path to libkqueue install root (Zeek uses an embedded version by default) @@ -338,6 +340,9 @@ while [ $# -ne 0 ]; do --with-flex=*) append_cache_entry FLEX_EXECUTABLE PATH $optarg ;; + --with-gen-zam=*) + append_cache_entry GEN_ZAM_EXE_PATH PATH $optarg + ;; --with-geoip=*) append_cache_entry LibMMDB_ROOT_DIR PATH $optarg ;; @@ -437,6 +442,8 @@ echo "Build Directory : $builddir" echo "Source Directory: $sourcedir" cd $builddir +echo "Using $(cmake --version | head -1)" +echo if [ -n "$CMakeGenerator" ]; then "$CMakeCommand" -G "$CMakeGenerator" $CMakeCacheEntries $sourcedir else diff --git a/doc b/doc index 789c4b2f4c..a5adc652ed 160000 --- a/doc +++ b/doc @@ -1 +1 @@ -Subproject commit 789c4b2f4c6b10193ee39f5be82cc9028eddfc38 +Subproject commit a5adc652ed3c7cf179a19c8b373a9443dd1fc5dc diff --git a/docker/btest/docker/structure_tests.sh b/docker/btest/docker/structure_tests.sh index ba20fec05a..17a54aa86d 100644 --- a/docker/btest/docker/structure_tests.sh +++ b/docker/btest/docker/structure_tests.sh @@ -17,7 +17,9 @@ docker run --rm "${TEST_TAG}" btest --version | sed 's/^[0-9].*/XXX/g' docker run --rm "${TEST_TAG}" zkg config # Check that a plugin can be installed. We pick any plugin with minimal deps here. 
-docker run --rm "${TEST_TAG}" zkg install --force sethhall/domain-tld | sed 's/(.*)/(XXX)/' +docker run --rm "${TEST_TAG}" zkg install --force sethhall/domain-tld | + sed 's/"\.*$/"/' | + sed 's/(.*)/(XXX)/' # Check that the Broker Python module loads docker run --rm "${TEST_TAG}" python3 -c "import broker" diff --git a/scripts/base/frameworks/software/main.zeek b/scripts/base/frameworks/software/main.zeek index 9fed88668b..b6a86ba644 100644 --- a/scripts/base/frameworks/software/main.zeek +++ b/scripts/base/frameworks/software/main.zeek @@ -95,7 +95,7 @@ export { ## even if it exposes itself with an alternate name. The ## yielded string is the name that will be logged and generally ## used for everything. - global alternate_names: table[string] of string { + global alternate_names: table[string] of string = { ["Flash Player"] = "Flash", } &default=function(a: string): string { return a; }; diff --git a/scripts/base/frameworks/sumstats/cluster.zeek b/scripts/base/frameworks/sumstats/cluster.zeek index f055355170..40f6eb43c9 100644 --- a/scripts/base/frameworks/sumstats/cluster.zeek +++ b/scripts/base/frameworks/sumstats/cluster.zeek @@ -481,7 +481,7 @@ function request_key(ss_name: string, key: Key): Result add dynamic_requests[uid]; event SumStats::cluster_get_result(uid, ss_name, key, F); - return when ( uid in done_with && Cluster::worker_count == done_with[uid] ) + return when [uid, ss_name, key] ( uid in done_with && Cluster::worker_count == done_with[uid] ) { #print "done with request_key"; local result = key_requests[uid]; diff --git a/scripts/base/init-bare.zeek b/scripts/base/init-bare.zeek index 9e102ed6fc..d8c3ec2114 100644 --- a/scripts/base/init-bare.zeek +++ b/scripts/base/init-bare.zeek @@ -4963,9 +4963,6 @@ const dpd_ignore_ports = F &redef; ## connection if it misses the initial handshake. const likely_server_ports: set[port] &redef; -## Per-incident timer managers are drained after this amount of inactivity. 
-const timer_mgr_inactivity_timeout = 1 min &redef; - ## If true, output profiling for Time-Machine queries. const time_machine_profiling = F &redef; diff --git a/scripts/base/protocols/conn/main.zeek b/scripts/base/protocols/conn/main.zeek index 5f2999b206..cda5261bff 100644 --- a/scripts/base/protocols/conn/main.zeek +++ b/scripts/base/protocols/conn/main.zeek @@ -239,10 +239,11 @@ function determine_service(c: connection): string function set_conn(c: connection, eoc: bool) { if ( ! c?$conn ) - c$conn = Info(); + { + local p = get_port_transport_proto(c$id$resp_p); + c$conn = Info($ts=c$start_time, $uid=c$uid, $proto=p); + } - c$conn$ts=c$start_time; - c$conn$uid=c$uid; c$conn$id=c$id; if ( c?$tunnel && |c$tunnel| > 0 ) { @@ -250,7 +251,6 @@ function set_conn(c: connection, eoc: bool) c$conn$tunnel_parents = set(); add c$conn$tunnel_parents[c$tunnel[|c$tunnel|-1]$uid]; } - c$conn$proto=get_port_transport_proto(c$id$resp_p); if( |Site::local_nets| > 0 ) { c$conn$local_orig=Site::is_local_addr(c$id$orig_h); diff --git a/scripts/base/protocols/dhcp/consts.zeek b/scripts/base/protocols/dhcp/consts.zeek index 0bf16ded96..8507e91c9f 100644 --- a/scripts/base/protocols/dhcp/consts.zeek +++ b/scripts/base/protocols/dhcp/consts.zeek @@ -28,7 +28,7 @@ export { } &default = function(n: count): string { return fmt("unknown-message-type-%d", n); }; ## Option types mapped to their names. 
- const option_types: table[int] of string = { + const option_types = { [0] = "Pad", [1] = "Subnet Mask", [2] = "Time Offset", @@ -185,5 +185,5 @@ export { [221] = "Virtual Subnet Selection (VSS) Option", [252] = "auto-proxy-config", [255] = "End", - } &default = function(n: int): string { return fmt("unknown-option-type-%d", n); }; + } &default = function(n: count): string { return fmt("unknown-option-type-%d", n); }; } diff --git a/scripts/base/protocols/http/main.zeek b/scripts/base/protocols/http/main.zeek index c12da898d5..f54216b0dd 100644 --- a/scripts/base/protocols/http/main.zeek +++ b/scripts/base/protocols/http/main.zeek @@ -43,9 +43,12 @@ export { uri: string &log &optional; ## Value of the "referer" header. The comment is deliberately ## misspelled like the standard declares, but the name used here - ## is "referrer" spelled correctly. + ## is "referrer", spelled correctly. referrer: string &log &optional; - ## Value of the version portion of the request. + ## Value of the version portion of the reply. If you require + ## message-level detail, consider the :zeek:see:`http_request` and + ## :zeek:see:`http_reply` events, which report each message's + ## version string. version: string &log &optional; ## Value of the User-Agent header from the client. 
user_agent: string &log &optional; diff --git a/scripts/base/protocols/smb/consts.zeek b/scripts/base/protocols/smb/consts.zeek index 9b68419baa..42ba43639e 100644 --- a/scripts/base/protocols/smb/consts.zeek +++ b/scripts/base/protocols/smb/consts.zeek @@ -112,7 +112,7 @@ export { const rpc_sub_cmds: table[string] of rpc_cmd_table = { ["4b324fc8-1670-01d3-1278-5a47bf6ee188"] = srv_cmds, ["6bffd098-a112-3610-9833-46c3f87e345a"] = wksta_cmds, - } &redef &default=function(i: string):rpc_cmd_table { return table() &default=function(j: string):string { return fmt("unknown-uuid-%s", j); }; }; + } &redef &default=function(i: string):rpc_cmd_table { return table() &default=function(j: count):string { return fmt("unknown-uuid-%d", j); }; }; } diff --git a/scripts/base/protocols/ssl/dpd.sig b/scripts/base/protocols/ssl/dpd.sig index 1b8cad2f76..2603441d9a 100644 --- a/scripts/base/protocols/ssl/dpd.sig +++ b/scripts/base/protocols/ssl/dpd.sig @@ -1,17 +1,17 @@ -signature dpd_ssl_server { +signature dpd_tls_server { ip-proto == tcp - # Server hello. - payload /^((\x15\x03[\x00\x01\x02\x03]....)?\x16\x03[\x00\x01\x02\x03]..\x02...((\x03[\x00\x01\x02\x03\x04])|(\x7F[\x00-\x50]))|...?\x04..\x00\x02).*/ - requires-reverse-signature dpd_ssl_client - enable "ssl" + # SSL3 / TLS Server hello. + payload /^(\x15\x03[\x00\x01\x02\x03]....)?\x16\x03[\x00\x01\x02\x03]..\x02...((\x03[\x00\x01\x02\x03\x04])|(\x7F[\x00-\x50])).*/ tcp-state responder + enable "ssl" } -signature dpd_ssl_client { +signature dpd_tls_client { ip-proto == tcp - # Client hello. - payload /^(\x16\x03[\x00\x01\x02\x03]..\x01...\x03[\x00\x01\x02\x03]|...?\x01[\x00\x03][\x00\x01\x02\x03\x04]).*/ + # SSL3 / TLS Client hello. 
+ payload /^\x16\x03[\x00\x01\x02\x03]..\x01...\x03[\x00\x01\x02\x03].*/ tcp-state originator + enable "ssl" } signature dpd_dtls_client { diff --git a/scripts/base/protocols/ssl/main.zeek b/scripts/base/protocols/ssl/main.zeek index 2b610707e3..9257a89922 100644 --- a/scripts/base/protocols/ssl/main.zeek +++ b/scripts/base/protocols/ssl/main.zeek @@ -71,38 +71,44 @@ export { ## SSL history showing which types of packets we received in which order. ## Letters have the following meaning with client-sent letters being capitalized: - ## H hello_request - ## C client_hello - ## S server_hello - ## V hello_verify_request - ## T NewSessionTicket - ## X certificate - ## K server_key_exchange - ## R certificate_request - ## N server_hello_done - ## Y certificate_verify - ## G client_key_exchange - ## F finished - ## W certificate_url - ## U certificate_status - ## A supplemental_data - ## Z unassigned_handshake_type - ## I change_cipher_spec - ## B heartbeat - ## D application_data - ## E end_of_early_data - ## O encrypted_extensions - ## P key_update - ## M message_hash - ## J hello_retry_request - ## L alert - ## Q unknown_content_type + ## + ## ====== ==================================================== + ## Letter Meaning + ## ====== ==================================================== + ## H hello_request + ## C client_hello + ## S server_hello + ## V hello_verify_request + ## T NewSessionTicket + ## X certificate + ## K server_key_exchange + ## R certificate_request + ## N server_hello_done + ## Y certificate_verify + ## G client_key_exchange + ## F finished + ## W certificate_url + ## U certificate_status + ## A supplemental_data + ## Z unassigned_handshake_type + ## I change_cipher_spec + ## B heartbeat + ## D application_data + ## E end_of_early_data + ## O encrypted_extensions + ## P key_update + ## M message_hash + ## J hello_retry_request + ## L alert + ## Q unknown_content_type + ## ====== ==================================================== + ## ssl_history: 
string &log &default=""; }; ## The default root CA bundle. By default, the mozilla-ca-list.zeek ## script sets this to Mozilla's root CA list. - const root_certs: table[string] of string = {} &redef; + const root_certs: table[string] of string &redef; ## The record type which contains the field for the Certificate ## Transparency log bundle. diff --git a/scripts/policy/frameworks/cluster/agent/__load__.zeek b/scripts/policy/frameworks/cluster/agent/__load__.zeek deleted file mode 100644 index f7f36173f3..0000000000 --- a/scripts/policy/frameworks/cluster/agent/__load__.zeek +++ /dev/null @@ -1,4 +0,0 @@ -##! The entry point for the cluster agent. It runs bootstrap logic for launching -##! the agent process via Zeek's Supervisor. - -@load ./boot diff --git a/scripts/policy/frameworks/cluster/agent/main.zeek b/scripts/policy/frameworks/cluster/agent/main.zeek deleted file mode 100644 index f545186304..0000000000 --- a/scripts/policy/frameworks/cluster/agent/main.zeek +++ /dev/null @@ -1,271 +0,0 @@ -##! This is the main "runtime" of a cluster agent. Zeek does not load this -##! directly; rather, the agent's bootstrapping module (in ./boot.zeek) -##! specifies it as the script to run in the node newly created via Zeek's -##! supervisor. - -@load base/frameworks/broker - -@load policy/frameworks/cluster/controller/config -@load policy/frameworks/cluster/controller/log -@load policy/frameworks/cluster/controller/request - -@load ./api - -module ClusterAgent::Runtime; - -redef ClusterController::role = ClusterController::Types::AGENT; - -# The global configuration as passed to us by the controller -global g_config: ClusterController::Types::Configuration; - -# A map to make other instance info accessible -global g_instances: table[string] of ClusterController::Types::Instance; - -# A map for the nodes we run on this instance, via this agent. 
-global g_nodes: table[string] of ClusterController::Types::Node; - -# The node map employed by the supervisor to describe the cluster -# topology to newly forked nodes. We refresh it when we receive -# new configurations. -global g_data_cluster: table[string] of Supervisor::ClusterEndpoint; - - -event SupervisorControl::create_response(reqid: string, result: string) - { - local req = ClusterController::Request::lookup(reqid); - if ( ClusterController::Request::is_null(req) ) - return; - - local name = req$supervisor_state$node; - - if ( |result| > 0 ) - { - local msg = fmt("failed to create node %s: %s", name, result); - ClusterController::Log::error(msg); - event ClusterAgent::API::notify_error(ClusterAgent::name, msg, name); - } - - ClusterController::Request::finish(reqid); - } - -event SupervisorControl::destroy_response(reqid: string, result: bool) - { - local req = ClusterController::Request::lookup(reqid); - if ( ClusterController::Request::is_null(req) ) - return; - - local name = req$supervisor_state$node; - - if ( ! 
result ) - { - local msg = fmt("failed to destroy node %s, %s", name, reqid); - ClusterController::Log::error(msg); - event ClusterAgent::API::notify_error(ClusterAgent::name, msg, name); - } - - ClusterController::Request::finish(reqid); - } - -function supervisor_create(nc: Supervisor::NodeConfig) - { - local req = ClusterController::Request::create(); - req$supervisor_state = ClusterController::Request::SupervisorState($node = nc$name); - event SupervisorControl::create_request(req$id, nc); - ClusterController::Log::info(fmt("issued supervisor create for %s, %s", nc$name, req$id)); - } - -function supervisor_destroy(node: string) - { - local req = ClusterController::Request::create(); - req$supervisor_state = ClusterController::Request::SupervisorState($node = node); - event SupervisorControl::destroy_request(req$id, node); - ClusterController::Log::info(fmt("issued supervisor destroy for %s, %s", node, req$id)); - } - -event ClusterAgent::API::set_configuration_request(reqid: string, config: ClusterController::Types::Configuration) - { - ClusterController::Log::info(fmt("rx ClusterAgent::API::set_configuration_request %s", reqid)); - - local nodename: string; - local node: ClusterController::Types::Node; - local nc: Supervisor::NodeConfig; - local msg: string; - - # Adopt the global configuration provided. 
- # XXX this can later handle validation and persistence - # XXX should do this transactionally, only set when all else worked - g_config = config; - - # Refresh the instances table: - g_instances = table(); - for ( inst in config$instances ) - g_instances[inst$name] = inst; - - # Terminate existing nodes - for ( nodename in g_nodes ) - supervisor_destroy(nodename); - - g_nodes = table(); - - # Refresh the data cluster and nodes tables - - g_data_cluster = table(); - for ( node in config$nodes ) - { - if ( node$instance == ClusterAgent::name ) - g_nodes[node$name] = node; - - local cep = Supervisor::ClusterEndpoint( - $role = node$role, - $host = g_instances[node$instance]$host, - $p = node$p); - - if ( node?$interface ) - cep$interface = node$interface; - - g_data_cluster[node$name] = cep; - } - - # Apply the new configuration via the supervisor - - for ( nodename in g_nodes ) - { - node = g_nodes[nodename]; - nc = Supervisor::NodeConfig($name=nodename); - - if ( ClusterAgent::cluster_directory != "" ) - nc$directory = ClusterAgent::cluster_directory; - - if ( node?$interface ) - nc$interface = node$interface; - if ( node?$cpu_affinity ) - nc$cpu_affinity = node$cpu_affinity; - if ( node?$scripts ) - nc$scripts = node$scripts; - if ( node?$env ) - nc$env = node$env; - - # XXX could use options to enable per-node overrides for - # directory, stdout, stderr, others? - - nc$cluster = g_data_cluster; - supervisor_create(nc); - } - - # XXX this currently doesn not fail if any of above problems occurred, - # mainly due to the tediousness of handling the supervisor's response - # events asynchonously. The only indication of error will be - # notification events to the controller. 
- - if ( reqid != "" ) - { - local res = ClusterController::Types::Result( - $reqid = reqid, - $instance = ClusterAgent::name); - - ClusterController::Log::info(fmt("tx ClusterAgent::API::set_configuration_response %s", - ClusterController::Types::result_to_string(res))); - event ClusterAgent::API::set_configuration_response(reqid, res); - } - } - -event ClusterAgent::API::agent_welcome_request(reqid: string) - { - ClusterController::Log::info(fmt("rx ClusterAgent::API::agent_welcome_request %s", reqid)); - - local res = ClusterController::Types::Result( - $reqid = reqid, - $instance = ClusterAgent::name); - - ClusterController::Log::info(fmt("tx ClusterAgent::API::agent_welcome_response %s", - ClusterController::Types::result_to_string(res))); - event ClusterAgent::API::agent_welcome_response(reqid, res); - } - -event ClusterAgent::API::agent_standby_request(reqid: string) - { - ClusterController::Log::info(fmt("rx ClusterAgent::API::agent_standby_request %s", reqid)); - - # We shut down any existing cluster nodes via an empty configuration, - # and fall silent. We do not unpeer/disconnect (assuming we earlier - # peered/connected -- otherwise there's nothing we can do here via - # Broker anyway), mainly to keep open the possibility of running - # cluster nodes again later. - event ClusterAgent::API::set_configuration_request("", ClusterController::Types::Configuration()); - - local res = ClusterController::Types::Result( - $reqid = reqid, - $instance = ClusterAgent::name); - - ClusterController::Log::info(fmt("tx ClusterAgent::API::agent_standby_response %s", - ClusterController::Types::result_to_string(res))); - event ClusterAgent::API::agent_standby_response(reqid, res); - } - -event Broker::peer_added(peer: Broker::EndpointInfo, msg: string) - { - # This does not (cannot?) immediately verify that the new peer - # is in fact a controller, so we might send this in vain. - # Controllers register the agent upon receipt of the event. 
- - local epi = ClusterAgent::endpoint_info(); - - event ClusterAgent::API::notify_agent_hello(epi$id, - to_addr(epi$network$address), ClusterAgent::API::version); - } - -event zeek_init() - { - local epi = ClusterAgent::endpoint_info(); - local agent_topic = ClusterAgent::topic_prefix + "/" + epi$id; - - # The agent needs to peer with the supervisor -- this doesn't currently - # happen automatically. The address defaults to Broker's default, which - # relies on ZEEK_DEFAULT_LISTEN_ADDR and so might just be "". Broker - # internally falls back to listening on any; we pick 127.0.0.1. - local supervisor_addr = Broker::default_listen_address; - if ( supervisor_addr == "" ) - supervisor_addr = "127.0.0.1"; - - Broker::peer(supervisor_addr, Broker::default_port, Broker::default_listen_retry); - - # Agents need receive communication targeted at it, and any responses - # from the supervisor. - Broker::subscribe(agent_topic); - Broker::subscribe(SupervisorControl::topic_prefix); - - # Auto-publish a bunch of events. Glob patterns or module-level - # auto-publish would be helpful here. 
- Broker::auto_publish(agent_topic, ClusterAgent::API::set_configuration_response); - Broker::auto_publish(agent_topic, ClusterAgent::API::agent_welcome_response); - Broker::auto_publish(agent_topic, ClusterAgent::API::agent_standby_response); - - Broker::auto_publish(agent_topic, ClusterAgent::API::notify_agent_hello); - Broker::auto_publish(agent_topic, ClusterAgent::API::notify_change); - Broker::auto_publish(agent_topic, ClusterAgent::API::notify_error); - Broker::auto_publish(agent_topic, ClusterAgent::API::notify_log); - - Broker::auto_publish(SupervisorControl::topic_prefix, SupervisorControl::create_request); - Broker::auto_publish(SupervisorControl::topic_prefix, SupervisorControl::create_response); - Broker::auto_publish(SupervisorControl::topic_prefix, SupervisorControl::destroy_request); - Broker::auto_publish(SupervisorControl::topic_prefix, SupervisorControl::destroy_response); - Broker::auto_publish(SupervisorControl::topic_prefix, SupervisorControl::restart_request); - Broker::auto_publish(SupervisorControl::topic_prefix, SupervisorControl::restart_response); - Broker::auto_publish(SupervisorControl::topic_prefix, SupervisorControl::stop_request); - - # Establish connectivity with the controller. - if ( ClusterAgent::controller$address != "0.0.0.0" ) - { - # We connect to the controller. - Broker::peer(ClusterAgent::controller$address, - ClusterAgent::controller$bound_port, - ClusterController::connect_retry); - } - else - { - # Controller connects to us; listen for it. - Broker::listen(cat(epi$network$address), epi$network$bound_port); - } - - ClusterController::Log::info("agent is live"); - } diff --git a/scripts/policy/frameworks/cluster/controller/__load__.zeek b/scripts/policy/frameworks/cluster/controller/__load__.zeek deleted file mode 100644 index 6cd1dc789d..0000000000 --- a/scripts/policy/frameworks/cluster/controller/__load__.zeek +++ /dev/null @@ -1,4 +0,0 @@ -##! The entry point for the cluster controller. 
It runs bootstrap logic for -##! launching the controller process via Zeek's Supervisor. - -@load ./boot diff --git a/scripts/policy/frameworks/cluster/controller/boot.zeek b/scripts/policy/frameworks/cluster/controller/boot.zeek deleted file mode 100644 index f06a560760..0000000000 --- a/scripts/policy/frameworks/cluster/controller/boot.zeek +++ /dev/null @@ -1,36 +0,0 @@ -##! The cluster controller's boot logic runs in Zeek's supervisor and instructs -##! it to launch the controller process. The controller's main logic resides in -##! main.zeek, similarly to other frameworks. The new process will execute that -##! script. -##! -##! If the current process is not the Zeek supervisor, this does nothing. - -@load ./config - -event zeek_init() - { - if ( ! Supervisor::is_supervisor() ) - return; - - local epi = ClusterController::endpoint_info(); - local sn = Supervisor::NodeConfig($name=epi$id, $bare_mode=T, - $scripts=vector("policy/frameworks/cluster/controller/main.zeek")); - - if ( ClusterController::directory != "" ) - sn$directory = ClusterController::directory; - if ( ClusterController::stdout_file != "" ) - sn$stdout_file = ClusterController::stdout_file; - if ( ClusterController::stderr_file != "" ) - sn$stderr_file = ClusterController::stderr_file; - - # This helps Zeek run controller and agent with a minimal set of scripts. - sn$env["ZEEK_CLUSTER_MGMT_NODE"] = "CONTROLLER"; - - local res = Supervisor::create(sn); - - if ( res != "" ) - { - print(fmt("error: supervisor could not create controller node: %s", res)); - exit(1); - } - } diff --git a/scripts/policy/frameworks/cluster/controller/config.zeek b/scripts/policy/frameworks/cluster/controller/config.zeek deleted file mode 100644 index de4e570115..0000000000 --- a/scripts/policy/frameworks/cluster/controller/config.zeek +++ /dev/null @@ -1,110 +0,0 @@ -##! Configuration settings for the cluster controller. 
- -@load policy/frameworks/cluster/agent/config - -module ClusterController; - -export { - ## The name of this controller. Defaults to the value of the - ## ZEEK_CONTROLLER_NAME environment variable. When that is unset and the - ## user doesn't redef the value, the implementation defaults to - ## "controller-". - const name = getenv("ZEEK_CONTROLLER_NAME") &redef; - - ## The controller's stdout log name. If the string is non-empty, Zeek will - ## produce a free-form log (i.e., not one governed by Zeek's logging - ## framework) in Zeek's working directory. If left empty, no such log - ## results. - ## - ## Note that the controller also establishes a "proper" Zeek log via the - ## :zeek:see:`ClusterController::Log` module. - const stdout_file = "controller.stdout" &redef; - - ## The controller's stderr log name. Like :zeek:see:`ClusterController::stdout_file`, - ## but for the stderr stream. - const stderr_file = "controller.stderr" &redef; - - ## The network address the controller listens on. By default this uses - ## the value of the ZEEK_CONTROLLER_ADDR environment variable, but you - ## may also redef to a specific value. When empty, the implementation - ## falls back to :zeek:see:`ClusterController::default_address`. - const listen_address = getenv("ZEEK_CONTROLLER_ADDR") &redef; - - ## The fallback listen address if :zeek:see:`ClusterController::listen_address` - ## remains empty. Unless redefined, this uses Broker's own default - ## listen address. - const default_address = Broker::default_listen_address &redef; - - ## The network port the controller listens on. Counterpart to - ## :zeek:see:`ClusterController::listen_address`, defaulting to the - ## ZEEK_CONTROLLER_PORT environment variable. - const listen_port = getenv("ZEEK_CONTROLLER_PORT") &redef; - - ## The fallback listen port if :zeek:see:`ClusterController::listen_port` - ## remains empty. - const default_port = 2150/tcp &redef; - - ## The controller's connect retry interval. 
Defaults to a more - ## aggressive value compared to Broker's 30s. - const connect_retry = 1sec &redef; - - ## The controller's Broker topic. Clients send requests to this topic. - const topic = "zeek/cluster-control/controller" &redef; - - ## The role of this process in cluster management. Agent and controller - ## both redefine this. Used during logging. - const role = ClusterController::Types::NONE &redef; - - ## The timeout for request state. Such state (see the :zeek:see:`ClusterController::Request` - ## module) ties together request and response event pairs. The timeout causes - ## its cleanup in the absence of a timely response. It applies both to - ## state kept for client requests, as well as state in the agents for - ## requests to the supervisor. - const request_timeout = 10sec &redef; - - ## An optional custom output directory for the controller's stdout and - ## stderr logs. Agent and controller currently only log locally, not via - ## the data cluster's logger node. (This might change in the future.) - ## This means that if both write to the same log file, the output gets - ## garbled. - const directory = "" &redef; - - ## Returns a :zeek:see:`Broker::NetworkInfo` record describing the controller. - global network_info: function(): Broker::NetworkInfo; - - ## Returns a :zeek:see:`Broker::EndpointInfo` record describing the controller. 
- global endpoint_info: function(): Broker::EndpointInfo; -} - -function network_info(): Broker::NetworkInfo - { - local ni: Broker::NetworkInfo; - - if ( ClusterController::listen_address != "" ) - ni$address = ClusterController::listen_address; - else if ( ClusterController::default_address != "" ) - ni$address = ClusterController::default_address; - else - ni$address = "127.0.0.1"; - - if ( ClusterController::listen_port != "" ) - ni$bound_port = to_port(ClusterController::listen_port); - else - ni$bound_port = ClusterController::default_port; - - return ni; - } - -function endpoint_info(): Broker::EndpointInfo - { - local epi: Broker::EndpointInfo; - - if ( ClusterController::name != "" ) - epi$id = ClusterController::name; - else - epi$id = fmt("controller-%s", gethostname()); - - epi$network = network_info(); - - return epi; - } diff --git a/scripts/policy/frameworks/cluster/controller/main.zeek b/scripts/policy/frameworks/cluster/controller/main.zeek deleted file mode 100644 index 33e0456049..0000000000 --- a/scripts/policy/frameworks/cluster/controller/main.zeek +++ /dev/null @@ -1,555 +0,0 @@ -##! This is the main "runtime" of the cluster controller. Zeek does not load -##! this directly; rather, the controller's bootstrapping module (in ./boot.zeek) -##! specifies it as the script to run in the node newly created via Zeek's -##! supervisor. 
- -@load base/frameworks/broker - -@load policy/frameworks/cluster/agent/config -@load policy/frameworks/cluster/agent/api - -@load ./api -@load ./log -@load ./request -@load ./util - -module ClusterController::Runtime; - -redef ClusterController::role = ClusterController::Types::CONTROLLER; - -global check_instances_ready: function(); -global add_instance: function(inst: ClusterController::Types::Instance); -global drop_instance: function(inst: ClusterController::Types::Instance); - -global null_config: function(): ClusterController::Types::Configuration; -global is_null_config: function(config: ClusterController::Types::Configuration): bool; - -# Checks whether the given instance is one that we know with different -# communication settings: a a different peering direction, a different listening -# port, etc. Used as a predicate to indicate when we need to drop the existing -# one from our internal state. -global is_instance_connectivity_change: function - (inst: ClusterController::Types::Instance): bool; - -# The set of agents the controller interacts with to manage to currently -# configured cluster. This may be a subset of all the agents known to the -# controller, as tracked by the g_instances_known set. They key is the instance -# name and should match the $name member of the corresponding instance record. -global g_instances: table[string] of ClusterController::Types::Instance = table(); - -# The set of instances that have checked in with the controller. This is a -# superset of g_instances, since it covers any agent that has sent us a -# notify_agent_hello event. -global g_instances_known: set[string] = set(); - -# A corresponding set of instances/agents that we track in order to understand -# when all of the above instances have sent agent_welcome_response events. (An -# alternative would be to use a record that adds a single state bit for each -# instance, and store that above.) 
-global g_instances_ready: set[string] = set(); - -# The request ID of the most recent configuration update that's come in from -# a client. We track it here until we know we are ready to communicate with all -# agents required by the update. -global g_config_reqid_pending: string = ""; - -# The most recent configuration we have successfully deployed. This is also -# the one we send whenever the client requests it. -global g_config_current: ClusterController::Types::Configuration; - -function send_config_to_agents(req: ClusterController::Request::Request, - config: ClusterController::Types::Configuration) - { - for ( name in g_instances ) - { - if ( name !in g_instances_ready ) - next; - - local agent_topic = ClusterAgent::topic_prefix + "/" + name; - local areq = ClusterController::Request::create(); - areq$parent_id = req$id; - - # We track the requests sent off to each agent. As the - # responses come in, we can check them off as completed, - # and once all are, we respond back to the client. - req$set_configuration_state$requests += areq; - - # We could also broadcast just once on the agent prefix, but - # explicit request/response pairs for each agent seems cleaner. - ClusterController::Log::info(fmt("tx ClusterAgent::API::set_configuration_request %s to %s", areq$id, name)); - Broker::publish(agent_topic, ClusterAgent::API::set_configuration_request, areq$id, config); - } - } - -# This is the &on_change handler for the g_instances_ready set, meaning -# it runs whenever a required agent has confirmed it's ready. 
-function check_instances_ready() - { - local cur_instances: set[string]; - - for ( inst in g_instances ) - add cur_instances[inst]; - - if ( cur_instances == g_instances_ready ) - event ClusterController::API::notify_agents_ready(cur_instances); - } - -function add_instance(inst: ClusterController::Types::Instance) - { - g_instances[inst$name] = inst; - - if ( inst?$listen_port ) - Broker::peer(cat(inst$host), inst$listen_port, - ClusterController::connect_retry); - - if ( inst$name in g_instances_known ) - { - # The agent has already peered with us. Send welcome to indicate - # it's part of cluster management. Once it responds, we update - # the set of ready instances and proceed as feasible with config - # deployments. - - local req = ClusterController::Request::create(); - - ClusterController::Log::info(fmt("tx ClusterAgent::API::agent_welcome_request to %s", inst$name)); - Broker::publish(ClusterAgent::topic_prefix + "/" + inst$name, - ClusterAgent::API::agent_welcome_request, req$id); - } - } - -function drop_instance(inst: ClusterController::Types::Instance) - { - if ( inst$name !in g_instances ) - return; - - # Send the agent a standby so it shuts down its cluster nodes & state - ClusterController::Log::info(fmt("tx ClusterAgent::API::agent_standby_request to %s", inst$name)); - Broker::publish(ClusterAgent::topic_prefix + "/" + inst$name, - ClusterAgent::API::agent_standby_request, ""); - - delete g_instances[inst$name]; - - if ( inst$name in g_instances_ready ) - delete g_instances_ready[inst$name]; - - # The agent remains in g_instances_known, to track that we're able - # to communicate with it in case it's required again. 
- - ClusterController::Log::info(fmt("dropped instance %s", inst$name)); - } - -function null_config(): ClusterController::Types::Configuration - { - return ClusterController::Types::Configuration($id=""); - } - -function is_null_config(config: ClusterController::Types::Configuration): bool - { - return config$id == ""; - } - -function is_instance_connectivity_change(inst: ClusterController::Types::Instance): bool - { - # If we're not tracking this instance as part of a cluster config, it's - # not a change. (More precisely: we cannot say whether it's changed.) - if ( inst$name !in g_instances ) - return F; - - # The agent has peered with us and now uses a different host. - # XXX 0.0.0.0 is a workaround until we've resolved how agents that peer - # with us obtain their identity. Broker ID? - if ( inst$host != 0.0.0.0 && inst$host != g_instances[inst$name]$host ) - return T; - - # The agent has a listening port and the one we know does not, or vice - # versa. I.e., this is a change in the intended peering direction. - if ( inst?$listen_port != g_instances[inst$name]?$listen_port ) - return T; - - # Both have listening ports, but they differ. - if ( inst?$listen_port && g_instances[inst$name]?$listen_port && - inst$listen_port != g_instances[inst$name]$listen_port ) - return T; - - return F; - } - -event ClusterController::API::notify_agents_ready(instances: set[string]) - { - local insts = ClusterController::Util::set_to_vector(instances); - - ClusterController::Log::info(fmt("rx ClusterController::API:notify_agents_ready %s", join_string_vec(insts, ","))); - - local req = ClusterController::Request::lookup(g_config_reqid_pending); - - # If there's no pending request, when it's no longer available, or it - # doesn't have config state, don't do anything else. - if ( ClusterController::Request::is_null(req) || ! req?$set_configuration_state ) - return; - - # All instances requested in the pending configuration update are now - # known to us. Send them the config. 
As they send their response events - # we update the client's request state and eventually send the response - # event to the it. - send_config_to_agents(req, req$set_configuration_state$config); - } - -event ClusterAgent::API::notify_agent_hello(instance: string, host: addr, api_version: count) - { - ClusterController::Log::info(fmt("rx ClusterAgent::API::notify_agent_hello %s %s", instance, host)); - - # When an agent checks in with a mismatching API version, we log the - # fact and drop its state, if any. - if ( api_version != ClusterController::API::version ) - { - ClusterController::Log::warning( - fmt("instance %s/%s has checked in with incompatible API version %s", - instance, host, api_version)); - - if ( instance in g_instances ) - drop_instance(g_instances[instance]); - if ( instance in g_instances_known ) - delete g_instances_known[instance]; - - return; - } - - add g_instances_known[instance]; - - if ( instance in g_instances && instance !in g_instances_ready ) - { - # We need this instance for our cluster and have full context for - # it from the configuration. Tell agent. - local req = ClusterController::Request::create(); - - ClusterController::Log::info(fmt("tx ClusterAgent::API::agent_welcome_request to %s", instance)); - Broker::publish(ClusterAgent::topic_prefix + "/" + instance, - ClusterAgent::API::agent_welcome_request, req$id); - } - } - -event ClusterAgent::API::agent_welcome_response(reqid: string, result: ClusterController::Types::Result) - { - ClusterController::Log::info(fmt("rx ClusterAgent::API::agent_welcome_response %s", reqid)); - - local req = ClusterController::Request::lookup(reqid); - - if ( ClusterController::Request::is_null(req) ) - return; - - ClusterController::Request::finish(req$id); - - # An agent we've been waiting to hear back from is ready for cluster - # work. Double-check we still want it, otherwise drop it. - - if ( ! 
result$success || result$instance !in g_instances ) - { - ClusterController::Log::info(fmt( - "tx ClusterAgent::API::agent_standby_request to %s", result$instance)); - Broker::publish(ClusterAgent::topic_prefix + "/" + result$instance, - ClusterAgent::API::agent_standby_request, ""); - return; - } - - add g_instances_ready[result$instance]; - ClusterController::Log::info(fmt("instance %s ready", result$instance)); - - check_instances_ready(); - } - -event ClusterAgent::API::notify_change(instance: string, n: ClusterController::Types::Node, - old: ClusterController::Types::State, - new: ClusterController::Types::State) - { - # XXX TODO - } - -event ClusterAgent::API::notify_error(instance: string, msg: string, node: string) - { - # XXX TODO - } - -event ClusterAgent::API::notify_log(instance: string, msg: string, node: string) - { - # XXX TODO - } - -event ClusterAgent::API::set_configuration_response(reqid: string, result: ClusterController::Types::Result) - { - ClusterController::Log::info(fmt("rx ClusterAgent::API::set_configuration_response %s", reqid)); - - # Retrieve state for the request we just got a response to - local areq = ClusterController::Request::lookup(reqid); - if ( ClusterController::Request::is_null(areq) ) - return; - - # Record the result and mark the request as done. This also - # marks the request as done in the parent-level request, since - # these records are stored by reference. - areq$results[0] = result; # We only have a single result here atm - areq$finished = T; - - # Update the original request from the client: - local req = ClusterController::Request::lookup(areq$parent_id); - if ( ClusterController::Request::is_null(req) ) - return; - - # If there are any requests to the agents still unfinished, - # we're not done yet. - for ( i in req$set_configuration_state$requests ) - if ( ! req$set_configuration_state$requests[i]$finished ) - return; - - # All set_configuration requests to instances are done, so respond - # back to client. 
We need to compose the result, aggregating - # the results we got from the requests to the agents. In the - # end we have one Result per instance requested in the - # original set_configuration_request. - # - # XXX we can likely generalize result aggregation in the request module. - for ( i in req$set_configuration_state$requests ) - { - local r = req$set_configuration_state$requests[i]; - - local success = T; - local errors: string_vec; - local instance = ""; - - for ( j in r$results ) - { - local res = r$results[j]; - instance = res$instance; - - if ( res$success ) - next; - - success = F; - errors += fmt("node %s failed: %s", res$node, res$error); - } - - req$results += ClusterController::Types::Result( - $reqid = req$id, - $instance = instance, - $success = success, - $error = join_string_vec(errors, ", ") - ); - - ClusterController::Request::finish(r$id); - } - - # We're now done with the original set_configuration request. - # Adopt the configuration as the current one. - g_config_current = req$set_configuration_state$config; - g_config_reqid_pending = ""; - - ClusterController::Log::info(fmt("tx ClusterController::API::set_configuration_response %s", - ClusterController::Request::to_string(req))); - event ClusterController::API::set_configuration_response(req$id, req$results); - ClusterController::Request::finish(req$id); - } - -event ClusterController::API::set_configuration_request(reqid: string, config: ClusterController::Types::Configuration) - { - ClusterController::Log::info(fmt("rx ClusterController::API::set_configuration_request %s", reqid)); - - local res: ClusterController::Types::Result; - local req = ClusterController::Request::create(reqid); - - req$set_configuration_state = ClusterController::Request::SetConfigurationState($config = config); - - # At the moment there can only be one pending request. 
- if ( g_config_reqid_pending != "" ) - { - res = ClusterController::Types::Result($reqid=reqid); - res$success = F; - res$error = fmt("request %s still pending", g_config_reqid_pending); - req$results += res; - - ClusterController::Log::info(fmt("tx ClusterController::API::set_configuration_response %s", - ClusterController::Request::to_string(req))); - event ClusterController::API::set_configuration_response(req$id, req$results); - ClusterController::Request::finish(req$id); - return; - } - - # XXX validate the configuration: - # - Are node instances among defined instances? - # - Are all names unique? - # - Are any node options understood? - # - Do node types with optional fields have required values? - # ... - - # The incoming request is now the pending one. It gets cleared when all - # agents have processed their config updates successfully, or their - # responses time out. - g_config_reqid_pending = req$id; - - # Compare the instance configuration to our current one. If it matches, - # we can proceed to deploying the new data cluster topology. If it does - # not, we need to establish connectivity with agents we connect to, or - # wait until all instances that connect to us have done so. Either triggers - # a notify_agents_ready event, upon which we then deploy the data cluster. - - # The current & new set of instance names. - local insts_current: set[string]; - local insts_new: set[string]; - - # A set of current instances not contained in the new config. - # Those will need to get dropped. - local insts_to_drop: set[string]; - - # The opposite: new instances not yet in our current set. Those we will need - # to establish contact with (or they with us). - local insts_to_add: set[string]; - - # The overlap: instances in both the current and new set. For those we verify - # that we're actually dealign with the same entities, and might need to re- - # connect if not. 
- local insts_to_keep: set[string]; - - # Alternative representation of insts_to_add, directly providing the instances. - local insts_to_peer: table[string] of ClusterController::Types::Instance; - - # Helpful locals. - local inst_name: string; - local inst: ClusterController::Types::Instance; - - for ( inst_name in g_instances ) - add insts_current[inst_name]; - for ( inst in config$instances ) - add insts_new[inst$name]; - - # Populate TODO lists for instances we need to drop, check, or add. - insts_to_drop = insts_current - insts_new; - insts_to_add = insts_new - insts_current; - insts_to_keep = insts_new & insts_current; - - for ( inst in config$instances ) - { - if ( inst$name in insts_to_add ) - { - insts_to_peer[inst$name] = inst; - next; - } - - # Focus on the keepers: check for change in identity/location. - if ( inst$name !in insts_to_keep ) - next; - - if ( is_instance_connectivity_change(inst) ) - { - # The endpoint looks different. We drop the current one - # and need to re-establish connectivity with the new - # one. - add insts_to_drop[inst$name]; - add insts_to_add[inst$name]; - } - } - - # Process our TODO lists. Handle drops first, then additions, in - # case we need to re-establish connectivity with an agent. 
- - for ( inst_name in insts_to_drop ) - drop_instance(g_instances[inst_name]); - for ( inst_name in insts_to_peer ) - add_instance(insts_to_peer[inst_name]); - - # Updates to out instance tables are complete, now check if we're already - # able to send the config to the agents: - check_instances_ready(); - } - -event ClusterController::API::get_instances_request(reqid: string) - { - ClusterController::Log::info(fmt("rx ClusterController::API::set_instances_request %s", reqid)); - - local res = ClusterController::Types::Result($reqid = reqid); - local insts: vector of ClusterController::Types::Instance; - - for ( i in g_instances ) - insts += g_instances[i]; - - res$data = insts; - - ClusterController::Log::info(fmt("tx ClusterController::API::get_instances_response %s", reqid)); - event ClusterController::API::get_instances_response(reqid, res); - } - -event ClusterController::Request::request_expired(req: ClusterController::Request::Request) - { - # Various handlers for timed-out request state. We use the state members - # to identify how to respond. No need to clean up the request itself, - # since we're getting here via the request module's expiration - # mechanism that handles the cleanup. - local res: ClusterController::Types::Result; - - if ( req?$set_configuration_state ) - { - # This timeout means we no longer have a pending request. 
- g_config_reqid_pending = ""; - - res = ClusterController::Types::Result($reqid=req$id); - res$success = F; - res$error = "request timed out"; - req$results += res; - - ClusterController::Log::info(fmt("tx ClusterController::API::set_configuration_response %s", - ClusterController::Request::to_string(req))); - event ClusterController::API::set_configuration_response(req$id, req$results); - } - - if ( req?$test_state ) - { - res = ClusterController::Types::Result($reqid=req$id); - res$success = F; - res$error = "request timed out"; - - ClusterController::Log::info(fmt("tx ClusterController::API::test_timeout_response %s", req$id)); - event ClusterController::API::test_timeout_response(req$id, res); - } - } - -event ClusterController::API::test_timeout_request(reqid: string, with_state: bool) - { - ClusterController::Log::info(fmt("rx ClusterController::API::test_timeout_request %s %s", reqid, with_state)); - - if ( with_state ) - { - # This state times out and triggers a timeout response in the - # above request_expired event handler. - local req = ClusterController::Request::create(reqid); - req$test_state = ClusterController::Request::TestState(); - } - } - -event zeek_init() - { - # Initialize null config at startup. We will replace it once we have - # persistence, and again whenever we complete a client's - # set_configuration request. - g_config_current = null_config(); - - # The controller always listens -- it needs to be able to respond to the - # Zeek client. This port is also used by the agents if they connect to - # the client. The client doesn't automatically establish or accept - # connectivity to agents: agents are defined and communicated with as - # defined via configurations defined by the client. 
- - local cni = ClusterController::network_info(); - - Broker::listen(cat(cni$address), cni$bound_port); - - Broker::subscribe(ClusterAgent::topic_prefix); - Broker::subscribe(ClusterController::topic); - - # Events sent to the client: - - Broker::auto_publish(ClusterController::topic, - ClusterController::API::get_instances_response); - Broker::auto_publish(ClusterController::topic, - ClusterController::API::set_configuration_response); - Broker::auto_publish(ClusterController::topic, - ClusterController::API::test_timeout_response); - - ClusterController::Log::info("controller is live"); - } diff --git a/scripts/policy/frameworks/management/__load__.zeek b/scripts/policy/frameworks/management/__load__.zeek new file mode 100644 index 0000000000..96192ea366 --- /dev/null +++ b/scripts/policy/frameworks/management/__load__.zeek @@ -0,0 +1,11 @@ +##! This loads Management framework functionality needed by both the controller +##! and agents. Note that there's no notion of loading "the Management +##! framework" -- one always loads "management/controller" or +##! "management/agent". This __load__ script exists only to simplify loading all +##! common functionality. + +@load ./config +@load ./log +@load ./request +@load ./types +@load ./util diff --git a/scripts/policy/frameworks/management/agent/__load__.zeek b/scripts/policy/frameworks/management/agent/__load__.zeek new file mode 100644 index 0000000000..57cefe0757 --- /dev/null +++ b/scripts/policy/frameworks/management/agent/__load__.zeek @@ -0,0 +1,4 @@ +##! The entry point for the Management framework's cluster agent. It runs +##! bootstrap logic for launching the agent process via Zeek's Supervisor. 
+ +@load ./boot diff --git a/scripts/policy/frameworks/cluster/agent/api.zeek b/scripts/policy/frameworks/management/agent/api.zeek similarity index 56% rename from scripts/policy/frameworks/cluster/agent/api.zeek rename to scripts/policy/frameworks/management/agent/api.zeek index 7957677457..8ba47ee67d 100644 --- a/scripts/policy/frameworks/cluster/agent/api.zeek +++ b/scripts/policy/frameworks/management/agent/api.zeek @@ -4,16 +4,15 @@ ##! "_response", respectively. @load base/frameworks/supervisor/control -@load policy/frameworks/cluster/controller/types +@load policy/frameworks/management/types -module ClusterAgent::API; +module Management::Agent::API; export { ## A simple versioning scheme, used to track basic compatibility of ## controller and agent. const version = 1; - # Agent API events ## The controller sends this event to convey a new cluster configuration @@ -22,14 +21,14 @@ export { ## ## reqid: a request identifier string, echoed in the response event. ## - ## config: a :zeek:see:`ClusterController::Types::Configuration` record + ## config: a :zeek:see:`Management::Configuration` record ## describing the cluster topology. Note that this contains the full ## topology, not just the part pertaining to this agent. That's because ## the cluster framework requires full cluster visibility to establish ## the needed peerings. ## global set_configuration_request: event(reqid: string, - config: ClusterController::Types::Configuration); + config: Management::Configuration); ## Response to a set_configuration_request event. The agent sends ## this back to the controller. @@ -39,7 +38,62 @@ export { ## result: the result record. ## global set_configuration_response: event(reqid: string, - result: ClusterController::Types::Result); + result: Management::Result); + + + ## The controller sends this event to request a list of + ## :zeek:see:`Management::NodeStatus` records that capture + ## the status of Supervisor-managed nodes running on this instance. 
+ ## instances. + ## + ## reqid: a request identifier string, echoed in the response event. + ## + global get_nodes_request: event(reqid: string); + + ## Response to a get_nodes_request event. The agent sends this back to the + ## controller. + ## + ## reqid: the request identifier used in the request event. + ## + ## result: a :zeek:see:`Management::Result` record. Its data + ## member is a vector of :zeek:see:`Management::NodeStatus` + ## records, covering the nodes at this instance. The result may also + ## indicate failure, with error messages indicating what went wrong. + ## + global get_nodes_response: event(reqid: string, result: Management::Result); + + + ## The controller sends this to every agent to request a dispatch (the + ## execution of a pre-implemented activity) to all cluster nodes. This + ## is the generic controller-agent "back-end" implementation of explicit + ## client-controller "front-end" interactions, including: + ## + ## - :zeek:see:`Management::Controller::API::get_id_value_request`: two + ## arguments, the first being "get_id_value" and the second the name + ## of the ID to look up. + ## + ## reqid: a request identifier string, echoed in the response event. + ## + ## action: the requested dispatch command, with any arguments. + ## + ## nodes: a set of cluster node names (e.g. "worker-01") to retrieve + ## the values from. An empty set, supplied by default, means + ## retrieval from all nodes managed by the agent. + global node_dispatch_request: event(reqid: string, action: vector of string, + nodes: set[string] &default=set()); + + ## Response to a node_dispatch_request event. Each agent sends this back + ## to the controller to report the dispatch outcomes on all nodes managed + ## by that agent. + ## + ## reqid: the request identifier used in the request event. + ## + ## result: a :zeek:type:`vector` of :zeek:see:`Management::Result` + ## records. Each record covers one Zeek cluster node managed by this + ## agent. 
Upon success, each :zeek:see:`Management::Result` record's + ## data member contains the dispatches' response in a data type + ## appropriate for the respective dispatch. + global node_dispatch_response: event(reqid: string, result: Management::ResultVec); ## The controller sends this event to confirm to the agent that it is @@ -58,7 +112,7 @@ export { ## result: the result record. ## global agent_welcome_response: event(reqid: string, - result: ClusterController::Types::Result); + result: Management::Result); ## The controller sends this event to convey that the agent is not @@ -81,7 +135,7 @@ export { ## result: the result record. ## global agent_standby_response: event(reqid: string, - result: ClusterController::Types::Result); + result: Management::Result); # Notification events, agent -> controller @@ -91,7 +145,7 @@ export { ## communicate with. It is a controller-level equivalent of ## `:zeek:see:`Broker::peer_added`. ## - ## instance: an instance name, really the agent's name as per :zeek:see:`ClusterAgent::name`. + ## instance: an instance name, really the agent's name as per :zeek:see:`Management::Agent::name`. ## ## host: the IP address of the agent. (This may change in the future.) ## @@ -105,9 +159,9 @@ export { # Report node state changes. global notify_change: event(instance: string, - n: ClusterController::Types::Node, - old: ClusterController::Types::State, - new: ClusterController::Types::State); + n: Management::Node, + old: Management::State, + new: Management::State); # Report operational error. 
global notify_error: event(instance: string, msg: string, node: string &default=""); diff --git a/scripts/policy/frameworks/cluster/agent/boot.zeek b/scripts/policy/frameworks/management/agent/boot.zeek similarity index 63% rename from scripts/policy/frameworks/cluster/agent/boot.zeek rename to scripts/policy/frameworks/management/agent/boot.zeek index daff5b2d24..7b8bedd088 100644 --- a/scripts/policy/frameworks/cluster/agent/boot.zeek +++ b/scripts/policy/frameworks/management/agent/boot.zeek @@ -1,5 +1,5 @@ ##! The cluster agent boot logic runs in Zeek's supervisor and instructs it to -##! launch an agent process. The agent's main logic resides in main.zeek, +##! launch a Management agent process. The agent's main logic resides in main.zeek, ##! similarly to other frameworks. The new process will execute that script. ##! ##! If the current process is not the Zeek supervisor, this does nothing. @@ -17,16 +17,16 @@ event zeek_init() if ( ! Supervisor::is_supervisor() ) return; - local epi = ClusterAgent::endpoint_info(); + local epi = Management::Agent::endpoint_info(); local sn = Supervisor::NodeConfig($name=epi$id, $bare_mode=T, - $scripts=vector("policy/frameworks/cluster/agent/main.zeek")); + $scripts=vector("policy/frameworks/management/agent/main.zeek")); - if ( ClusterAgent::directory != "" ) - sn$directory = ClusterAgent::directory; - if ( ClusterAgent::stdout_file_suffix != "" ) - sn$stdout_file = epi$id + "." + ClusterAgent::stdout_file_suffix; - if ( ClusterAgent::stderr_file_suffix != "" ) - sn$stderr_file = epi$id + "." + ClusterAgent::stderr_file_suffix; + if ( Management::Agent::directory != "" ) + sn$directory = Management::Agent::directory; + if ( Management::Agent::stdout_file_suffix != "" ) + sn$stdout_file = epi$id + "." + Management::Agent::stdout_file_suffix; + if ( Management::Agent::stderr_file_suffix != "" ) + sn$stderr_file = epi$id + "." 
+ Management::Agent::stderr_file_suffix; # This helps Zeek run controller and agent with a minimal set of scripts. sn$env["ZEEK_CLUSTER_MGMT_NODE"] = "AGENT"; diff --git a/scripts/policy/frameworks/cluster/agent/config.zeek b/scripts/policy/frameworks/management/agent/config.zeek similarity index 56% rename from scripts/policy/frameworks/cluster/agent/config.zeek rename to scripts/policy/frameworks/management/agent/config.zeek index 732dc39450..29567b8f5e 100644 --- a/scripts/policy/frameworks/cluster/agent/config.zeek +++ b/scripts/policy/frameworks/management/agent/config.zeek @@ -1,8 +1,9 @@ ##! Configuration settings for a cluster agent. -@load policy/frameworks/cluster/controller/types +@load policy/frameworks/management/config +@load policy/frameworks/management/types -module ClusterAgent; +module Management::Agent; export { ## The name this agent uses to represent the cluster instance it @@ -14,55 +15,49 @@ export { ## Agent stdout log configuration. If the string is non-empty, Zeek will ## produce a free-form log (i.e., not one governed by Zeek's logging ## framework) in Zeek's working directory. The final log's name is - ## ".", where the name is taken from :zeek:see:`ClusterAgent::name`, + ## ".", where the name is taken from :zeek:see:`Management::Agent::name`, ## and the suffix is defined by the following variable. If left empty, ## no such log results. ## ## Note that the agent also establishes a "proper" Zeek log via the - ## :zeek:see:`ClusterController::Log` module. + ## :zeek:see:`Management::Log` module. const stdout_file_suffix = "agent.stdout" &redef; - ## Agent stderr log configuration. Like :zeek:see:`ClusterAgent::stdout_file_suffix`, + ## Agent stderr log configuration. Like :zeek:see:`Management::Agent::stdout_file_suffix`, ## but for the stderr stream. const stderr_file_suffix = "agent.stderr" &redef; ## The network address the agent listens on. 
This only takes effect if ## the agent isn't configured to connect to the controller (see - ## :zeek:see:`ClusterAgent::controller`). By default this uses the value of the + ## :zeek:see:`Management::Agent::controller`). By default this uses the value of the ## ZEEK_AGENT_ADDR environment variable, but you may also redef to ## a specific value. When empty, the implementation falls back to - ## :zeek:see:`ClusterAgent::default_address`. + ## :zeek:see:`Management::default_address`. const listen_address = getenv("ZEEK_AGENT_ADDR") &redef; - ## The fallback listen address if :zeek:see:`ClusterAgent::listen_address` - ## remains empty. Unless redefined, this uses Broker's own default listen - ## address. - const default_address = Broker::default_listen_address &redef; - ## The network port the agent listens on. Counterpart to - ## :zeek:see:`ClusterAgent::listen_address`, defaulting to the ZEEK_AGENT_PORT + ## :zeek:see:`Management::Agent::listen_address`, defaulting to the ZEEK_AGENT_PORT ## environment variable. const listen_port = getenv("ZEEK_AGENT_PORT") &redef; - ## The fallback listen port if :zeek:see:`ClusterAgent::listen_port` remains empty. + ## The fallback listen port if :zeek:see:`Management::Agent::listen_port` remains empty. const default_port = 2151/tcp &redef; ## The agent's Broker topic prefix. For its own communication, the agent - ## suffixes this with "/", based on :zeek:see:`ClusterAgent::name`. - const topic_prefix = "zeek/cluster-control/agent" &redef; + ## suffixes this with "/", based on :zeek:see:`Management::Agent::name`. + const topic_prefix = "zeek/management/agent" &redef; ## The network coordinates of the controller. When defined, the agent ## peers with (and connects to) the controller; otherwise the controller ## will peer (and connect to) the agent, listening as defined by - ## :zeek:see:`ClusterAgent::listen_address` and :zeek:see:`ClusterAgent::listen_port`. 
+ ## :zeek:see:`Management::Agent::listen_address` and :zeek:see:`Management::Agent::listen_port`. const controller: Broker::NetworkInfo = [ $address="0.0.0.0", $bound_port=0/unknown] &redef; - ## An optional custom output directory for the agent's stdout and stderr - ## logs. Agent and controller currently only log locally, not via the - ## data cluster's logger node. (This might change in the future.) This - ## means that if both write to the same log file, the output gets - ## garbled. + ## An optional custom output directory for stdout/stderr. Agent and + ## controller currently only log locally, not via the data cluster's + ## logger node. This means that if both write to the same log file, + ## output gets garbled. const directory = "" &redef; ## The working directory for data cluster nodes created by this @@ -71,20 +66,20 @@ export { ## cluster nodes. const cluster_directory = "" &redef; - ## Returns a :zeek:see:`ClusterController::Types::Instance` describing this + ## Returns a :zeek:see:`Management::Instance` describing this ## instance (its agent name plus listening address/port, as applicable). - global instance: function(): ClusterController::Types::Instance; + global instance: function(): Management::Instance; ## Returns a :zeek:see:`Broker::EndpointInfo` record for this instance. - ## Similar to :zeek:see:`ClusterAgent::instance`, but with slightly different + ## Similar to :zeek:see:`Management::Agent::instance`, but with slightly different ## data format. 
global endpoint_info: function(): Broker::EndpointInfo; } -function instance(): ClusterController::Types::Instance +function instance(): Management::Instance { local epi = endpoint_info(); - return ClusterController::Types::Instance($name=epi$id, + return Management::Instance($name=epi$id, $host=to_addr(epi$network$address), $listen_port=epi$network$bound_port); } @@ -94,22 +89,22 @@ function endpoint_info(): Broker::EndpointInfo local epi: Broker::EndpointInfo; local network: Broker::NetworkInfo; - if ( ClusterAgent::name != "" ) - epi$id = ClusterAgent::name; + if ( Management::Agent::name != "" ) + epi$id = Management::Agent::name; else epi$id = fmt("agent-%s", gethostname()); - if ( ClusterAgent::listen_address != "" ) - network$address = ClusterAgent::listen_address; - else if ( ClusterAgent::default_address != "" ) - network$address = ClusterAgent::default_address; + if ( Management::Agent::listen_address != "" ) + network$address = Management::Agent::listen_address; + else if ( Management::default_address != "" ) + network$address = Management::default_address; else network$address = "127.0.0.1"; - if ( ClusterAgent::listen_port != "" ) - network$bound_port = to_port(ClusterAgent::listen_port); + if ( Management::Agent::listen_port != "" ) + network$bound_port = to_port(Management::Agent::listen_port); else - network$bound_port = ClusterAgent::default_port; + network$bound_port = Management::Agent::default_port; epi$network = network; diff --git a/scripts/policy/frameworks/management/agent/main.zeek b/scripts/policy/frameworks/management/agent/main.zeek new file mode 100644 index 0000000000..45993b29c4 --- /dev/null +++ b/scripts/policy/frameworks/management/agent/main.zeek @@ -0,0 +1,586 @@ +##! This is the main "runtime" of a cluster agent. Zeek does not load this +##! directly; rather, the agent's bootstrapping module (in ./boot.zeek) +##! specifies it as the script to run in the node newly created via Zeek's +##! supervisor. 
+ +@load base/frameworks/broker +@load policy/frameworks/management +@load policy/frameworks/management/node/api +@load policy/frameworks/management/node/config + +@load ./api +@load ./config + +module Management::Agent::Runtime; + +# This export is mainly to appease Zeekygen's need to understand redefs of the +# Request record below. Without it, it fails to establish link targets for the +# tucked-on types. +export { + ## Request state specific to the agent's Supervisor interactions. + type SupervisorState: record { + node: string; ##< Name of the node the Supervisor is acting on. + }; + + ## Request state for node dispatches, tracking the requested action + ## as well as received responses. + type NodeDispatchState: record { + ## The dispatched action. The first string is a command, + ## any remaining strings its arguments. + action: vector of string; + + ## Request state for every node managed by this agent. + requests: set[string] &default=set(); + }; +} + +redef record Management::Request::Request += { + supervisor_state: SupervisorState &optional; + node_dispatch_state: NodeDispatchState &optional; +}; + +# Tag our logs correctly +redef Management::Log::role = Management::AGENT; + +# The global configuration as passed to us by the controller +global g_config: Management::Configuration; + +# A map to make other instance info accessible +global g_instances: table[string] of Management::Instance; + +# A map for the nodes we run on this instance, via this agent. +global g_nodes: table[string] of Management::Node; + +# The complete node map employed by the supervisor to describe the cluster +# topology to newly forked nodes. We refresh it when we receive new +# configurations.
+global g_cluster: table[string] of Supervisor::ClusterEndpoint; + + +function agent_topic(): string + { + local epi = Management::Agent::endpoint_info(); + return Management::Agent::topic_prefix + "/" + epi$id; + } + +event SupervisorControl::create_response(reqid: string, result: string) + { + local req = Management::Request::lookup(reqid); + if ( Management::Request::is_null(req) ) + return; + + local name = req$supervisor_state$node; + + if ( |result| > 0 ) + { + local msg = fmt("failed to create node %s: %s", name, result); + Management::Log::error(msg); + Broker::publish(agent_topic(), + Management::Agent::API::notify_error, + Management::Agent::name, msg, name); + } + + Management::Request::finish(reqid); + } + +event SupervisorControl::destroy_response(reqid: string, result: bool) + { + local req = Management::Request::lookup(reqid); + if ( Management::Request::is_null(req) ) + return; + + local name = req$supervisor_state$node; + + if ( ! result ) + { + local msg = fmt("failed to destroy node %s, %s", name, reqid); + Management::Log::error(msg); + Broker::publish(agent_topic(), + Management::Agent::API::notify_error, + Management::Agent::name, msg, name); + } + + Management::Request::finish(reqid); + } + +function supervisor_create(nc: Supervisor::NodeConfig) + { + local req = Management::Request::create(); + req$supervisor_state = SupervisorState($node = nc$name); + Broker::publish(SupervisorControl::topic_prefix, + SupervisorControl::create_request, req$id, nc); + Management::Log::info(fmt("issued supervisor create for %s, %s", nc$name, req$id)); + } + +function supervisor_destroy(node: string) + { + local req = Management::Request::create(); + req$supervisor_state = SupervisorState($node = node); + Broker::publish(SupervisorControl::topic_prefix, + SupervisorControl::destroy_request, req$id, node); + Management::Log::info(fmt("issued supervisor destroy for %s, %s", node, req$id)); + } + +event Management::Agent::API::set_configuration_request(reqid: 
string, config: Management::Configuration) + { + Management::Log::info(fmt("rx Management::Agent::API::set_configuration_request %s", reqid)); + + local nodename: string; + local node: Management::Node; + local nc: Supervisor::NodeConfig; + local msg: string; + + # Adopt the global configuration provided. + # XXX this can later handle validation and persistence + # XXX should do this transactionally, only set when all else worked + g_config = config; + + # Refresh the instances table: + g_instances = table(); + for ( inst in config$instances ) + g_instances[inst$name] = inst; + + # Terminate existing nodes + for ( nodename in g_nodes ) + supervisor_destroy(nodename); + + # Refresh the cluster and nodes tables + g_nodes = table(); + g_cluster = table(); + + for ( node in config$nodes ) + { + if ( node$instance == Management::Agent::name ) + g_nodes[node$name] = node; + + # The cluster and supervisor frameworks require a port for every + # node, using 0/unknown to signify "don't listen". We use + # optional values and map an absent value to 0/unknown. 
+ local p = 0/unknown; + + if ( node?$p ) + p = node$p; + + local cep = Supervisor::ClusterEndpoint( + $role = node$role, + $host = g_instances[node$instance]$host, + $p = p); + + if ( node?$interface ) + cep$interface = node$interface; + + g_cluster[node$name] = cep; + } + + # Apply the new configuration via the supervisor + + for ( nodename in g_nodes ) + { + node = g_nodes[nodename]; + node$state = Management::PENDING; + + nc = Supervisor::NodeConfig($name=nodename); + + if ( Management::Agent::cluster_directory != "" ) + nc$directory = Management::Agent::cluster_directory; + + if ( node?$interface ) + nc$interface = node$interface; + if ( node?$cpu_affinity ) + nc$cpu_affinity = node$cpu_affinity; + if ( node?$scripts ) + nc$scripts = node$scripts; + if ( node?$env ) + nc$env = node$env; + + # Always add the policy/frameworks/management/node scripts to any + # cluster node, since we require them to be able to communicate + # with the node. + nc$scripts[|nc$scripts|] = "policy/frameworks/management/node"; + + # XXX could use options to enable per-node overrides for + # directory, stdout, stderr, others? + + nc$cluster = g_cluster; + supervisor_create(nc); + } + + # XXX this currently does not fail if any of the above problems occurred, + # mainly due to the tediousness of handling the supervisor's response + # events asynchronously. The only indication of error will be + # notification events to the controller.
+ + if ( reqid != "" ) + { + local res = Management::Result( + $reqid = reqid, + $instance = Management::Agent::name); + + Management::Log::info(fmt("tx Management::Agent::API::set_configuration_response %s", + Management::result_to_string(res))); + Broker::publish(agent_topic(), + Management::Agent::API::set_configuration_response, reqid, res); + } + } + +event SupervisorControl::status_response(reqid: string, result: Supervisor::Status) + { + local req = Management::Request::lookup(reqid); + if ( Management::Request::is_null(req) ) + return; + + Management::Request::finish(reqid); + + local res = Management::Result( + $reqid = req$parent_id, $instance = Management::Agent::name); + + local node_statuses: Management::NodeStatusVec; + + for ( node in result$nodes ) + { + local sns = result$nodes[node]; # Supervisor node status + local cns = Management::NodeStatus( + $node=node, $state=Management::PENDING); + + # Identify the role of the node. For cluster roles (worker, + # manager, etc) we derive this from the cluster node table. For + # agent and controller, we identify via environment variables + # that the controller framework establishes upon creation (see + # the respective boot.zeek scripts). + if ( node in sns$node$cluster ) + { + cns$cluster_role = sns$node$cluster[node]$role; + + # For cluster nodes, copy run state from g_nodes, our + # live node status table. + if ( node in g_nodes ) + cns$state = g_nodes[node]$state; + + # The supervisor's responses use 0/tcp (not 0/unknown) + # when indicating an unused port because its internal + # serialization always assumes TCP. 
+ if ( sns$node$cluster[node]$p != 0/tcp ) + cns$p = sns$node$cluster[node]$p; + } + else + { + if ( "ZEEK_CLUSTER_MGMT_NODE" in sns$node$env ) + { + local role = sns$node$env["ZEEK_CLUSTER_MGMT_NODE"]; + if ( role == "CONTROLLER" ) + { + cns$mgmt_role = Management::CONTROLLER; + + # Automatically declare the controller in running state + # here -- we'd not have received a request that brought + # us here otherwise. + cns$state = Management::RUNNING; + + # The controller always listens, so the Zeek client can connect. + cns$p = Management::Agent::endpoint_info()$network$bound_port; + } + else if ( role == "AGENT" ) + { + cns$mgmt_role = Management::AGENT; + + # Similarly to above, always declare agent running. We are. :) + cns$state = Management::RUNNING; + + # If we have a controller address, the agent connects to it + # and does not listen. See zeek_init() below for similar logic. + if ( Management::Agent::controller$address == "0.0.0.0" ) + cns$p = Management::Agent::endpoint_info()$network$bound_port; + } + else + Management::Log::warning(fmt( + "unexpected cluster management node type '%s'", role)); + } + } + + # A PID is available if a supervised node has fully launched.
+ if ( sns?$pid ) + cns$pid = sns$pid; + + node_statuses += cns; + } + + res$data = node_statuses; + + Management::Log::info(fmt("tx Management::Agent::API::get_nodes_response %s", + Management::result_to_string(res))); + Broker::publish(agent_topic(), + Management::Agent::API::get_nodes_response, req$parent_id, res); + } + +event Management::Agent::API::get_nodes_request(reqid: string) + { + Management::Log::info(fmt("rx Management::Agent::API::get_nodes_request %s", reqid)); + + local req = Management::Request::create(); + req$parent_id = reqid; + + Broker::publish(SupervisorControl::topic_prefix, + SupervisorControl::status_request, req$id, ""); + Management::Log::info(fmt("issued supervisor status, %s", req$id)); + } + +event Management::Node::API::node_dispatch_response(reqid: string, result: Management::Result) + { + local node = "unknown node"; + if ( result?$node ) + node = result$node; + + Management::Log::info(fmt("rx Management::Node::API::node_dispatch_response %s from %s", reqid, node)); + + # Retrieve state for the request we just got a response to + local nreq = Management::Request::lookup(reqid); + if ( Management::Request::is_null(nreq) ) + return; + + # Find the original request from the controller + local req = Management::Request::lookup(nreq$parent_id); + if ( Management::Request::is_null(req) ) + return; + + # Mark the responding node as done. Nodes normally fill their own name + # into the result; we only double-check for resilience. Nodes failing to + # report themselves would eventually lead to request timeout. + if ( result?$node ) + { + if ( result$node in req$node_dispatch_state$requests ) + delete req$node_dispatch_state$requests[result$node]; + else + { + # An unknown or duplicate response -- do nothing. 
+ Management::Log::debug(fmt("response %s not expected, ignoring", reqid)); + return; + } + } + + # The usual special treatment for Broker values that are of type "any": + # confirm their type here based on the requested dispatch command. + switch req$node_dispatch_state$action[0] + { + case "get_id_value": + if ( result?$data ) + result$data = result$data as string; + break; + default: + Management::Log::error(fmt("unexpected dispatch command %s", + req$node_dispatch_state$action[0])); + break; + } + + # The result has the reporting node filled in but not the agent/instance + # (which the node doesn't know about), so add it now. + result$instance = Management::Agent::instance()$name; + + # Add this result to the overall response + req$results[|req$results|] = result; + + # If we still have pending queries out to the nodes, do nothing: we'll + # handle this soon, or our request will time out and we respond with + # error. + if ( |req$node_dispatch_state$requests| > 0 ) + return; + + # Release the agent-nodes request state, since we now have all responses. + Management::Request::finish(nreq$id); + + # Send response event back to controller and clean up main request state. + Management::Log::info(fmt("tx Management::Agent::API::node_dispatch_response %s", + Management::Request::to_string(req))); + Broker::publish(agent_topic(), + Management::Agent::API::node_dispatch_response, req$id, req$results); + Management::Request::finish(req$id); + } + +event Management::Agent::API::node_dispatch_request(reqid: string, action: vector of string, nodes: set[string]) + { + Management::Log::info(fmt("rx Management::Agent::API::node_dispatch_request %s %s %s", reqid, action, nodes)); + + local node: string; + local cluster_nodes: set[string]; + local nodes_final: set[string]; + + for ( node in g_nodes ) + add cluster_nodes[node]; + + # If this request includes cluster nodes to query, check if this agent + # manages any of those nodes.
If it doesn't, respond with an empty + # results vector immediately. Note that any globally unknown nodes + # that the client might have requested already got filtered by the + # controller, so we don't need to worry about them here. + + if ( |nodes| > 0 ) + { + nodes_final = nodes & cluster_nodes; + + if ( |nodes_final| == 0 ) + { + Management::Log::info(fmt( + "tx Management::Agent::API::node_dispatch_response %s, no node overlap", + reqid)); + Broker::publish(agent_topic(), + Management::Agent::API::node_dispatch_response, reqid, vector()); + return; + } + } + else if ( |g_nodes| == 0 ) + { + # Special case: the client did not request specific nodes. If + # we aren't running any nodes, respond right away, since there's + # nothing to dispatch to. + Management::Log::info(fmt( + "tx Management::Agent::API::node_dispatch_response %s, no nodes registered", + reqid)); + Broker::publish(agent_topic(), + Management::Agent::API::node_dispatch_response, reqid, vector()); + return; + } + else + { + # We send to all known nodes. + nodes_final = cluster_nodes; + } + + local res: Management::Result; + local req = Management::Request::create(reqid); + + req$node_dispatch_state = NodeDispatchState($action=action); + + # Build up dispatch state for tracking responses. We only dispatch to + # nodes that are in state RUNNING, as those have confirmed they're ready + # to communicate. For others, establish error state now. + for ( node in nodes_final ) + { + if ( g_nodes[node]$state == Management::RUNNING ) + add req$node_dispatch_state$requests[node]; + else + { + res = Management::Result($reqid=reqid, $node=node); + res$success = F; + res$error = fmt("cluster node %s not in running state", node); + req$results += res; + } + } + + # Corner case: nothing is in state RUNNING.
+ if ( |req$node_dispatch_state$requests| == 0 ) + { + Management::Log::info(fmt( + "tx Management::Agent::API::node_dispatch_response %s, no nodes running", + reqid)); + Broker::publish(agent_topic(), + Management::Agent::API::node_dispatch_response, reqid, req$results); + Management::Request::finish(req$id); + return; + } + + # We use a single request record to track all node responses, and a + # single event that Broker publishes to all nodes. We know when all + # nodes have responded by checking the requests set we built up above. + local nreq = Management::Request::create(); + nreq$parent_id = reqid; + + Management::Log::info(fmt("tx Management::Node::API::node_dispatch_request %s %s", nreq$id, action)); + Broker::publish(Management::Node::node_topic, + Management::Node::API::node_dispatch_request, nreq$id, action, nodes); + } + +event Management::Agent::API::agent_welcome_request(reqid: string) + { + Management::Log::info(fmt("rx Management::Agent::API::agent_welcome_request %s", reqid)); + + local res = Management::Result( + $reqid = reqid, + $instance = Management::Agent::name); + + Management::Log::info(fmt("tx Management::Agent::API::agent_welcome_response %s", + Management::result_to_string(res))); + Broker::publish(agent_topic(), + Management::Agent::API::agent_welcome_response, reqid, res); + } + +event Management::Agent::API::agent_standby_request(reqid: string) + { + Management::Log::info(fmt("rx Management::Agent::API::agent_standby_request %s", reqid)); + + # We shut down any existing cluster nodes via an empty configuration, + # and fall silent. We do not unpeer/disconnect (assuming we earlier + # peered/connected -- otherwise there's nothing we can do here via + # Broker anyway), mainly to keep open the possibility of running + # cluster nodes again later. 
+ event Management::Agent::API::set_configuration_request("", Management::Configuration()); + + local res = Management::Result( + $reqid = reqid, + $instance = Management::Agent::name); + + Management::Log::info(fmt("tx Management::Agent::API::agent_standby_response %s", + Management::result_to_string(res))); + Broker::publish(agent_topic(), + Management::Agent::API::agent_standby_response, reqid, res); + } + +event Management::Node::API::notify_node_hello(node: string) + { + Management::Log::info(fmt("rx Management::Node::API::notify_node_hello %s", node)); + + if ( node in g_nodes ) + g_nodes[node]$state = Management::RUNNING; + } + +event Broker::peer_added(peer: Broker::EndpointInfo, msg: string) + { + # This does not (cannot?) immediately verify that the new peer + # is in fact a controller, so we might send this in vain. + # Controllers register the agent upon receipt of the event. + + local epi = Management::Agent::endpoint_info(); + + Broker::publish(agent_topic(), + Management::Agent::API::notify_agent_hello, + epi$id, to_addr(epi$network$address), + Management::Agent::API::version); + } + +# XXX We may want a request timeout event handler here. It's arguably cleaner to +# send supervisor failure events back to the controller than to rely on its +# controller-agent request timeout to kick in. + +event zeek_init() + { + local epi = Management::Agent::endpoint_info(); + + # The agent needs to peer with the supervisor -- this doesn't currently + # happen automatically. The address defaults to Broker's default, which + # relies on ZEEK_DEFAULT_LISTEN_ADDR and so might just be "". Broker + # internally falls back to listening on any; we pick 127.0.0.1. 
+ local supervisor_addr = Broker::default_listen_address; + if ( supervisor_addr == "" ) + supervisor_addr = "127.0.0.1"; + + Broker::peer(supervisor_addr, Broker::default_port, Broker::default_listen_retry); + + # The agent needs to receive communication targeted at it, any responses + # from the supervisor, and any responses from cluster nodes. + Broker::subscribe(agent_topic()); + Broker::subscribe(SupervisorControl::topic_prefix); + Broker::subscribe(Management::Node::node_topic); + + # Establish connectivity with the controller. + if ( Management::Agent::controller$address != "0.0.0.0" ) + { + # We connect to the controller. + Broker::peer(Management::Agent::controller$address, + Management::Agent::controller$bound_port, + Management::connect_retry); + } + + # The agent always listens, to allow cluster nodes to peer with it. + # If the controller connects to us, it also uses this port. + Broker::listen(cat(epi$network$address), epi$network$bound_port); + + Management::Log::info("agent is live"); + } diff --git a/scripts/policy/frameworks/management/config.zeek b/scripts/policy/frameworks/management/config.zeek new file mode 100644 index 0000000000..7b87655ae5 --- /dev/null +++ b/scripts/policy/frameworks/management/config.zeek @@ -0,0 +1,20 @@ +##! Management framework configuration settings common to agent and controller. +##! This does not include config settings that exist in both agent and +##! controller but that they set differently, since setting defaults here would +##! be awkward or pointless (since both node types would overwrite them +##! anyway). For role-specific settings, see management/controller/config.zeek +##! and management/agent/config.zeek. + +module Management; + +export { + ## The fallback listen address if more specific addresses, such as + ## the controller's :zeek:see:`Management::Controller::listen_address`, + ## remain empty. Unless redefined, this uses Broker's own default + ## listen address.
+ const default_address = Broker::default_listen_address &redef; + + ## The retry interval for Broker connects. Defaults to a more + ## aggressive value compared to Broker's 30s. + const connect_retry = 1sec &redef; +} diff --git a/scripts/policy/frameworks/management/controller/__load__.zeek b/scripts/policy/frameworks/management/controller/__load__.zeek new file mode 100644 index 0000000000..47af9762c5 --- /dev/null +++ b/scripts/policy/frameworks/management/controller/__load__.zeek @@ -0,0 +1,4 @@ +##! The entry point for the Management framework's cluster controller. It runs +##! bootstrap logic for launching the controller process via Zeek's Supervisor. + +@load ./boot diff --git a/scripts/policy/frameworks/cluster/controller/api.zeek b/scripts/policy/frameworks/management/controller/api.zeek similarity index 53% rename from scripts/policy/frameworks/cluster/controller/api.zeek rename to scripts/policy/frameworks/management/controller/api.zeek index 27c41d33ff..ad9b22cc26 100644 --- a/scripts/policy/frameworks/cluster/controller/api.zeek +++ b/scripts/policy/frameworks/management/controller/api.zeek @@ -3,9 +3,9 @@ ##! corresponding response event. Such event pairs share the same name prefix ##! and end in "_request" and "_response", respectively. -@load ./types +@load policy/frameworks/management/types -module ClusterController::API; +module Management::Controller::API; export { ## A simple versioning scheme, used to track basic compatibility of @@ -26,10 +26,10 @@ export { ## reqid: the request identifier used in the request event. ## ## result: the result record. Its data member is a - ## :zeek:see:`ClusterController::Types::Instance` record. + ## :zeek:see:`Management::Instance` record.
## global get_instances_response: event(reqid: string, - result: ClusterController::Types::Result); + result: Management::Result); ## zeek-client sends this event to establish a new cluster configuration, @@ -39,22 +39,75 @@ export { ## ## reqid: a request identifier string, echoed in the response event. ## - ## config: a :zeek:see:`ClusterController::Types::Configuration` record + ## config: a :zeek:see:`Management::Configuration` record ## specifying the cluster configuration. ## global set_configuration_request: event(reqid: string, - config: ClusterController::Types::Configuration); + config: Management::Configuration); ## Response to a set_configuration_request event. The controller sends ## this back to the client. ## ## reqid: the request identifier used in the request event. ## - ## result: a vector of :zeek:see:`ClusterController::Types::Result` records. + ## result: a vector of :zeek:see:`Management::Result` records. ## Each member captures one agent's response. ## global set_configuration_response: event(reqid: string, - result: ClusterController::Types::ResultVec); + result: Management::ResultVec); + + + ## zeek-client sends this event to request a list of + ## :zeek:see:`Management::NodeStatus` records that capture + ## the status of Supervisor-managed nodes running on the cluster's + ## instances. + ## + ## reqid: a request identifier string, echoed in the response event. + ## + global get_nodes_request: event(reqid: string); + + ## Response to a get_nodes_request event. The controller sends this + ## back to the client. + ## + ## reqid: the request identifier used in the request event. + ## + ## result: a :zeek:type:`vector` of :zeek:see:`Management::Result` + ## records. Each record covers one cluster instance. Each record's data + ## member is a vector of :zeek:see:`Management::NodeStatus` + ## records, covering the nodes at that instance. Results may also indicate + ## failure, with error messages indicating what went wrong. 
+ global get_nodes_response: event(reqid: string, + result: Management::ResultVec); + + + ## zeek-client sends this event to retrieve the current value of a + ## variable in Zeek's global namespace, referenced by the given + ## identifier (i.e., variable name). The controller asks all agents + ## to retrieve this value from each cluster node, accumulates the + ## returned responses, and responds with a get_id_value_response + ## event back to the client. + ## + ## reqid: a request identifier string, echoed in the response event. + ## + ## id: the name of the variable whose value to retrieve. + ## + ## nodes: a set of cluster node names (e.g. "worker-01") to retrieve + ## the values from. An empty set, supplied by default, means + ## retrieval from all current cluster nodes. + global get_id_value_request: event(reqid: string, id: string, + nodes: set[string] &default=set()); + + ## Response to a get_id_value_request event. The controller sends this + ## back to the client. + ## + ## reqid: the request identifier used in the request event. + ## + ## result: a :zeek:type:`vector` of :zeek:see:`Management::Result` + ## records. Each record covers one Zeek cluster node. Each record's + ## data field contains a string with the JSON rendering (as produced + ## by :zeek:id:`to_json`, including the error strings it potentially + ## returns). + global get_id_value_response: event(reqid: string, result: Management::ResultVec); # Testing events. These don't provide operational value but expose @@ -79,10 +132,10 @@ export { ## reqid: the request identifier used in the request event. 
## global test_timeout_response: event(reqid: string, - result: ClusterController::Types::Result); + result: Management::Result); - # Notification events, agent -> controller + # Notification events ## The controller triggers this event when the operational cluster ## instances align with the ones desired by the cluster diff --git a/scripts/policy/frameworks/management/controller/boot.zeek b/scripts/policy/frameworks/management/controller/boot.zeek new file mode 100644 index 0000000000..a3c679c257 --- /dev/null +++ b/scripts/policy/frameworks/management/controller/boot.zeek @@ -0,0 +1,36 @@ +##! The cluster controller's boot logic runs in Zeek's supervisor and instructs +##! it to launch the Management controller process. The controller's main logic +##! resides in main.zeek, similarly to other frameworks. The new process will +##! execute that script. +##! +##! If the current process is not the Zeek supervisor, this does nothing. + +@load ./config + +event zeek_init() + { + if ( ! Supervisor::is_supervisor() ) + return; + + local epi = Management::Controller::endpoint_info(); + local sn = Supervisor::NodeConfig($name=epi$id, $bare_mode=T, + $scripts=vector("policy/frameworks/management/controller/main.zeek")); + + if ( Management::Controller::directory != "" ) + sn$directory = Management::Controller::directory; + if ( Management::Controller::stdout_file != "" ) + sn$stdout_file = Management::Controller::stdout_file; + if ( Management::Controller::stderr_file != "" ) + sn$stderr_file = Management::Controller::stderr_file; + + # This helps Zeek run controller and agent with a minimal set of scripts. 
+ sn$env["ZEEK_CLUSTER_MGMT_NODE"] = "CONTROLLER"; + + local res = Supervisor::create(sn); + + if ( res != "" ) + { + print(fmt("error: supervisor could not create controller node: %s", res)); + exit(1); + } + } diff --git a/scripts/policy/frameworks/management/controller/config.zeek b/scripts/policy/frameworks/management/controller/config.zeek new file mode 100644 index 0000000000..c97c11bb6a --- /dev/null +++ b/scripts/policy/frameworks/management/controller/config.zeek @@ -0,0 +1,90 @@ +##! Configuration settings for the cluster controller. + +@load policy/frameworks/management/config +@load policy/frameworks/management/types + +module Management::Controller; + +export { + ## The name of this controller. Defaults to the value of the + ## ZEEK_CONTROLLER_NAME environment variable. When that is unset and the + ## user doesn't redef the value, the implementation defaults to + ## "controller-" followed by the value of gethostname(). + const name = getenv("ZEEK_CONTROLLER_NAME") &redef; + + ## The controller's stdout log name. If the string is non-empty, Zeek will + ## produce a free-form log (i.e., not one governed by Zeek's logging + ## framework) in Zeek's working directory. If left empty, no such log + ## results. + ## + ## Note that the controller also establishes a "proper" Zeek log via the + ## :zeek:see:`Management::Log` module. + const stdout_file = "controller.stdout" &redef; + + ## The controller's stderr log name. Like :zeek:see:`Management::Controller::stdout_file`, + ## but for the stderr stream. + const stderr_file = "controller.stderr" &redef; + + ## The network address the controller listens on. By default this uses + ## the value of the ZEEK_CONTROLLER_ADDR environment variable, but you + ## may also redef to a specific value. When empty, the implementation + ## falls back to :zeek:see:`Management::default_address`. + const listen_address = getenv("ZEEK_CONTROLLER_ADDR") &redef; + + ## The network port the controller listens on.
Counterpart to + ## :zeek:see:`Management::Controller::listen_address`, defaulting to the + ## ZEEK_CONTROLLER_PORT environment variable. + const listen_port = getenv("ZEEK_CONTROLLER_PORT") &redef; + + ## The fallback listen port if :zeek:see:`Management::Controller::listen_port` + ## remains empty. + const default_port = 2150/tcp &redef; + + ## The controller's Broker topic. Clients send requests to this topic. + const topic = "zeek/management/controller" &redef; + + ## An optional custom output directory for stdout/stderr. Agent and + ## controller currently only log locally, not via the data cluster's + ## logger node. This means that if both write to the same log file, + ## output gets garbled. + const directory = "" &redef; + + ## Returns a :zeek:see:`Broker::NetworkInfo` record describing the controller. + global network_info: function(): Broker::NetworkInfo; + + ## Returns a :zeek:see:`Broker::EndpointInfo` record describing the controller. + global endpoint_info: function(): Broker::EndpointInfo; +} + +function network_info(): Broker::NetworkInfo + { + local ni: Broker::NetworkInfo; + + if ( Management::Controller::listen_address != "" ) + ni$address = Management::Controller::listen_address; + else if ( Management::default_address != "" ) + ni$address = Management::default_address; + else + ni$address = "127.0.0.1"; + + if ( Management::Controller::listen_port != "" ) + ni$bound_port = to_port(Management::Controller::listen_port); + else + ni$bound_port = Management::Controller::default_port; + + return ni; + } + +function endpoint_info(): Broker::EndpointInfo + { + local epi: Broker::EndpointInfo; + + if ( Management::Controller::name != "" ) + epi$id = Management::Controller::name; + else + epi$id = fmt("controller-%s", gethostname()); + + epi$network = network_info(); + + return epi; + } diff --git a/scripts/policy/frameworks/management/controller/main.zeek b/scripts/policy/frameworks/management/controller/main.zeek new file mode 100644 index 
0000000000..72ecf2be30 --- /dev/null +++ b/scripts/policy/frameworks/management/controller/main.zeek @@ -0,0 +1,836 @@ +##! This is the main "runtime" of the Management framework's controller. Zeek +##! does not load this directly; rather, the controller's bootstrapping module +##! (in ./boot.zeek) specifies it as the script to run in the node newly created +##! by the supervisor. + +@load base/frameworks/broker + +@load policy/frameworks/management +@load policy/frameworks/management/agent/config # For the agent topic prefix +@load policy/frameworks/management/agent/api + +@load ./api +@load ./config + +module Management::Controller::Runtime; + +# This export is mainly to appease Zeekygen's need to understand redefs of the +# Request record below. Without it, it fails to establish link targets for the +# tucked-on types. +export { + ## Request state specific to + ## :zeek:see:`Management::Controller::API::set_configuration_request` and + ## :zeek:see:`Management::Controller::API::set_configuration_response`. + type SetConfigurationState: record { + ## The cluster configuration established with this request + config: Management::Configuration; + ## Request state for every controller/agent transaction. + requests: set[string] &default=set(); + }; + + ## Request state specific to + ## :zeek:see:`Management::Controller::API::get_nodes_request` and + ## :zeek:see:`Management::Controller::API::get_nodes_response`. + type GetNodesState: record { + ## Request state for every controller/agent transaction. + requests: set[string] &default=set(); + }; + + ## Request state for node dispatch requests, to track the requested + ## action and received responses. Node dispatches are requests to + ## execute pre-implemented actions on every node in the cluster, + ## and report their outcomes. 
See + ## :zeek:see:`Management::Agent::API::node_dispatch_request` and + ## :zeek:see:`Management::Agent::API::node_dispatch_response` for the + ## agent/controller interaction, and + ## :zeek:see:`Management::Controller::API::get_id_value_request` and + ## :zeek:see:`Management::Controller::API::get_id_value_response` + ## for an example of a specific API the controller generalizes into + ## a dispatch. + type NodeDispatchState: record { + ## The dispatched action. The first string is a command, + ## any remaining strings its arguments. + action: vector of string; + + ## Request state for every controller/agent transaction. + ## The set of strings tracks the IDs of the per-agent requests + ## for which we still expect responses, before we can respond + ## back to the client. + requests: set[string] &default=set(); + }; + + ## Dummy state for internal state-keeping test cases. + type TestState: record { }; +} + +redef record Management::Request::Request += { + set_configuration_state: SetConfigurationState &optional; + get_nodes_state: GetNodesState &optional; + node_dispatch_state: NodeDispatchState &optional; + test_state: TestState &optional; +}; + +# Tag our logs correctly +redef Management::Log::role = Management::CONTROLLER; + +global check_instances_ready: function(); +global add_instance: function(inst: Management::Instance); +global drop_instance: function(inst: Management::Instance); + +global null_config: function(): Management::Configuration; +global is_null_config: function(config: Management::Configuration): bool; + +# Checks whether the given instance is one that we know with different +# communication settings: a different peering direction, a different listening +# port, etc. Used as a predicate to indicate when we need to drop the existing +# one from our internal state. +global is_instance_connectivity_change: function + (inst: Management::Instance): bool; + +# The set of agents the controller interacts with to manage the currently +# configured cluster.
This may be a subset of all the agents known to the +# controller, as tracked by the g_instances_known set. The key is the instance +# name and should match the $name member of the corresponding instance record. +global g_instances: table[string] of Management::Instance = table(); + +# The set of instances that have checked in with the controller. This is a +# superset of g_instances, since it covers any agent that has sent us a +# notify_agent_hello event. +global g_instances_known: set[string] = set(); + +# A corresponding set of instances/agents that we track in order to understand +# when all of the above instances have sent agent_welcome_response events. (An +# alternative would be to use a record that adds a single state bit for each +# instance, and store that above.) +global g_instances_ready: set[string] = set(); + +# The request ID of the most recent configuration update that's come in from +# a client. We track it here until we know we are ready to communicate with all +# agents required by the update. +global g_config_reqid_pending: string = ""; + +# The most recent configuration we have successfully deployed. This is also +# the one we send whenever the client requests it. +global g_config_current: Management::Configuration; + +function send_config_to_agents(req: Management::Request::Request, config: Management::Configuration) + { + for ( name in g_instances ) + { + if ( name !in g_instances_ready ) + next; + + local agent_topic = Management::Agent::topic_prefix + "/" + name; + local areq = Management::Request::create(); + areq$parent_id = req$id; + + # We track the requests sent off to each agent. As the + # responses come in, we delete them. Once the requests + # set is empty, we respond back to the client. + add req$set_configuration_state$requests[areq$id]; + + # We could also broadcast just once on the agent prefix, but + # explicit request/response pairs for each agent seems cleaner.
+ Management::Log::info(fmt("tx Management::Agent::API::set_configuration_request %s to %s", areq$id, name)); + Broker::publish(agent_topic, Management::Agent::API::set_configuration_request, areq$id, config); + } + } + +# This is the &on_change handler for the g_instances_ready set, meaning +# it runs whenever a required agent has confirmed it's ready. +function check_instances_ready() + { + local cur_instances: set[string]; + + for ( inst in g_instances ) + add cur_instances[inst]; + + if ( cur_instances == g_instances_ready ) + event Management::Controller::API::notify_agents_ready(cur_instances); + } + +function add_instance(inst: Management::Instance) + { + g_instances[inst$name] = inst; + + if ( inst?$listen_port ) + Broker::peer(cat(inst$host), inst$listen_port, + Management::connect_retry); + + if ( inst$name in g_instances_known ) + { + # The agent has already peered with us. Send welcome to indicate + # it's part of cluster management. Once it responds, we update + # the set of ready instances and proceed as feasible with config + # deployments. + + local req = Management::Request::create(); + + Management::Log::info(fmt("tx Management::Agent::API::agent_welcome_request to %s", inst$name)); + Broker::publish(Management::Agent::topic_prefix + "/" + inst$name, + Management::Agent::API::agent_welcome_request, req$id); + } + } + +function drop_instance(inst: Management::Instance) + { + if ( inst$name !in g_instances ) + return; + + # Send the agent a standby so it shuts down its cluster nodes & state + Management::Log::info(fmt("tx Management::Agent::API::agent_standby_request to %s", inst$name)); + Broker::publish(Management::Agent::topic_prefix + "/" + inst$name, + Management::Agent::API::agent_standby_request, ""); + + delete g_instances[inst$name]; + + if ( inst$name in g_instances_ready ) + delete g_instances_ready[inst$name]; + + # The agent remains in g_instances_known, to track that we're able + # to communicate with it in case it's required again. 
+
+	Management::Log::info(fmt("dropped instance %s", inst$name));
+	}
+
+# Returns the placeholder configuration that stands for "no configuration":
+# a Management::Configuration whose $id is the empty string.
+function null_config(): Management::Configuration
+	{
+	return Management::Configuration($id="");
+	}
+
+# Returns T when the given configuration's $id is the empty string, i.e. when
+# it matches the placeholder produced by null_config().
+function is_null_config(config: Management::Configuration): bool
+	{
+	return config$id == "";
+	}
+
+# Returns T when an instance of the same name is tracked in g_instances but
+# with a different host, peering direction, or listening port. Returns F
+# otherwise, including when the instance is not tracked at all.
+function is_instance_connectivity_change(inst: Management::Instance): bool
+	{
+	# If we're not tracking this instance as part of a cluster config, it's
+	# not a change. (More precisely: we cannot say whether it's changed.)
+	if ( inst$name !in g_instances )
+		return F;
+
+	# The agent has peered with us and now uses a different host.
+	# XXX 0.0.0.0 is a workaround until we've resolved how agents that peer
+	# with us obtain their identity. Broker ID?
+	if ( inst$host != 0.0.0.0 && inst$host != g_instances[inst$name]$host )
+		return T;
+
+	# The agent has a listening port and the one we know does not, or vice
+	# versa. I.e., this is a change in the intended peering direction.
+	if ( inst?$listen_port != g_instances[inst$name]?$listen_port )
+		return T;
+
+	# Both have listening ports, but they differ.
+	if ( inst?$listen_port && g_instances[inst$name]?$listen_port &&
+	     inst$listen_port != g_instances[inst$name]$listen_port )
+		return T;
+
+	return F;
+	}
+
+# Returns the subset of the given node names that also occur as node names in
+# the current configuration (g_config_current).
+function filter_config_nodes_by_name(nodes: set[string]): set[string]
+	{
+	local cluster_nodes: set[string];
+
+	for ( node in g_config_current$nodes )
+		add cluster_nodes[node$name];
+
+	return nodes & cluster_nodes;
+	}
+
+# Handles the notification that the required agents are ready by looking up
+# the pending configuration request, if any.
+event Management::Controller::API::notify_agents_ready(instances: set[string])
+	{
+	local insts = Management::Util::set_to_vector(instances);
+
+	Management::Log::info(fmt("rx Management::Controller::API::notify_agents_ready %s",
+	                          join_string_vec(insts, ",")));
+
+	local req = Management::Request::lookup(g_config_reqid_pending);
+
+	# If there's no pending request, when it's no longer available, or it
+	# doesn't have config state, don't do anything else.
+ if ( Management::Request::is_null(req) || ! req?$set_configuration_state ) + return; + + # All instances requested in the pending configuration update are now + # known to us. Send them the config. As they send their response events + # we update the client's request state and eventually send the response + # event to the it. + send_config_to_agents(req, req$set_configuration_state$config); + } + +event Management::Agent::API::notify_agent_hello(instance: string, host: addr, api_version: count) + { + Management::Log::info(fmt("rx Management::Agent::API::notify_agent_hello %s %s", instance, host)); + + # When an agent checks in with a mismatching API version, we log the + # fact and drop its state, if any. + if ( api_version != Management::Controller::API::version ) + { + Management::Log::warning( + fmt("instance %s/%s has checked in with incompatible API version %s", + instance, host, api_version)); + + if ( instance in g_instances ) + drop_instance(g_instances[instance]); + if ( instance in g_instances_known ) + delete g_instances_known[instance]; + + return; + } + + add g_instances_known[instance]; + + if ( instance in g_instances && instance !in g_instances_ready ) + { + # We need this instance for our cluster and have full context for + # it from the configuration. Tell agent. 
+ local req = Management::Request::create(); + + Management::Log::info(fmt("tx Management::Agent::API::agent_welcome_request to %s", instance)); + Broker::publish(Management::Agent::topic_prefix + "/" + instance, + Management::Agent::API::agent_welcome_request, req$id); + } + } + +event Management::Agent::API::agent_welcome_response(reqid: string, result: Management::Result) + { + Management::Log::info(fmt("rx Management::Agent::API::agent_welcome_response %s", reqid)); + + local req = Management::Request::lookup(reqid); + + if ( Management::Request::is_null(req) ) + return; + + Management::Request::finish(req$id); + + # An agent we've been waiting to hear back from is ready for cluster + # work. Double-check we still want it, otherwise drop it. + + if ( ! result$success || result$instance !in g_instances ) + { + Management::Log::info(fmt( + "tx Management::Agent::API::agent_standby_request to %s", result$instance)); + Broker::publish(Management::Agent::topic_prefix + "/" + result$instance, + Management::Agent::API::agent_standby_request, ""); + return; + } + + add g_instances_ready[result$instance]; + Management::Log::info(fmt("instance %s ready", result$instance)); + + check_instances_ready(); + } + +event Management::Agent::API::notify_change(instance: string, n: Management::Node, + old: Management::State, new: Management::State) + { + # XXX TODO + } + +event Management::Agent::API::notify_error(instance: string, msg: string, node: string) + { + # XXX TODO + } + +event Management::Agent::API::notify_log(instance: string, msg: string, node: string) + { + # XXX TODO + } + +event Management::Agent::API::set_configuration_response(reqid: string, result: Management::Result) + { + Management::Log::info(fmt("rx Management::Agent::API::set_configuration_response %s", reqid)); + + # Retrieve state for the request we just got a response to + local areq = Management::Request::lookup(reqid); + if ( Management::Request::is_null(areq) ) + return; + + # Release the request, 
which is now done.
+	Management::Request::finish(areq$id);
+
+	# Find the original request from the client
+	local req = Management::Request::lookup(areq$parent_id);
+	if ( Management::Request::is_null(req) )
+		return;
+
+	# Add this result to the overall response
+	req$results[|req$results|] = result;
+
+	# Mark this request as done by removing it from the table of pending
+	# ones. The following if-check should always be true.
+	if ( areq$id in req$set_configuration_state$requests )
+		delete req$set_configuration_state$requests[areq$id];
+
+	# If there are any pending requests to the agents, we're
+	# done: we respond once every agent has responded (or we time out).
+	if ( |req$set_configuration_state$requests| > 0 )
+		return;
+
+	# All set_configuration requests to instances are done, so adopt the
+	# client's requested configuration as the new one and respond back to
+	# client.
+	g_config_current = req$set_configuration_state$config;
+	g_config_reqid_pending = "";
+
+	Management::Log::info(fmt("tx Management::Controller::API::set_configuration_response %s",
+	                          Management::Request::to_string(req)));
+	Broker::publish(Management::Controller::topic,
+	    Management::Controller::API::set_configuration_response, req$id, req$results);
+	Management::Request::finish(req$id);
+	}
+
+# Handles a client's request to deploy a new cluster configuration. Rejects
+# the request when another one is still pending. Otherwise it reconciles the
+# instances tracked in g_instances with those named in the new configuration
+# (dropping, keeping, or adding instances as needed) and then checks whether
+# the configuration can already be sent to the agents.
+event Management::Controller::API::set_configuration_request(reqid: string, config: Management::Configuration)
+	{
+	Management::Log::info(fmt("rx Management::Controller::API::set_configuration_request %s", reqid));
+
+	local res: Management::Result;
+	local req = Management::Request::create(reqid);
+
+	req$set_configuration_state = SetConfigurationState($config = config);
+
+	# At the moment there can only be one pending request.
+	if ( g_config_reqid_pending != "" )
+		{
+		res = Management::Result($reqid=reqid);
+		res$success = F;
+		res$error = fmt("request %s still pending", g_config_reqid_pending);
+		req$results += res;
+
+		Management::Log::info(fmt("tx Management::Controller::API::set_configuration_response %s",
+		                          Management::Request::to_string(req)));
+		Broker::publish(Management::Controller::topic,
+		    Management::Controller::API::set_configuration_response, req$id, req$results);
+		Management::Request::finish(req$id);
+		return;
+		}
+
+	# XXX validate the configuration:
+	# - Are node instances among defined instances?
+	# - Are all names unique?
+	# - Are any node options understood?
+	# - Do node types with optional fields have required values?
+	# ...
+
+	# The incoming request is now the pending one. It gets cleared when all
+	# agents have processed their config updates successfully, or their
+	# responses time out.
+	g_config_reqid_pending = req$id;
+
+	# Compare the instance configuration to our current one. If it matches,
+	# we can proceed to deploying the new cluster topology. If it does
+	# not, we need to establish connectivity with agents we connect to, or
+	# wait until all instances that connect to us have done so. Either triggers
+	# a notify_agents_ready event, upon which we then deploy the topology.
+
+	# The current & new set of instance names.
+	local insts_current: set[string];
+	local insts_new: set[string];
+
+	# A set of current instances not contained in the new config.
+	# Those will need to get dropped.
+	local insts_to_drop: set[string];
+
+	# The opposite: new instances not yet in our current set. Those we will need
+	# to establish contact with (or they with us).
+	local insts_to_add: set[string];
+
+	# The overlap: instances in both the current and new set. For those we verify
+	# that we're actually dealing with the same entities, and might need to re-
+	# connect if not.
+	local insts_to_keep: set[string];
+
+	# Alternative representation of insts_to_add, directly providing the instances.
+	local insts_to_peer: table[string] of Management::Instance;
+
+	# Helpful locals.
+	local inst_name: string;
+	local inst: Management::Instance;
+
+	for ( inst_name in g_instances )
+		add insts_current[inst_name];
+	for ( inst in config$instances )
+		add insts_new[inst$name];
+
+	# Populate TODO lists for instances we need to drop, check, or add.
+	insts_to_drop = insts_current - insts_new;
+	insts_to_add = insts_new - insts_current;
+	insts_to_keep = insts_new & insts_current;
+
+	for ( inst in config$instances )
+		{
+		if ( inst$name in insts_to_add )
+			{
+			insts_to_peer[inst$name] = inst;
+			next;
+			}
+
+		# Focus on the keepers: check for change in identity/location.
+		if ( inst$name !in insts_to_keep )
+			next;
+
+		if ( is_instance_connectivity_change(inst) )
+			{
+			# The endpoint looks different. We drop the current one
+			# and need to re-establish connectivity with the new
+			# one.
+			add insts_to_drop[inst$name];
+			add insts_to_add[inst$name];
+
+			# The add loop that follows iterates insts_to_peer, not
+			# insts_to_add. Record the new instance there too, or
+			# it would be dropped and never re-added.
+			insts_to_peer[inst$name] = inst;
+			}
+		}
+
+	# Process our TODO lists. Handle drops first, then additions, in
+	# case we need to re-establish connectivity with an agent.
+
+	for ( inst_name in insts_to_drop )
+		drop_instance(g_instances[inst_name]);
+	for ( inst_name in insts_to_peer )
+		add_instance(insts_to_peer[inst_name]);
+
+	# Updates to our instance tables are complete, now check if we're already
+	# able to send the config to the agents:
+	check_instances_ready();
+	}
+
+# Handles a client's request for the instances the controller currently
+# tracks. Responds with a single result whose data member is a vector of all
+# instances in g_instances.
+event Management::Controller::API::get_instances_request(reqid: string)
+	{
+	Management::Log::info(fmt("rx Management::Controller::API::get_instances_request %s", reqid));
+
+	local res = Management::Result($reqid = reqid);
+	local insts: vector of Management::Instance;
+
+	for ( i in g_instances )
+		insts += g_instances[i];
+
+	res$data = insts;
+
+	Management::Log::info(fmt("tx Management::Controller::API::get_instances_response %s", reqid));
+	Broker::publish(Management::Controller::topic,
+	    Management::Controller::API::get_instances_response, reqid, res);
+	}
+
+# Handles one agent's response to a get_nodes_request the controller sent on
+# behalf of a client request.
+event Management::Agent::API::get_nodes_response(reqid: string, result: Management::Result)
+	{
+	Management::Log::info(fmt("rx Management::Agent::API::get_nodes_response %s", reqid));
+
+	# Retrieve state for the request we just got a response to
+	local areq = Management::Request::lookup(reqid);
+	if ( Management::Request::is_null(areq) )
+		return;
+
+	# Release the request, since this agent is now done.
+	Management::Request::finish(areq$id);
+
+	# Find the original request from the client
+	local req = Management::Request::lookup(areq$parent_id);
+	if ( Management::Request::is_null(req) )
+		return;
+
+	# Zeek's ingestion of an any-typed val via Broker yields an opaque
+	# Broker DataVal. When Zeek forwards this val via another event it stays
+	# in this opaque form. To avoid forcing recipients to distinguish
+	# whether the val is of the actual, intended (any-)type or a Broker
+	# DataVal wrapper, we explicitly cast it back to our intended Zeek
+	# type.
This test case demonstrates: broker.remote_event_vector_any + result$data = result$data as Management::NodeStatusVec; + + # Add this result to the overall response + req$results[|req$results|] = result; + + # Mark this request as done by removing it from the table of pending + # ones. The following if-check should always be true. + if ( areq$id in req$get_nodes_state$requests ) + delete req$get_nodes_state$requests[areq$id]; + + # If we still have pending queries out to the agents, do nothing: we'll + # handle this soon, or our request will time out and we respond with + # error. + if ( |req$get_nodes_state$requests| > 0 ) + return; + + Management::Log::info(fmt("tx Management::Controller::API::get_nodes_response %s", + Management::Request::to_string(req))); + Broker::publish(Management::Controller::topic, + Management::Controller::API::get_nodes_response, req$id, req$results); + Management::Request::finish(req$id); + } + +event Management::Controller::API::get_nodes_request(reqid: string) + { + Management::Log::info(fmt("rx Management::Controller::API::get_nodes_request %s", reqid)); + + # Special case: if we have no instances, respond right away. 
+ if ( |g_instances| == 0 ) + { + Management::Log::info(fmt("tx Management::Controller::API::get_nodes_response %s", reqid)); + local res = Management::Result($reqid=reqid, $success=F, + $error="no instances connected"); + Broker::publish(Management::Controller::topic, + Management::Controller::API::get_nodes_response, reqid, vector(res)); + return; + } + + local req = Management::Request::create(reqid); + req$get_nodes_state = GetNodesState(); + + for ( name in g_instances ) + { + if ( name !in g_instances_ready ) + next; + + local agent_topic = Management::Agent::topic_prefix + "/" + name; + local areq = Management::Request::create(); + + areq$parent_id = req$id; + add req$get_nodes_state$requests[areq$id]; + + Management::Log::info(fmt("tx Management::Agent::API::get_nodes_request %s to %s", areq$id, name)); + Broker::publish(agent_topic, Management::Agent::API::get_nodes_request, areq$id); + } + } + +event Management::Agent::API::node_dispatch_response(reqid: string, results: Management::ResultVec) + { + Management::Log::info(fmt("rx Management::Agent::API::node_dispatch_response %s", reqid)); + + # Retrieve state for the request we just got a response to + local areq = Management::Request::lookup(reqid); + if ( Management::Request::is_null(areq) ) + return; + + # Release the request, since this agent is now done. + Management::Request::finish(areq$id); + + # Find the original request from the client + local req = Management::Request::lookup(areq$parent_id); + if ( Management::Request::is_null(req) ) + return; + + # Add this agent's results to the overall response + for ( i in results ) + { + # Same special treatment for Broker values that are of + # type "any": confirm their (known) type here. 
+ switch req$node_dispatch_state$action[0] + { + case "get_id_value": + if ( results[i]?$data ) + results[i]$data = results[i]$data as string; + break; + default: + Management::Log::error(fmt("unexpected dispatch command %s", + req$node_dispatch_state$action[0])); + break; + } + + req$results[|req$results|] = results[i]; + } + + # Mark this request as done + if ( areq$id in req$node_dispatch_state$requests ) + delete req$node_dispatch_state$requests[areq$id]; + + # If we still have pending queries out to the agents, do nothing: we'll + # handle this soon, or our request will time out and we respond with + # error. + if ( |req$node_dispatch_state$requests| > 0 ) + return; + + # Send response event to the client based upon the dispatch type. + switch req$node_dispatch_state$action[0] + { + case "get_id_value": + Management::Log::info(fmt( + "tx Management::Controller::API::get_id_value_response %s", + Management::Request::to_string(req))); + Broker::publish(Management::Controller::topic, + Management::Controller::API::get_id_value_response, + req$id, req$results); + break; + default: + Management::Log::error(fmt("unexpected dispatch command %s", + req$node_dispatch_state$action[0])); + break; + } + + Management::Request::finish(req$id); + } + +event Management::Controller::API::get_id_value_request(reqid: string, id: string, nodes: set[string]) + { + Management::Log::info(fmt("rx Management::Controller::API::get_id_value_request %s %s", reqid, id)); + + local res: Management::Result; + + # Special case: if we have no instances, respond right away. 
+ if ( |g_instances| == 0 ) + { + Management::Log::info(fmt("tx Management::Controller::API::get_id_value_response %s", reqid)); + res = Management::Result($reqid=reqid, $success=F, $error="no instances connected"); + Broker::publish(Management::Controller::topic, + Management::Controller::API::get_id_value_response, + reqid, vector(res)); + return; + } + + local action = vector("get_id_value", id); + local req = Management::Request::create(reqid); + req$node_dispatch_state = NodeDispatchState($action=action); + + local nodes_final: set[string]; + local node: string; + + # Input sanitization: check for any requested nodes that aren't part of + # the current configuration. We send back error results for those and + # don't propagate them to the agents. + if ( |nodes| > 0 ) + { + # Requested nodes that are in the current configuration: + nodes_final = filter_config_nodes_by_name(nodes); + # Requested nodes that are not in current configuration: + local nodes_invalid = nodes - nodes_final; + + # Assemble error results for all invalid nodes + for ( node in nodes_invalid ) + { + res = Management::Result($reqid=reqid, $node=node); + res$success = F; + res$error = "unknown cluster node"; + req$results += res; + } + + # If only invalid nodes got requested, we're now done. 
+ if ( |nodes_final| == 0 ) + { + Management::Log::info(fmt( + "tx Management::Controller::API::get_id_value_response %s", + Management::Request::to_string(req))); + Broker::publish(Management::Controller::topic, + Management::Controller::API::get_id_value_response, + req$id, req$results); + Management::Request::finish(req$id); + return; + } + } + + # Send dispatch requests to all agents, with the final set of nodes + for ( name in g_instances ) + { + if ( name !in g_instances_ready ) + next; + + local agent_topic = Management::Agent::topic_prefix + "/" + name; + local areq = Management::Request::create(); + + areq$parent_id = req$id; + add req$node_dispatch_state$requests[areq$id]; + + Management::Log::info(fmt( + "tx Management::Agent::API::node_dispatch_request %s %s to %s", + areq$id, action, name)); + + Broker::publish(agent_topic, + Management::Agent::API::node_dispatch_request, + areq$id, action, nodes_final); + } + } + +event Management::Request::request_expired(req: Management::Request::Request) + { + # Various handlers for timed-out request state. We use the state members + # to identify how to respond. No need to clean up the request itself, + # since we're getting here via the request module's expiration + # mechanism that handles the cleanup. + local res = Management::Result($reqid=req$id, + $success = F, + $error = "request timed out"); + + if ( req?$set_configuration_state ) + { + # This timeout means we no longer have a pending request. 
+ g_config_reqid_pending = ""; + req$results += res; + + Management::Log::info(fmt("tx Management::Controller::API::set_configuration_response %s", + Management::Request::to_string(req))); + Broker::publish(Management::Controller::topic, + Management::Controller::API::set_configuration_response, req$id, req$results); + } + + if ( req?$get_nodes_state ) + { + req$results += res; + + Management::Log::info(fmt("tx Management::Controller::API::get_nodes_response %s", + Management::Request::to_string(req))); + Broker::publish(Management::Controller::topic, + Management::Controller::API::get_nodes_response, req$id, req$results); + } + + if ( req?$node_dispatch_state ) + { + req$results += res; + + switch req$node_dispatch_state$action[0] + { + case "get_id_value": + Management::Log::info(fmt( + "tx Management::Controller::API::get_id_value_response %s", + Management::Request::to_string(req))); + Broker::publish(Management::Controller::topic, + Management::Controller::API::get_id_value_response, + req$id, req$results); + break; + default: + Management::Log::error(fmt("unexpected dispatch command %s", + req$node_dispatch_state$action[0])); + break; + } + } + + if ( req?$test_state ) + { + Management::Log::info(fmt("tx Management::Controller::API::test_timeout_response %s", req$id)); + Broker::publish(Management::Controller::topic, + Management::Controller::API::test_timeout_response, req$id, res); + } + } + +event Management::Controller::API::test_timeout_request(reqid: string, with_state: bool) + { + Management::Log::info(fmt("rx Management::Controller::API::test_timeout_request %s %s", reqid, with_state)); + + if ( with_state ) + { + # This state times out and triggers a timeout response in the + # above request_expired event handler. + local req = Management::Request::create(reqid); + req$test_state = TestState(); + } + } + +event zeek_init() + { + # Initialize null config at startup. 
We will replace it once we have + # persistence, and again whenever we complete a client's + # set_configuration request. + g_config_current = null_config(); + + # The controller always listens -- it needs to be able to respond to the + # Zeek client. This port is also used by the agents if they connect to + # the client. The client doesn't automatically establish or accept + # connectivity to agents: agents are defined and communicated with as + # defined via configurations defined by the client. + + local cni = Management::Controller::network_info(); + + Broker::listen(cat(cni$address), cni$bound_port); + + Broker::subscribe(Management::Agent::topic_prefix); + Broker::subscribe(Management::Controller::topic); + + Management::Log::info("controller is live"); + } diff --git a/scripts/policy/frameworks/cluster/controller/log.zeek b/scripts/policy/frameworks/management/log.zeek similarity index 71% rename from scripts/policy/frameworks/cluster/controller/log.zeek rename to scripts/policy/frameworks/management/log.zeek index a7525dec0c..e8732df1bc 100644 --- a/scripts/policy/frameworks/cluster/controller/log.zeek +++ b/scripts/policy/frameworks/management/log.zeek @@ -1,11 +1,11 @@ -##! This module implements straightforward logging abilities for cluster -##! controller and agent. It uses Zeek's logging framework, and works only for -##! nodes managed by the supervisor. In this setting Zeek's logging framework -##! operates locally, i.e., this logging does not involve any logger nodes. +##! This module implements logging abilities for controller and agent. It uses +##! Zeek's logging framework and works only for nodes managed by the +##! supervisor. In this setting Zeek's logging framework operates locally, i.e., +##! this does not involve logger nodes. -@load ./config +@load ./types -module ClusterController::Log; +module Management::Log; export { ## The cluster logging stream identifier. 
@@ -16,28 +16,29 @@ export { ## The controller/agent log supports four different log levels. type Level: enum { - DEBUG, - INFO, - WARNING, - ERROR, + DEBUG = 10, + INFO = 20, + WARNING = 30, + ERROR = 40, }; ## The record type containing the column fields of the agent/controller log. type Info: record { ## The time at which a cluster message was generated. - ts: time; + ts: time; ## The name of the node that is creating the log record. node: string; ## Log level of this message, converted from the above Level enum level: string; - ## The role of the node, translated from ClusterController::Types::Role. + ## The role of the node, translated from Management::Role. role: string; ## A message indicating information about cluster controller operation. message: string; } &log; - ## The log level in use for this node. - global log_level = DEBUG &redef; + ## The log level in use for this node. This is the minimum + ## log level required to produce output. + global log_level = INFO &redef; ## A debug-level log message writer. ## @@ -63,6 +64,10 @@ export { ## message: the message to log. ## global error: function(message: string); + + ## The role of this process in cluster management. Agent and controller + ## both redefine this, and we use it during logging. + const role = Management::NONE &redef; } # Enum translations to strings. 
This avoids those enums being reported @@ -75,9 +80,10 @@ global l2s: table[Level] of string = { [ERROR] = "ERROR", }; -global r2s: table[ClusterController::Types::Role] of string = { - [ClusterController::Types::AGENT] = "AGENT", - [ClusterController::Types::CONTROLLER] = "CONTROLLER", +global r2s: table[Management::Role] of string = { + [Management::AGENT] = "AGENT", + [Management::CONTROLLER] = "CONTROLLER", + [Management::NODE] = "NODE", }; function debug(message: string) @@ -87,7 +93,7 @@ function debug(message: string) local node = Supervisor::node(); Log::write(LOG, [$ts=network_time(), $node=node$name, $level=l2s[DEBUG], - $role=r2s[ClusterController::role], $message=message]); + $role=r2s[role], $message=message]); } function info(message: string) @@ -97,7 +103,7 @@ function info(message: string) local node = Supervisor::node(); Log::write(LOG, [$ts=network_time(), $node=node$name, $level=l2s[INFO], - $role=r2s[ClusterController::role], $message=message]); + $role=r2s[role], $message=message]); } function warning(message: string) @@ -107,7 +113,7 @@ function warning(message: string) local node = Supervisor::node(); Log::write(LOG, [$ts=network_time(), $node=node$name, $level=l2s[WARNING], - $role=r2s[ClusterController::role], $message=message]); + $role=r2s[role], $message=message]); } function error(message: string) @@ -117,7 +123,7 @@ function error(message: string) local node = Supervisor::node(); Log::write(LOG, [$ts=network_time(), $node=node$name, $level=l2s[ERROR], - $role=r2s[ClusterController::role], $message=message]); + $role=r2s[role], $message=message]); } event zeek_init() @@ -133,5 +139,5 @@ event zeek_init() local stream = Log::Stream($columns=Info, $path=fmt("cluster-%s", node$name), $policy=log_policy); - Log::create_stream(ClusterController::Log::LOG, stream); + Log::create_stream(Management::Log::LOG, stream); } diff --git a/scripts/policy/frameworks/management/node/__load__.zeek b/scripts/policy/frameworks/management/node/__load__.zeek 
new file mode 100644 index 0000000000..a10fe855df --- /dev/null +++ b/scripts/policy/frameworks/management/node/__load__.zeek @@ -0,0 +1 @@ +@load ./main diff --git a/scripts/policy/frameworks/management/node/api.zeek b/scripts/policy/frameworks/management/node/api.zeek new file mode 100644 index 0000000000..f952f312ef --- /dev/null +++ b/scripts/policy/frameworks/management/node/api.zeek @@ -0,0 +1,48 @@ +##! The Management event API of cluster nodes. The API consists of request/ +##! response event pairs, like elsewhere in the Management, Supervisor, and +##! Control frameworks. + +@load policy/frameworks/management/types + +module Management::Node::API; + +export { + ## Management agents send this event to every Zeek cluster node to run a + ## "dispatch" -- a particular, pre-implemented action. This is the agent-node + ## complement to :zeek:see:`Management::Agent::API::node_dispatch_request`. + ## + ## reqid: a request identifier string, echoed in the response event. + ## + ## action: the requested dispatch command, with any arguments. + ## + ## nodes: the cluster node names this dispatch targets. An empty set, + ## supplied by default, means it applies to all nodes. Since nodes + ## receive all dispatch requests, they can use any node names provided + ## here to filter themselves out of responding. + global node_dispatch_request: event(reqid: string, action: vector of string, + nodes: set[string] &default=set()); + + ## Response to a node_dispatch_request event. The nodes send this back + ## to the agent. This is the agent-node equivalent of + ## :zeek:see:`Management::Agent::API::node_dispatch_response`. + ## + ## reqid: the request identifier used in the request event. + ## + ## result: a :zeek:see:`Management::Result` record covering one Zeek + ## cluster node managed by the agent. Upon success, the data field + ## contains a value appropriate for the requested dispatch. 
+ global node_dispatch_response: event(reqid: string, result: Management::Result); + + + # Notification events, node -> agent + + ## The cluster nodes send this event upon peering as a "check-in" to + ## the agent, to indicate the node is now available to communicate + ## with. It is an agent-level equivalent of :zeek:see:`Broker::peer_added`, + ## and similar to :zeek:see:`Management::Agent::API::notify_agent_hello` + ## for agents. + ## + ## node: the name of the node, as given in :zeek:see:`Cluster::node`. + ## + global notify_node_hello: event(node: string); +} diff --git a/scripts/policy/frameworks/management/node/config.zeek b/scripts/policy/frameworks/management/node/config.zeek new file mode 100644 index 0000000000..d17fd663a1 --- /dev/null +++ b/scripts/policy/frameworks/management/node/config.zeek @@ -0,0 +1,9 @@ +##! Configuration settings for nodes controlled by the Management framework. + +module Management::Node; + +export { + ## The nodes' Broker topic. Cluster nodes automatically subscribe + ## to it, to receive request events from the Management framework. + const node_topic = "zeek/management/node" &redef; +} diff --git a/scripts/policy/frameworks/management/node/main.zeek b/scripts/policy/frameworks/management/node/main.zeek new file mode 100644 index 0000000000..52aa5f1e96 --- /dev/null +++ b/scripts/policy/frameworks/management/node/main.zeek @@ -0,0 +1,110 @@ +##! This module provides Management framework functionality present in every +##! cluster node, to allow Management agents to interact with the nodes. + +@load base/frameworks/cluster + +@load policy/frameworks/management/agent/config +@load policy/frameworks/management/log + +@load ./api +@load ./config + +module Management::Node; + +# Tag our logs correctly +redef Management::Log::role = Management::NODE; + +## The type of dispatch callbacks.
These implement a particular dispatch action, +## using the provided string vector as arguments, filling results into the +## provided result record. +type DispatchCallback: function(args: vector of string, res: Management::Result); + +## Implementation of the "get_id_value" dispatch. Its only argument is the name +## of the ID to look up. +function dispatch_get_id_value(args: vector of string, res: Management::Result) + { + if ( |args| == 0 ) + { + res$success = F; + res$error = "get_id_value expects name of global identifier"; + return; + } + + local val = lookup_ID(args[0]); + + # The following lookup_ID() result strings indicate errors: + if ( type_name(val) == "string" ) + { + local valstr: string = val; + if ( valstr == "<unknown id>" || valstr == "<no ID value>" ) + { + res$success = F; + res$error = valstr[1:-1]; + } + } + + if ( res$success ) + res$data = to_json(val); + } + +global g_dispatch_table: table[string] of DispatchCallback = { + ["get_id_value"] = dispatch_get_id_value, +}; + +event Management::Node::API::node_dispatch_request(reqid: string, action: vector of string, nodes: set[string]) + { + Management::Log::info(fmt("rx Management::Node::API::node_dispatch_request %s %s %s", reqid, action, nodes)); + + if ( |nodes| > 0 && Cluster::node !in nodes ) + { + Management::Log::debug(fmt( + "dispatch %s not targeting this node (%s !in %s), skipping", + reqid, Cluster::node, nodes)); + return; + } + + local res = Management::Result($reqid = reqid, $node = Cluster::node); + + if ( |action| == 0 ) + { + res$success = F; + res$error = "no dispatch arguments provided"; + } + else if ( action[0] !in g_dispatch_table ) + { + res$success = F; + res$error = fmt("dispatch %s unknown", action[0]); + } + + if ( !
res$success ) + { + Management::Log::info(fmt("tx Management::Node::API::node_dispatch_response %s", + Management::result_to_string(res))); + Broker::publish(node_topic, Management::Node::API::node_dispatch_response, reqid, res); + return; + } + + g_dispatch_table[action[0]](action[1:], res); + + Management::Log::info(fmt("tx Management::Node::API::node_dispatch_response %s", + Management::result_to_string(res))); + Broker::publish(node_topic, Management::Node::API::node_dispatch_response, reqid, res); + } + +event Broker::peer_added(peer: Broker::EndpointInfo, msg: string) + { + local epi = Management::Agent::endpoint_info(); + + # If this is the agent peering, notify it that we're ready + if ( peer$network$address == epi$network$address && + peer$network$bound_port == epi$network$bound_port ) + Broker::publish(node_topic, Management::Node::API::notify_node_hello, Cluster::node); + } + +event zeek_init() + { + local epi = Management::Agent::endpoint_info(); + + Broker::peer(epi$network$address, epi$network$bound_port, Management::connect_retry); + Broker::subscribe(node_topic); + } diff --git a/scripts/policy/frameworks/cluster/controller/request.zeek b/scripts/policy/frameworks/management/request.zeek similarity index 61% rename from scripts/policy/frameworks/cluster/controller/request.zeek rename to scripts/policy/frameworks/management/request.zeek index 202a615e6b..82a4de3648 100644 --- a/scripts/policy/frameworks/cluster/controller/request.zeek +++ b/scripts/policy/frameworks/management/request.zeek @@ -1,14 +1,19 @@ -##! This module implements a request state abstraction that both cluster -##! controller and agent use to tie responses to received request events and be -##! able to time-out such requests. +##! This module implements a request state abstraction in the Management +##! framework that both controller and agent use to connect request events to +##! subsequent response ones, and to be able to time out such requests. 
-@load ./types @load ./config +@load ./types -module ClusterController::Request; +module Management::Request; export { - ## Request records track each request's state. + ## Request records track state associated with a request/response event + ## pair. Calls to + ## :zeek:see:`Management::Request::create` establish such state + ## when an entity sends off a request event, while + ## :zeek:see:`Management::Request::finish` clears the state when + ## a corresponding response event comes in, or the state times out. type Request: record { ## Each request has a hopfully unique ID provided by the requester. id: string; @@ -18,39 +23,22 @@ export { ## received by the client), this specifies that original, "parent" ## request. parent_id: string &optional; - }; - # API-specific state. XXX we may be able to generalize after this has - # settled a bit more. It would also be nice to move request-specific - # state out of this module -- we could for example redef Request in - # main.zeek as needed. + ## The results vector builds up the list of results we eventually + ## send to the requestor when we have processed the request. + results: Management::ResultVec &default=vector(); - # State specific to the set_configuration request/response events - type SetConfigurationState: record { - config: ClusterController::Types::Configuration; - requests: vector of Request &default=vector(); - }; - - # State specific to supervisor interactions - type SupervisorState: record { - node: string; - }; - - # State for testing events - type TestState: record { - }; - - # The redef is a workaround so we can use the Request type - # while it is still being defined. - redef record Request += { - results: ClusterController::Types::ResultVec &default=vector(); + ## An internal flag to track whether a request is complete. 
finished: bool &default=F; - - set_configuration_state: SetConfigurationState &optional; - supervisor_state: SupervisorState &optional; - test_state: TestState &optional; }; + ## The timeout for request state. Such state (see the :zeek:see:`Management::Request` + ## module) ties together request and response event pairs. The timeout causes + ## its cleanup in the absence of a timely response. It applies both to + ## state kept for client requests, as well as state in the agents for + ## requests to the supervisor. + const timeout_interval = 10sec &redef; + ## A token request that serves as a null/nonexistant request. global null_req = Request($id="", $finished=T); @@ -61,7 +49,7 @@ export { global create: function(reqid: string &default=unique_id("")): Request; ## This function looks up the request for a given request ID and returns - ## it. When no such request exists, returns ClusterController::Request::null_req. + ## it. When no such request exists, returns Management::Request::null_req. ## ## reqid: the ID of the request state to retrieve. ## @@ -76,8 +64,8 @@ export { global finish: function(reqid: string): bool; ## This event fires when a request times out (as per the - ## ClusterController::request_timeout) before it has been finished via - ## ClusterController::Request::finish(). + ## Management::Request::timeout_interval) before it has been finished via + ## Management::Request::finish(). ## ## req: the request state that is expiring. ## @@ -101,17 +89,20 @@ export { function requests_expire_func(reqs: table[string] of Request, reqid: string): interval { - event ClusterController::Request::request_expired(reqs[reqid]); + # No need to flag request expiration when we've already internally marked + # the request as done. + if ( ! reqs[reqid]$finished ) + event Management::Request::request_expired(reqs[reqid]); + return 0secs; } # This is the global request-tracking table. The table maps from request ID # strings to corresponding Request records. 
Entries time out after the -# ClusterController::request_timeout interval. Upon expiration, a -# request_expired event triggers that conveys the request state. +# Management::Request::timeout_interval. Upon expiration, a request_expired +# event triggers that conveys the request state. global g_requests: table[string] of Request - &create_expire=ClusterController::request_timeout - &expire_func=requests_expire_func; + &create_expire=timeout_interval &expire_func=requests_expire_func; function create(reqid: string): Request { @@ -152,7 +143,7 @@ function is_null(request: Request): bool function to_string(request: Request): string { local results: string_vec; - local res: ClusterController::Types::Result; + local res: Management::Result; local parent_id = ""; if ( request?$parent_id ) @@ -161,7 +152,7 @@ function to_string(request: Request): string for ( idx in request$results ) { res = request$results[idx]; - results[|results|] = ClusterController::Types::result_to_string(res); + results[|results|] = Management::result_to_string(res); } return fmt("[request %s%s %s, results: %s]", request$id, parent_id, diff --git a/scripts/policy/frameworks/cluster/controller/types.zeek b/scripts/policy/frameworks/management/types.zeek similarity index 62% rename from scripts/policy/frameworks/cluster/controller/types.zeek rename to scripts/policy/frameworks/management/types.zeek index 9d7bc82e3c..6d89fbda1a 100644 --- a/scripts/policy/frameworks/cluster/controller/types.zeek +++ b/scripts/policy/frameworks/management/types.zeek @@ -1,17 +1,18 @@ -##! This module holds the basic types needed for the Cluster Controller -##! framework. These are used by both agent and controller, and several -##! have corresponding equals in the zeek-client implementation. +##! This module holds the basic types needed for the Management framework. These +##! are used by both cluster agent and controller, and several have corresponding +##! implementations in zeek-client. 
-module ClusterController::Types; +module Management; export { ## Management infrastructure node type. This intentionally does not - ## include the data cluster node types (worker, logger, etc) -- those + ## include the managed cluster node types (worker, logger, etc) -- those ## continue to be managed by the cluster framework. type Role: enum { - NONE, - AGENT, - CONTROLLER, + NONE, ##< No active role in cluster management + AGENT, ##< A cluster management agent. + CONTROLLER, ##< The cluster's controller. + NODE, ##< A managed cluster node (worker, manager, etc). }; ## A Zeek-side option with value. @@ -35,22 +36,25 @@ export { type InstanceVec: vector of Instance; ## State that a Cluster Node can be in. State changes trigger an - ## API notification (see notify_change()). + ## API notification (see notify_change()). The Pending state corresponds + ## to the Supervisor not yet reporting a PID for a node when it has not + ## yet fully launched. type State: enum { - Running, ##< Running and operating normally - Stopped, ##< Explicitly stopped - Failed, ##< Failed to start; and permanently halted - Crashed, ##< Crashed, will be restarted, - Unknown, ##< State not known currently (e.g., because of lost connectivity) + PENDING, ##< Not yet running + RUNNING, ##< Running and operating normally + STOPPED, ##< Explicitly stopped + FAILED, ##< Failed to start; and permanently halted + CRASHED, ##< Crashed, will be restarted, + UNKNOWN, ##< State not known currently (e.g., because of lost connectivity) }; ## Configuration describing a Cluster Node process. type Node: record { name: string; ##< Cluster-unique, human-readable node name instance: string; ##< Name of instance where node is to run - p: port; ##< Port on which this node will listen role: Supervisor::ClusterRole; ##< Role of the node. state: State; ##< Desired, or current, run state. 
+ p: port &optional; ##< Port on which this node will listen scripts: vector of string &optional; ##< Additional Zeek scripts for node options: set[Option] &optional; ##< Zeek options for node interface: string &optional; ##< Interface to sniff @@ -61,7 +65,6 @@ export { ## Data structure capturing a cluster's complete configuration. type Configuration: record { id: string &default=unique_id(""); ##< Unique identifier for a particular configuration - ## The instances in the cluster. instances: set[Instance] &default=set(); @@ -69,6 +72,26 @@ export { nodes: set[Node] &default=set(); }; + ## The status of a Supervisor-managed node, as reported to the client in + ## a get_nodes_request/get_nodes_response transaction. + type NodeStatus: record { + ## Cluster-unique, human-readable node name + node: string; + ## Current run state of the node. + state: State; + ## Role the node plays in cluster management. + mgmt_role: Role &default=NONE; + ## Role the node plays in the data cluster. + cluster_role: Supervisor::ClusterRole &default=Supervisor::NONE; + ## Process ID of the node. This is optional because the Supervisor may not have + ## a PID when a node is still bootstrapping. + pid: int &optional; + ## The node's Broker peering listening port, if any. + p: port &optional; + }; + + type NodeStatusVec: vector of NodeStatus; + ## Return value for request-response API event pairs type Result: record { reqid: string; ##< Request ID of operation this result refers to @@ -81,6 +104,8 @@ export { type ResultVec: vector of Result; + ## Given a :zeek:see:`Management::Result` record, + ## this function returns a string summarizing it. 
global result_to_string: function(res: Result): string; } diff --git a/scripts/policy/frameworks/cluster/controller/util.zeek b/scripts/policy/frameworks/management/util.zeek similarity index 79% rename from scripts/policy/frameworks/cluster/controller/util.zeek rename to scripts/policy/frameworks/management/util.zeek index 0329438f2f..98d79c8f9e 100644 --- a/scripts/policy/frameworks/cluster/controller/util.zeek +++ b/scripts/policy/frameworks/management/util.zeek @@ -1,7 +1,7 @@ -##! Utility functions for the cluster controller framework, available to agent +##! Utility functions for the Management framework, available to agent ##! and controller. -module ClusterController::Util; +module Management::Util; export { ## Renders a set of strings to an alphabetically sorted vector. diff --git a/scripts/policy/protocols/ssl/decryption.zeek b/scripts/policy/protocols/ssl/decryption.zeek new file mode 100644 index 0000000000..b9be06ca89 --- /dev/null +++ b/scripts/policy/protocols/ssl/decryption.zeek @@ -0,0 +1,111 @@ +##! This script allows for the decryption of certain TLS 1.2 connections, if the user is in possession +##! of the private key material for the session. Key material can either be provided via a file (useful +##! for processing trace files) or via sending events via Broker (for live decoding). +##! +##! Please note that this feature is experimental and can change without guarantees to our typical +##! deprecation timeline. Please also note that currently only TLS 1.2 connections that use the +##! TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384 cipher suite are supported. + +@load base/frameworks/input +@load base/frameworks/notice +@load base/protocols/conn +@load base/protocols/ssl + +module SSL; + +# Do not disable analyzers after detection - otherwise we will not receive +# encrypted packets. +redef SSL::disable_analyzer_after_detection = F; + +export { + ## This can be set to a file that contains the session secrets for decryption, when parsing a pcap file. 
+ ## Please note that, when using this feature, you probably want to pause processing of data till this + ## file has been read. + const keylog_file = getenv("ZEEK_TLS_KEYLOG_FILE") &redef; + + ## Secrets expire after this time of not being used. + const secret_expiration = 5 mins &redef; + + ## This event can be triggered, e.g., via Broker to add known keys to the TLS key database. + ## + ## client_random: client random for which the key is set + ## + ## keys: key material + global add_keys: event(client_random: string, keys: string); + + ## This event can be triggered, e.g., via Broker to add known secrets to the TLS secret database. + ## + ## client_random: client random for which the secret is set + ## + ## secrets: derived TLS secrets material + global add_secret: event(client_random: string, secrets: string); +} + +@if ( keylog_file == "" ) +# If a keylog file was given via an environment variable, let's disable secret expiration - that does not +# make sense for pcaps. +global secrets: table[string] of string = {} &redef; +global keys: table[string] of string = {} &redef; +@else +global secrets: table[string] of string = {} &read_expire=secret_expiration &redef; +global keys: table[string] of string = {} &read_expire=secret_expiration &redef; +@endif + + +redef record SSL::Info += { + # Decryption uses client_random as identifier + client_random: string &optional; +}; + +type SecretsIdx: record { + client_random: string; +}; + +type SecretsVal: record { + secret: string; +}; + +const tls_decrypt_stream_name = "tls-keylog-file"; + +event zeek_init() + { + # listen for secrets + Broker::subscribe("/zeek/tls/decryption"); + + if ( keylog_file != "" ) + { + Input::add_table([$name=tls_decrypt_stream_name, $source=keylog_file, $destination=secrets, $idx=SecretsIdx, $val=SecretsVal, $want_record=F]); + Input::remove(tls_decrypt_stream_name); + } + } + +event SSL::add_keys(client_random: string, val: string) + { + SSL::keys[client_random] = val; + } + +event
SSL::add_secret(client_random: string, val: string) + { + SSL::secrets[client_random] = val; + } + +event ssl_client_hello(c: connection, version: count, record_version: count, possible_ts: time, client_random: string, session_id: string, ciphers: index_vec, comp_methods: index_vec) + { + c$ssl$client_random = client_random; + + if ( client_random in keys ) + set_keys(c, keys[client_random]); + else if ( client_random in secrets ) + set_secret(c, secrets[client_random]); + } + +event ssl_change_cipher_spec(c: connection, is_orig: bool) + { + if ( c$ssl?$client_random ) + { + if ( c$ssl$client_random in keys ) + set_keys(c, keys[c$ssl$client_random]); + else if ( c$ssl$client_random in secrets ) + set_secret(c, secrets[c$ssl$client_random]); + } + } diff --git a/scripts/policy/protocols/ssl/dpd-v2.sig b/scripts/policy/protocols/ssl/dpd-v2.sig new file mode 100644 index 0000000000..66f44441c6 --- /dev/null +++ b/scripts/policy/protocols/ssl/dpd-v2.sig @@ -0,0 +1,17 @@ +# This signature can be used to enable DPD for SSL version 2. +# Note that SSLv2 is basically unused by now. Due to the structure of the protocol, it also is sometimes +# hard to disambiguate it from random noise - so you will probably always get a few false positives. 
+ +signature dpd_ssl_server { + ip-proto == tcp + payload /^...?\x04..\x00\x02.*/ + requires-reverse-signature dpd_ssl_client + tcp-state responder + enable "ssl" +} + +signature dpd_ssl_client { + ip-proto == tcp + payload /^...?\x01[\x00\x03][\x00\x01\x02\x03\x04].*/ + tcp-state originator +} diff --git a/scripts/test-all-policy.zeek b/scripts/test-all-policy.zeek index bdc052d301..0f2de90609 100644 --- a/scripts/test-all-policy.zeek +++ b/scripts/test-all-policy.zeek @@ -11,20 +11,26 @@ # @load frameworks/control/controllee.zeek # @load frameworks/control/controller.zeek -@load frameworks/cluster/agent/__load__.zeek -@load frameworks/cluster/agent/api.zeek -@load frameworks/cluster/agent/boot.zeek -@load frameworks/cluster/agent/config.zeek -# @load frameworks/cluster/agent/main.zeek -@load frameworks/cluster/controller/__load__.zeek -@load frameworks/cluster/controller/api.zeek -@load frameworks/cluster/controller/boot.zeek -@load frameworks/cluster/controller/config.zeek -@load frameworks/cluster/controller/log.zeek -# @load frameworks/cluster/controller/main.zeek -@load frameworks/cluster/controller/request.zeek -@load frameworks/cluster/controller/types.zeek -@load frameworks/cluster/controller/util.zeek +@load frameworks/management/agent/__load__.zeek +@load frameworks/management/agent/api.zeek +@load frameworks/management/agent/boot.zeek +@load frameworks/management/agent/config.zeek +# @load frameworks/management/agent/main.zeek +@load frameworks/management/controller/__load__.zeek +@load frameworks/management/controller/api.zeek +@load frameworks/management/controller/boot.zeek +@load frameworks/management/controller/config.zeek +# @load frameworks/management/controller/main.zeek +@load frameworks/management/__load__.zeek +@load frameworks/management/config.zeek +@load frameworks/management/log.zeek +# @load frameworks/management/node/__load__.zeek +@load frameworks/management/node/api.zeek +@load frameworks/management/node/config.zeek +# @load 
frameworks/management/node/main.zeek +@load frameworks/management/request.zeek +@load frameworks/management/types.zeek +@load frameworks/management/util.zeek @load frameworks/dpd/detect-protocols.zeek @load frameworks/dpd/packet-segment-logging.zeek @load frameworks/intel/do_notice.zeek @@ -116,6 +122,7 @@ @load protocols/ssh/geo-data.zeek @load protocols/ssh/interesting-hostnames.zeek @load protocols/ssh/software.zeek +@load protocols/ssl/decryption.zeek @load protocols/ssl/expiring-certs.zeek # @load protocols/ssl/extract-certs-pem.zeek @load protocols/ssl/heartbleed.zeek diff --git a/scripts/zeekygen/__load__.zeek b/scripts/zeekygen/__load__.zeek index 75a5f7a666..39314a04ac 100644 --- a/scripts/zeekygen/__load__.zeek +++ b/scripts/zeekygen/__load__.zeek @@ -1,11 +1,14 @@ @load test-all-policy.zeek # Scripts which are commented out in test-all-policy.zeek. +@load protocols/ssl/decryption.zeek @load protocols/ssl/notary.zeek @load frameworks/control/controllee.zeek @load frameworks/control/controller.zeek -@load frameworks/cluster/agent/main.zeek -@load frameworks/cluster/controller/main.zeek +@load frameworks/management/agent/main.zeek +@load frameworks/management/controller/main.zeek +@load frameworks/management/node/__load__.zeek +@load frameworks/management/node/main.zeek @load frameworks/files/extract-all-files.zeek @load policy/misc/dump-events.zeek @load policy/protocols/conn/speculative-service.zeek diff --git a/src/3rdparty b/src/3rdparty index cb626c94f6..6cbb3d6587 160000 --- a/src/3rdparty +++ b/src/3rdparty @@ -1 +1 @@ -Subproject commit cb626c94f67e0ac0437beba076da1184eb1f8ad7 +Subproject commit 6cbb3d65877f80326c047364583f506ce58758ba diff --git a/src/Attr.cc b/src/Attr.cc index b0d2947a00..da28845fe9 100644 --- a/src/Attr.cc +++ b/src/Attr.cc @@ -192,6 +192,9 @@ void Attributes::AddAttr(AttrPtr attr, bool is_redef) { auto acceptable_duplicate_attr = [](const AttrPtr& attr, const AttrPtr& existing) -> bool { + if ( attr == existing ) + return true; 
+ AttrTag new_tag = attr->Tag(); if ( new_tag == ATTR_DEPRECATED ) @@ -341,116 +344,12 @@ void Attributes::CheckAttr(Attr* a) case ATTR_DEFAULT: { - // &default is allowed for global tables, since it's used in initialization - // of table fields. it's not allowed otherwise. - if ( global_var && ! type->IsTable() ) - { - Error("&default is not valid for global variables except for tables"); - break; - } - - const auto& atype = a->GetExpr()->GetType(); - - if ( type->Tag() != TYPE_TABLE || (type->IsSet() && ! in_record) ) - { - if ( same_type(atype, type) ) - // Ok. - break; - - // Record defaults may be promotable. - if ( (type->Tag() == TYPE_RECORD && atype->Tag() == TYPE_RECORD && - record_promotion_compatible(atype->AsRecordType(), type->AsRecordType())) ) - // Ok. - break; - - if ( type->Tag() == TYPE_TABLE && type->AsTableType()->IsUnspecifiedTable() ) - // Ok. - break; - - auto e = check_and_promote_expr(a->GetExpr(), type); - - if ( e ) - { - a->SetAttrExpr(std::move(e)); - // Ok. - break; - } - - a->GetExpr()->Error("&default value has inconsistent type", type.get()); - return; - } - - TableType* tt = type->AsTableType(); - const auto& ytype = tt->Yield(); - - if ( ! in_record ) - { - // &default applies to the type itself. - if ( ! same_type(atype, ytype) ) - { - // It can still be a default function. - if ( atype->Tag() == TYPE_FUNC ) - { - FuncType* f = atype->AsFuncType(); - if ( ! f->CheckArgs(tt->GetIndexTypes()) || ! same_type(f->Yield(), ytype) ) - Error("&default function type clash"); - - // Ok. - break; - } - - // Table defaults may be promotable. - if ( (ytype->Tag() == TYPE_RECORD && atype->Tag() == TYPE_RECORD && - record_promotion_compatible(atype->AsRecordType(), - ytype->AsRecordType())) ) - // Ok. - break; - - auto e = check_and_promote_expr(a->GetExpr(), ytype); - - if ( e ) - { - a->SetAttrExpr(std::move(e)); - // Ok. - break; - } - - Error("&default value has inconsistent type 2"); - } - - // Ok. 
- break; - } - - else - { - // &default applies to record field. - - if ( same_type(atype, type) ) - // Ok. - break; - - if ( (atype->Tag() == TYPE_TABLE && atype->AsTableType()->IsUnspecifiedTable()) ) - { - auto e = check_and_promote_expr(a->GetExpr(), type); - - if ( e ) - { - a->SetAttrExpr(std::move(e)); - break; - } - } - - // Table defaults may be promotable. - if ( ytype && ytype->Tag() == TYPE_RECORD && atype->Tag() == TYPE_RECORD && - record_promotion_compatible(atype->AsRecordType(), ytype->AsRecordType()) ) - // Ok. - break; - - Error("&default value has inconsistent type"); - } - } + std::string err_msg; + if ( ! check_default_attr(a, type, global_var, in_record, err_msg) && + ! err_msg.empty() ) + Error(err_msg.c_str()); break; + } case ATTR_EXPIRE_READ: { @@ -748,4 +647,113 @@ bool Attributes::operator==(const Attributes& other) const return true; } +bool check_default_attr(Attr* a, const TypePtr& type, bool global_var, bool in_record, + std::string& err_msg) + { + // &default is allowed for global tables, since it's used in + // initialization of table fields. It's not allowed otherwise. + if ( global_var && ! type->IsTable() ) + { + err_msg = "&default is not valid for global variables except for tables"; + return false; + } + + const auto& atype = a->GetExpr()->GetType(); + + if ( type->Tag() != TYPE_TABLE || (type->IsSet() && ! in_record) ) + { + if ( same_type(atype, type) ) + // Ok. + return true; + + // Record defaults may be promotable. + if ( (type->Tag() == TYPE_RECORD && atype->Tag() == TYPE_RECORD && + record_promotion_compatible(atype->AsRecordType(), type->AsRecordType())) ) + // Ok. + return true; + + if ( type->Tag() == TYPE_TABLE && type->AsTableType()->IsUnspecifiedTable() ) + // Ok. + return true; + + auto e = check_and_promote_expr(a->GetExpr(), type); + + if ( e ) + { + a->SetAttrExpr(std::move(e)); + // Ok. 
+ return true; + } + + a->GetExpr()->Error("&default value has inconsistent type", type.get()); + return false; + } + + TableType* tt = type->AsTableType(); + const auto& ytype = tt->Yield(); + + if ( ! in_record ) + { // &default applies to the type itself. + if ( same_type(atype, ytype) ) + return true; + + // It can still be a default function. + if ( atype->Tag() == TYPE_FUNC ) + { + FuncType* f = atype->AsFuncType(); + if ( ! f->CheckArgs(tt->GetIndexTypes()) || ! same_type(f->Yield(), ytype) ) + { + err_msg = "&default function type clash"; + return false; + } + + // Ok. + return true; + } + + // Table defaults may be promotable. + if ( (ytype->Tag() == TYPE_RECORD && atype->Tag() == TYPE_RECORD && + record_promotion_compatible(atype->AsRecordType(), ytype->AsRecordType())) ) + // Ok. + return true; + + auto e = check_and_promote_expr(a->GetExpr(), ytype); + + if ( e ) + { + a->SetAttrExpr(std::move(e)); + // Ok. + return true; + } + + err_msg = "&default value has inconsistent type"; + return false; + } + + // &default applies to record field. + + if ( same_type(atype, type) ) + return true; + + if ( (atype->Tag() == TYPE_TABLE && atype->AsTableType()->IsUnspecifiedTable()) ) + { + auto e = check_and_promote_expr(a->GetExpr(), type); + + if ( e ) + { + a->SetAttrExpr(std::move(e)); + return true; + } + } + + // Table defaults may be promotable. + if ( ytype && ytype->Tag() == TYPE_RECORD && atype->Tag() == TYPE_RECORD && + record_promotion_compatible(atype->AsRecordType(), ytype->AsRecordType()) ) + // Ok. + return true; + + err_msg = "&default value has inconsistent type"; + return false; + } + } diff --git a/src/Attr.h b/src/Attr.h index 0df396c2d9..6bc37ea726 100644 --- a/src/Attr.h +++ b/src/Attr.h @@ -139,5 +139,16 @@ protected: bool global_var; }; +// Checks whether default attribute "a" is compatible with the given type. 
+// "global_var" specifies whether the attribute is being associated with +// a global variable, and "in_record" whether it's occurring inside of +// a record declaration. +// +// Returns true on compatibility (which might include modifying "a"), false +// on an error. If an error message hasn't been directly generated, then +// it will be returned in err_msg. +extern bool check_default_attr(Attr* a, const TypePtr& type, bool global_var, bool in_record, + std::string& err_msg); + } // namespace detail } // namespace zeek diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index d8a82b92bb..f5158bcb50 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -150,6 +150,15 @@ list(APPEND BINPAC_OUTPUTS "${BINPAC_OUTPUT_CC}") binpac_target(binpac_zeek-lib.pac) list(APPEND BINPAC_OUTPUTS "${BINPAC_OUTPUT_CC}") +######################################################################## +## Gen-ZAM setup + +include(Gen-ZAM) + +set(GEN_ZAM_SRC ${CMAKE_CURRENT_SOURCE_DIR}/script_opt/ZAM/Ops.in) + +gen_zam_target(${GEN_ZAM_SRC}) + ######################################################################## ## Including subdirectories. 
######################################################################## @@ -248,37 +257,7 @@ add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/DebugCmdConstants.h set(_gen_zeek_script_cpp ${CMAKE_CURRENT_BINARY_DIR}/../CPP-gen.cc) add_custom_command(OUTPUT ${_gen_zeek_script_cpp} - COMMAND ${CMAKE_COMMAND} -E touch ${_gen_zeek_script_cpp}) - -# define a command that's used to run the ZAM instruction generator; -# building the zeek binary depends on the outputs of this script -add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/ZAM-AssignFlavorsDefs.h - ${CMAKE_CURRENT_BINARY_DIR}/ZAM-Conds.h - ${CMAKE_CURRENT_BINARY_DIR}/ZAM-DirectDefs.h - ${CMAKE_CURRENT_BINARY_DIR}/ZAM-EvalDefs.h - ${CMAKE_CURRENT_BINARY_DIR}/ZAM-EvalMacros.h - ${CMAKE_CURRENT_BINARY_DIR}/ZAM-GenExprsDefsC1.h - ${CMAKE_CURRENT_BINARY_DIR}/ZAM-GenExprsDefsC2.h - ${CMAKE_CURRENT_BINARY_DIR}/ZAM-GenExprsDefsC3.h - ${CMAKE_CURRENT_BINARY_DIR}/ZAM-GenExprsDefsV.h - ${CMAKE_CURRENT_BINARY_DIR}/ZAM-GenFieldsDefsC1.h - ${CMAKE_CURRENT_BINARY_DIR}/ZAM-GenFieldsDefsC2.h - ${CMAKE_CURRENT_BINARY_DIR}/ZAM-GenFieldsDefsV.h - ${CMAKE_CURRENT_BINARY_DIR}/ZAM-MethodDecls.h - ${CMAKE_CURRENT_BINARY_DIR}/ZAM-MethodDefs.h - ${CMAKE_CURRENT_BINARY_DIR}/ZAM-Op1FlavorsDefs.h - ${CMAKE_CURRENT_BINARY_DIR}/ZAM-OpSideEffects.h - ${CMAKE_CURRENT_BINARY_DIR}/ZAM-OpsDefs.h - ${CMAKE_CURRENT_BINARY_DIR}/ZAM-OpsNamesDefs.h - ${CMAKE_CURRENT_BINARY_DIR}/ZAM-Vec1EvalDefs.h - ${CMAKE_CURRENT_BINARY_DIR}/ZAM-Vec2EvalDefs.h - COMMAND ${CMAKE_CURRENT_BINARY_DIR}/Gen-ZAM - ARGS ${CMAKE_CURRENT_SOURCE_DIR}/script_opt/ZAM/Ops.in - DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/Gen-ZAM - ${CMAKE_CURRENT_SOURCE_DIR}/script_opt/ZAM/Ops.in - COMMENT "[sh] Generating ZAM operations" - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} -) + COMMAND ${CMAKE_COMMAND} -E touch ${_gen_zeek_script_cpp}) set_source_files_properties(3rdparty/nb_dns.c PROPERTIES COMPILE_FLAGS -fno-strict-aliasing) @@ -307,12 +286,14 @@ set(MAIN_SRCS Desc.cc Dict.cc 
Discard.cc + DNS_Mapping.cc DNS_Mgr.cc EquivClass.cc Event.cc EventHandler.cc EventLauncher.cc EventRegistry.cc + EventTrace.cc Expr.cc File.cc Flare.cc @@ -445,10 +426,6 @@ set(THIRD_PARTY_SRCS 3rdparty/strsep.c ) -set(GEN_ZAM_SRCS - script_opt/ZAM/Gen-ZAM.cc -) - # Highwayhash. Highwayhash is a bit special since it has architecture dependent code... set(HH_SRCS @@ -504,6 +481,8 @@ set(zeek_SRCS ${BIF_SRCS} ${BINPAC_AUXSRC} ${BINPAC_OUTPUTS} + ${GEN_ZAM_SRC} + ${GEN_ZAM_OUTPUT_H} ${TRANSFORMED_BISON_OUTPUTS} ${FLEX_RuleScanner_OUTPUTS} ${FLEX_RuleScanner_INPUT} @@ -522,7 +501,6 @@ set(zeek_SRCS ) collect_headers(zeek_HEADERS ${zeek_SRCS}) -collect_headers(GEN_ZAM_HEADERS ${GEN_ZAM_SRCS}) add_library(zeek_objs OBJECT ${zeek_SRCS}) @@ -538,8 +516,6 @@ set_target_properties(zeek PROPERTIES ENABLE_EXPORTS TRUE) install(TARGETS zeek DESTINATION bin) -add_executable(Gen-ZAM ${GEN_ZAM_SRCS} ${GEN_ZAM_HEADERS}) - # Install wrapper script for Bro-to-Zeek renaming. include(InstallSymlink) InstallSymlink("${CMAKE_INSTALL_PREFIX}/bin/zeek-wrapper" "${CMAKE_INSTALL_PREFIX}/bin/bro") @@ -600,6 +576,10 @@ install(CODE " ) ") +# Make sure to escape a bunch of special characters in the path before trying to use it as a +# regular expression below. +string(REGEX REPLACE "([][+.*()^])" "\\\\\\1" escaped_path "${CMAKE_CURRENT_SOURCE_DIR}/zeek") + install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/ DESTINATION include/zeek FILES_MATCHING @@ -607,7 +587,7 @@ install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/ PATTERN "*.pac" PATTERN "3rdparty/*" EXCLUDE # The "zeek -> ." 
symlink isn't needed in the install-tree - REGEX "^${CMAKE_CURRENT_SOURCE_DIR}/zeek$" EXCLUDE + REGEX "^${escaped_path}$" EXCLUDE ) install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/ diff --git a/src/CompHash.cc b/src/CompHash.cc index 56b626fa7f..e4ab1c536f 100644 --- a/src/CompHash.cc +++ b/src/CompHash.cc @@ -260,13 +260,16 @@ bool CompositeHash::RecoverOneVal(const HashKey& hk, Type* t, ValPtr* pval, bool { uint32_t id; hk.Read("func", id); - const auto& f = Func::GetFuncPtrByID(id); - if ( ! f ) + ASSERT(func_id_to_func != nullptr); + + if ( id >= func_id_to_func->size() ) reporter->InternalError("failed to look up unique function id %" PRIu32 " in CompositeHash::RecoverOneVal()", id); + const auto& f = func_id_to_func->at(id); + *pval = make_intrusive(f); const auto& pvt = (*pval)->GetType(); @@ -547,7 +550,31 @@ bool CompositeHash::SingleValHash(HashKey& hk, const Val* v, Type* bt, bool type switch ( v->GetType()->Tag() ) { case TYPE_FUNC: - hk.Write("func", v->AsFunc()->GetUniqueFuncID()); + { + auto f = v->AsFunc(); + + if ( ! func_to_func_id ) + const_cast(this)->BuildFuncMappings(); + + auto id_mapping = func_to_func_id->find(f); + uint32_t id; + + if ( id_mapping == func_to_func_id->end() ) + { + // We need the pointer to stick around + // for our lifetime, so we have to get + // a non-const version we can ref. 
+ FuncPtr fptr = {NewRef{}, const_cast(f)}; + + id = func_id_to_func->size(); + func_id_to_func->push_back(std::move(fptr)); + func_to_func_id->insert_or_assign(f, id); + } + else + id = id_mapping->second; + + hk.Write("func", id); + } break; case TYPE_PATTERN: diff --git a/src/CompHash.h b/src/CompHash.h index cbbefe5a02..da51dc6116 100644 --- a/src/CompHash.h +++ b/src/CompHash.h @@ -4,7 +4,7 @@ #include -#include "zeek/IntrusivePtr.h" +#include "zeek/Func.h" #include "zeek/Type.h" namespace zeek @@ -61,6 +61,18 @@ protected: bool EnsureTypeReserve(HashKey& hk, const Val* v, Type* bt, bool type_check) const; + // The following are for allowing hashing of function values. + // These can occur, for example, in sets of predicates that get + // iterated over. We use pointers in order to keep storage + // lower for the common case of these not being needed. + std::unique_ptr> func_to_func_id; + std::unique_ptr> func_id_to_func; + void BuildFuncMappings() + { + func_to_func_id = std::make_unique>(); + func_id_to_func = std::make_unique>(); + } + TypeListPtr type; bool is_singleton = false; // if just one type in index }; diff --git a/src/DNS_Mapping.cc b/src/DNS_Mapping.cc new file mode 100644 index 0000000000..557b870552 --- /dev/null +++ b/src/DNS_Mapping.cc @@ -0,0 +1,428 @@ +#include "zeek/DNS_Mapping.h" + +#include + +#include "zeek/3rdparty/doctest.h" +#include "zeek/DNS_Mgr.h" +#include "zeek/Reporter.h" + +namespace zeek::detail + { + +DNS_Mapping::DNS_Mapping(std::string host, struct hostent* h, uint32_t ttl, int type) + { + Init(h); + req_host = host; + req_ttl = ttl; + req_type = type; + + if ( names.empty() ) + names.push_back(std::move(host)); + } + +DNS_Mapping::DNS_Mapping(const IPAddr& addr, struct hostent* h, uint32_t ttl) + { + Init(h); + req_addr = addr; + req_ttl = ttl; + req_type = T_PTR; + } + +DNS_Mapping::DNS_Mapping(FILE* f) + { + Clear(); + init_failed = true; + + req_ttl = 0; + creation_time = 0; + + char buf[512]; + + if ( ! 
fgets(buf, sizeof(buf), f) ) + { + no_mapping = true; + return; + } + + char req_buf[512 + 1], name_buf[512 + 1]; + int is_req_host; + int failed_local; + int num_addrs; + + if ( sscanf(buf, "%lf %d %512s %d %512s %d %d %" PRIu32, &creation_time, &is_req_host, req_buf, + &failed_local, name_buf, &req_type, &num_addrs, &req_ttl) != 8 ) + { + no_mapping = true; + return; + } + + failed = static_cast(failed_local); + + if ( is_req_host ) + req_host = req_buf; + else + req_addr = IPAddr(req_buf); + + names.push_back(name_buf); + + for ( int i = 0; i < num_addrs; ++i ) + { + if ( ! fgets(buf, sizeof(buf), f) ) + return; + + char* newline = strchr(buf, '\n'); + if ( newline ) + *newline = '\0'; + + addrs.emplace_back(IPAddr(buf)); + } + + init_failed = false; + } + +ListValPtr DNS_Mapping::Addrs() + { + if ( failed ) + return nullptr; + + if ( ! addrs_val ) + { + addrs_val = make_intrusive(TYPE_ADDR); + + for ( const auto& addr : addrs ) + addrs_val->Append(make_intrusive(addr)); + } + + return addrs_val; + } + +TableValPtr DNS_Mapping::AddrsSet() + { + auto l = Addrs(); + + if ( ! l || l->Length() == 0 ) + return DNS_Mgr::empty_addr_set(); + + return l->ToSetVal(); + } + +StringValPtr DNS_Mapping::Host() + { + if ( failed || names.empty() ) + return nullptr; + + if ( ! host_val ) + host_val = make_intrusive(names[0]); + + return host_val; + } + +void DNS_Mapping::Init(struct hostent* h) + { + no_mapping = false; + init_failed = false; + creation_time = util::current_time(); + host_val = nullptr; + addrs_val = nullptr; + + if ( ! 
h ) + { + Clear(); + return; + } + + if ( h->h_name ) + // for now, just use the official name + // TODO: this could easily be expanded to include all of the aliases as well + names.push_back(h->h_name); + + if ( h->h_addr_list ) + { + for ( int i = 0; h->h_addr_list[i] != NULL; ++i ) + { + if ( h->h_addrtype == AF_INET ) + addrs.push_back(IPAddr(IPv4, (uint32_t*)h->h_addr_list[i], IPAddr::Network)); + else if ( h->h_addrtype == AF_INET6 ) + addrs.push_back(IPAddr(IPv6, (uint32_t*)h->h_addr_list[i], IPAddr::Network)); + } + } + + failed = false; + } + +void DNS_Mapping::Clear() + { + names.clear(); + host_val = nullptr; + addrs.clear(); + addrs_val = nullptr; + no_mapping = false; + req_type = 0; + failed = true; + } + +void DNS_Mapping::Save(FILE* f) const + { + fprintf(f, "%.0f %d %s %d %s %d %zu %" PRIu32 "\n", creation_time, ! req_host.empty(), + req_host.empty() ? req_addr.AsString().c_str() : req_host.c_str(), failed, + names.empty() ? "*" : names[0].c_str(), req_type, addrs.size(), req_ttl); + + for ( const auto& addr : addrs ) + fprintf(f, "%s\n", addr.AsString().c_str()); + } + +void DNS_Mapping::Merge(const DNS_MappingPtr& other) + { + std::copy(other->names.begin(), other->names.end(), std::back_inserter(names)); + std::copy(other->addrs.begin(), other->addrs.end(), std::back_inserter(addrs)); + } + +// This value needs to be incremented if something changes in the data stored by Save(). This +// allows us to change the structure of the cache without breaking something in DNS_Mgr. +constexpr int FILE_VERSION = 1; + +void DNS_Mapping::InitializeCache(FILE* f) + { + fprintf(f, "%d\n", FILE_VERSION); + } + +bool DNS_Mapping::ValidateCacheVersion(FILE* f) + { + char buf[512]; + if ( ! 
fgets(buf, sizeof(buf), f) ) + return false; + + int version; + if ( sscanf(buf, "%d", &version) != 1 ) + { + reporter->Warning("Existing DNS cache did not have correct version, ignoring"); + return false; + } + + return FILE_VERSION == version; + } + +////////////////////////////////////////////////////////////////////////////////////////// +////////////////////////////////////////////////////////////////////////////////////////// +////////////////////////////////////////////////////////////////////////////////////////// + +TEST_CASE("dns_mapping init null hostent") + { + DNS_Mapping mapping("www.apple.com", nullptr, 123, T_A); + + CHECK(! mapping.Valid()); + CHECK(mapping.Addrs() == nullptr); + CHECK(mapping.AddrsSet()->EqualTo(DNS_Mgr::empty_addr_set())); + CHECK(mapping.Host() == nullptr); + } + +TEST_CASE("dns_mapping init host") + { + IPAddr addr("1.2.3.4"); + in4_addr in4; + addr.CopyIPv4(&in4); + + struct hostent he; + he.h_name = util::copy_string("testing.home"); + he.h_aliases = NULL; + he.h_addrtype = AF_INET; + he.h_length = sizeof(in_addr); + + std::vector addrs = {&in4, NULL}; + he.h_addr_list = reinterpret_cast(addrs.data()); + + DNS_Mapping mapping("testing.home", &he, 123, T_A); + CHECK(mapping.Valid()); + CHECK(mapping.ReqAddr() == IPAddr::v6_unspecified); + CHECK(strcmp(mapping.ReqHost(), "testing.home") == 0); + CHECK(mapping.ReqStr() == "testing.home"); + + auto lva = mapping.Addrs(); + REQUIRE(lva != nullptr); + CHECK(lva->Length() == 1); + auto lvae = lva->Idx(0)->AsAddrVal(); + REQUIRE(lvae != nullptr); + CHECK(lvae->Get().AsString() == "1.2.3.4"); + + auto tvas = mapping.AddrsSet(); + REQUIRE(tvas != nullptr); + CHECK_FALSE(tvas->EqualTo(DNS_Mgr::empty_addr_set())); + + auto svh = mapping.Host(); + REQUIRE(svh != nullptr); + CHECK(svh->ToStdString() == "testing.home"); + + delete[] he.h_name; + } + +TEST_CASE("dns_mapping init addr") + { + IPAddr addr("1.2.3.4"); + in4_addr in4; + addr.CopyIPv4(&in4); + + struct hostent he; + he.h_name = 
util::copy_string("testing.home"); + he.h_aliases = NULL; + he.h_addrtype = AF_INET; + he.h_length = sizeof(in_addr); + + std::vector addrs = {&in4, NULL}; + he.h_addr_list = reinterpret_cast(addrs.data()); + + DNS_Mapping mapping(addr, &he, 123); + CHECK(mapping.Valid()); + CHECK(mapping.ReqAddr() == addr); + CHECK(mapping.ReqHost() == nullptr); + CHECK(mapping.ReqStr() == "1.2.3.4"); + + auto lva = mapping.Addrs(); + REQUIRE(lva != nullptr); + CHECK(lva->Length() == 1); + auto lvae = lva->Idx(0)->AsAddrVal(); + REQUIRE(lvae != nullptr); + CHECK(lvae->Get().AsString() == "1.2.3.4"); + + auto tvas = mapping.AddrsSet(); + REQUIRE(tvas != nullptr); + CHECK_FALSE(tvas->EqualTo(DNS_Mgr::empty_addr_set())); + + auto svh = mapping.Host(); + REQUIRE(svh != nullptr); + CHECK(svh->ToStdString() == "testing.home"); + + delete[] he.h_name; + } + +TEST_CASE("dns_mapping save reload") + { + IPAddr addr("1.2.3.4"); + in4_addr in4; + addr.CopyIPv4(&in4); + + struct hostent he; + he.h_name = util::copy_string("testing.home"); + he.h_aliases = NULL; + he.h_addrtype = AF_INET; + he.h_length = sizeof(in_addr); + + std::vector addrs = {&in4, NULL}; + he.h_addr_list = reinterpret_cast(addrs.data()); + + // Create a temporary file in memory and fseek to the end of it so we're at + // EOF for the next bit. + char buffer[4096]; + memset(buffer, 0, 4096); + FILE* tmpfile = fmemopen(buffer, 4096, "r+"); + fseek(tmpfile, 0, SEEK_END); + + // Try loading from the file at EOF. This should cause a mapping failure. + DNS_Mapping mapping(tmpfile); + CHECK(mapping.NoMapping()); + rewind(tmpfile); + + // Try reading from the empty file. This should cause an init failure. + DNS_Mapping mapping2(tmpfile); + CHECK(mapping2.InitFailed()); + rewind(tmpfile); + + // Save a valid mapping into the file and rewind to the start. 
+ DNS_Mapping mapping3(addr, &he, 123); + mapping3.Save(tmpfile); + rewind(tmpfile); + + // Test loading the mapping back out of the file + DNS_Mapping mapping4(tmpfile); + fclose(tmpfile); + CHECK(mapping4.Valid()); + CHECK(mapping4.ReqAddr() == addr); + CHECK(mapping4.ReqHost() == nullptr); + CHECK(mapping4.ReqStr() == "1.2.3.4"); + + auto lva = mapping4.Addrs(); + REQUIRE(lva != nullptr); + CHECK(lva->Length() == 1); + auto lvae = lva->Idx(0)->AsAddrVal(); + REQUIRE(lvae != nullptr); + CHECK(lvae->Get().AsString() == "1.2.3.4"); + + auto tvas = mapping4.AddrsSet(); + REQUIRE(tvas != nullptr); + CHECK(tvas != DNS_Mgr::empty_addr_set()); + + auto svh = mapping4.Host(); + REQUIRE(svh != nullptr); + CHECK(svh->ToStdString() == "testing.home"); + + delete[] he.h_name; + } + +TEST_CASE("dns_mapping multiple addresses") + { + IPAddr addr("1.2.3.4"); + in4_addr in4_1; + addr.CopyIPv4(&in4_1); + + IPAddr addr2("5.6.7.8"); + in4_addr in4_2; + addr2.CopyIPv4(&in4_2); + + struct hostent he; + he.h_name = util::copy_string("testing.home"); + he.h_aliases = NULL; + he.h_addrtype = AF_INET; + he.h_length = sizeof(in_addr); + + std::vector addrs = {&in4_1, &in4_2, NULL}; + he.h_addr_list = reinterpret_cast(addrs.data()); + + DNS_Mapping mapping("testing.home", &he, 123, T_A); + CHECK(mapping.Valid()); + + auto lva = mapping.Addrs(); + REQUIRE(lva != nullptr); + CHECK(lva->Length() == 2); + + auto lvae = lva->Idx(0)->AsAddrVal(); + REQUIRE(lvae != nullptr); + CHECK(lvae->Get().AsString() == "1.2.3.4"); + + lvae = lva->Idx(1)->AsAddrVal(); + REQUIRE(lvae != nullptr); + CHECK(lvae->Get().AsString() == "5.6.7.8"); + + delete[] he.h_name; + } + +TEST_CASE("dns_mapping ipv6") + { + IPAddr addr("64:ff9b:1::"); + in6_addr in6; + addr.CopyIPv6(&in6); + + struct hostent he; + he.h_name = util::copy_string("testing.home"); + he.h_aliases = NULL; + he.h_addrtype = AF_INET6; + he.h_length = sizeof(in6_addr); + + std::vector addrs = {&in6, NULL}; + he.h_addr_list = 
reinterpret_cast(addrs.data()); + + DNS_Mapping mapping(addr, &he, 123); + CHECK(mapping.Valid()); + CHECK(mapping.ReqAddr() == addr); + CHECK(mapping.ReqHost() == nullptr); + CHECK(mapping.ReqStr() == "64:ff9b:1::"); + + auto lva = mapping.Addrs(); + REQUIRE(lva != nullptr); + CHECK(lva->Length() == 1); + auto lvae = lva->Idx(0)->AsAddrVal(); + REQUIRE(lvae != nullptr); + CHECK(lvae->Get().AsString() == "64:ff9b:1::"); + + delete[] he.h_name; + } + + } // namespace zeek::detail diff --git a/src/DNS_Mapping.h b/src/DNS_Mapping.h new file mode 100644 index 0000000000..32559701f0 --- /dev/null +++ b/src/DNS_Mapping.h @@ -0,0 +1,86 @@ +#pragma once + +#include +#include +#include +#include + +#include "zeek/IPAddr.h" +#include "zeek/Val.h" + +namespace zeek::detail + { + +class DNS_Mapping; +using DNS_MappingPtr = std::shared_ptr; + +class DNS_Mapping + { +public: + DNS_Mapping() = delete; + DNS_Mapping(std::string host, struct hostent* h, uint32_t ttl, int type); + DNS_Mapping(const IPAddr& addr, struct hostent* h, uint32_t ttl); + DNS_Mapping(FILE* f); + + bool NoMapping() const { return no_mapping; } + bool InitFailed() const { return init_failed; } + + ~DNS_Mapping() = default; + + // Returns nil if this was an address request. + // TODO: fix this an uses of this to just return the empty string + const char* ReqHost() const { return req_host.empty() ? nullptr : req_host.c_str(); } + const IPAddr& ReqAddr() const { return req_addr; } + std::string ReqStr() const { return req_host.empty() ? req_addr.AsString() : req_host; } + int ReqType() const { return req_type; } + + ListValPtr Addrs(); + TableValPtr AddrsSet(); // addresses returned as a set + StringValPtr Host(); + + double CreationTime() const { return creation_time; } + uint32_t TTL() const { return req_ttl; } + + void Save(FILE* f) const; + + bool Failed() const { return failed; } + bool Valid() const { return ! failed; } + + bool Expired() const + { + if ( ! 
req_host.empty() && addrs.empty() ) + return false; // nothing to expire + + return util::current_time() > (creation_time + req_ttl); + } + + void Merge(const DNS_MappingPtr& other); + + static void InitializeCache(FILE* f); + static bool ValidateCacheVersion(FILE* f); + +protected: + friend class DNS_Mgr; + + void Init(struct hostent* h); + void Clear(); + + std::string req_host; + IPAddr req_addr; + uint32_t req_ttl = 0; + int req_type = 0; + + // This class supports multiple names per address, but we only store one of them. + std::vector names; + StringValPtr host_val; + + std::vector addrs; + ListValPtr addrs_val; + + double creation_time = 0.0; + bool no_mapping = false; // when initializing from a file, immediately hit EOF + bool init_failed = false; + bool failed = false; + }; + + } // namespace zeek::detail diff --git a/src/DNS_Mgr.cc b/src/DNS_Mgr.cc index 8d776b7351..4d09a19945 100644 --- a/src/DNS_Mgr.cc +++ b/src/DNS_Mgr.cc @@ -4,30 +4,36 @@ #include "zeek/zeek-config.h" -#include -#include -#ifdef TIME_WITH_SYS_TIME -#include -#include -#else -#ifdef HAVE_SYS_TIME_H -#include -#else -#include -#endif -#endif - #include +#include #include +#include +#include +#include #include #include #include -#ifdef HAVE_MEMORY_H -#include -#endif -#include #include +#include +#ifdef TIME_WITH_SYS_TIME +#include +#include +#elif defined(HAVE_SYS_TIME_H) +#include +#else +#include +#endif + +#include +using ztd::out_ptr::out_ptr; + +#include +#include +#include + +#include "zeek/3rdparty/doctest.h" +#include "zeek/DNS_Mapping.h" #include "zeek/Event.h" #include "zeek/Expr.h" #include "zeek/Hash.h" @@ -40,356 +46,541 @@ #include "zeek/ZeekString.h" #include "zeek/iosource/Manager.h" -extern "C" +// Number of seconds we'll wait for a reply. +constexpr int DNS_TIMEOUT = 5; + +// The maximum allowed number of pending asynchronous requests. +constexpr int MAX_PENDING_REQUESTS = 20; + +// The maximum number of bytes requested via UDP. 
TCP fallback won't happen on +// requests until a response is larger than this. +constexpr int MAX_UDP_BUFFER_SIZE = 4096; + +// This unfortunately doesn't exist in c-ares, even though it seems rather useful. +static const char* request_type_string(int request_type) { - extern int select(int, fd_set*, fd_set*, fd_set*, struct timeval*); - -#include - -#include "zeek/3rdparty/nb_dns.h" + switch ( request_type ) + { + case T_A: + return "T_A"; + case T_NS: + return "T_NS"; + case T_MD: + return "T_MD"; + case T_MF: + return "T_MF"; + case T_CNAME: + return "T_CNAME"; + case T_SOA: + return "T_SOA"; + case T_MB: + return "T_MB"; + case T_MG: + return "T_MG"; + case T_MR: + return "T_MR"; + case T_NULL: + return "T_NULL"; + case T_WKS: + return "T_WKS"; + case T_PTR: + return "T_PTR"; + case T_HINFO: + return "T_HINFO"; + case T_MINFO: + return "T_MINFO"; + case T_MX: + return "T_MX"; + case T_TXT: + return "T_TXT"; + case T_RP: + return "T_RP"; + case T_AFSDB: + return "T_AFSDB"; + case T_X25: + return "T_X25"; + case T_ISDN: + return "T_ISDN"; + case T_RT: + return "T_RT"; + case T_NSAP: + return "T_NSAP"; + case T_NSAP_PTR: + return "T_NSAP_PTR"; + case T_SIG: + return "T_SIG"; + case T_KEY: + return "T_KEY"; + case T_PX: + return "T_PX"; + case T_GPOS: + return "T_GPOS"; + case T_AAAA: + return "T_AAAA"; + case T_LOC: + return "T_LOC"; + case T_NXT: + return "T_NXT"; + case T_EID: + return "T_EID"; + case T_NIMLOC: + return "T_NIMLOC"; + case T_SRV: + return "T_SRV"; + case T_ATMA: + return "T_ATMA"; + case T_NAPTR: + return "T_NAPTR"; + case T_KX: + return "T_KX"; + case T_CERT: + return "T_CERT"; + case T_A6: + return "T_A6"; + case T_DNAME: + return "T_DNAME"; + case T_SINK: + return "T_SINK"; + case T_OPT: + return "T_OPT"; + case T_APL: + return "T_APL"; + case T_DS: + return "T_DS"; + case T_SSHFP: + return "T_SSHFP"; + case T_RRSIG: + return "T_RRSIG"; + case T_NSEC: + return "T_NSEC"; + case T_DNSKEY: + return "T_DNSKEY"; + case T_TKEY: + return "T_TKEY"; + 
case T_TSIG: + return "T_TSIG"; + case T_IXFR: + return "T_IXFR"; + case T_AXFR: + return "T_AXFR"; + case T_MAILB: + return "T_MAILB"; + case T_MAILA: + return "T_MAILA"; + case T_ANY: + return "T_ANY"; + case T_URI: + return "T_URI"; + case T_CAA: + return "T_CAA"; + case T_MAX: + return "T_MAX"; + default: + return ""; + } } -using namespace std; +struct ares_deleter + { + void operator()(char* s) const { ares_free_string(s); } + void operator()(unsigned char* s) const { ares_free_string(s); } + void operator()(ares_addrinfo* s) const { ares_freeaddrinfo(s); } + void operator()(struct hostent* h) const { ares_free_hostent(h); } + void operator()(struct ares_txt_reply* h) const { ares_free_data(h); } + }; namespace zeek::detail { +static void addrinfo_cb(void* arg, int status, int timeouts, struct ares_addrinfo* result); +static void query_cb(void* arg, int status, int timeouts, unsigned char* buf, int len); +static void sock_cb(void* data, int s, int read, int write); -class DNS_Mgr_Request +struct CallbackArgs + { + DNS_Request* req; + DNS_Mgr* mgr; + }; + +class DNS_Request { public: - DNS_Mgr_Request(const char* h, int af, bool is_txt) - : host(util::copy_string(h)), fam(af), qtype(is_txt ? 16 : 0), addr(), request_pending() - { - } + DNS_Request(std::string host, int request_type, bool async = false); + DNS_Request(const IPAddr& addr, bool async = false); + ~DNS_Request(); - DNS_Mgr_Request(const IPAddr& a) : host(), fam(), qtype(), addr(a), request_pending() { } + std::string Host() const { return host; } + const IPAddr& Addr() const { return addr; } + int RequestType() const { return request_type; } + bool IsTxt() const { return request_type == 16; } - ~DNS_Mgr_Request() { delete[] host; } + void MakeRequest(ares_channel channel, DNS_Mgr* mgr); + void ProcessAsyncResult(bool timed_out, DNS_Mgr* mgr); - // Returns nil if this was an address request. 
- const char* ReqHost() const { return host; } - const IPAddr& ReqAddr() const { return addr; } - bool ReqIsTxt() const { return qtype == 16; } - - int MakeRequest(nb_dns_info* nb_dns); - int RequestPending() const { return request_pending; } - void RequestDone() { request_pending = 0; } - -protected: - char* host; // if non-nil, this is a host request - int fam; // address family query type for host requests - int qtype; // Query type +private: + std::string host; IPAddr addr; - int request_pending; + int request_type = 0; // Query type + bool async = false; + std::unique_ptr query; + static uint16_t request_id; }; -int DNS_Mgr_Request::MakeRequest(nb_dns_info* nb_dns) +uint16_t DNS_Request::request_id = 0; + +DNS_Request::DNS_Request(std::string host, int request_type, bool async) + : host(std::move(host)), request_type(request_type), async(async) { - if ( ! nb_dns ) - return 0; + // We combine the T_A and T_AAAA requests together in one request, so set the type + // to T_A to make things easier in other parts of the code (mostly around lookups). + if ( request_type == T_AAAA ) + request_type = T_A; + } - request_pending = 1; +DNS_Request::DNS_Request(const IPAddr& addr, bool async) : addr(addr), async(async) + { + request_type = T_PTR; + } - char err[NB_DNS_ERRSIZE]; - if ( host ) - return nb_dns_host_request2(nb_dns, host, fam, qtype, (void*)this, err) >= 0; +DNS_Request::~DNS_Request() { } + +void DNS_Request::MakeRequest(ares_channel channel, DNS_Mgr* mgr) + { + // This needs to get deleted at the end of the callback method. + auto req_data = new CallbackArgs{this, mgr}; + + // It's completely fine if this rolls over. It's just to keep the query ID different + // from one query to the next, and it's unlikely we'd do 2^16 queries so fast that + // all of them would be in flight at the same time. + DNS_Request::request_id++; + + if ( request_type == T_A ) + { + // For A/AAAA requests, we use a different method than the other requests. 
Since + // we're using the AF_UNSPEC family, we get both the ipv4 and ipv6 responses + // back in the same request if use ares_getaddrinfo() so we can store them both + // in the same mapping. + ares_addrinfo_hints hints = {ARES_AI_CANONNAME, AF_UNSPEC, 0, 0}; + ares_getaddrinfo(channel, host.c_str(), NULL, &hints, addrinfo_cb, req_data); + } else { - const uint32_t* bytes; - int len = addr.GetBytes(&bytes); - return nb_dns_addr_request2(nb_dns, (char*)bytes, len == 1 ? AF_INET : AF_INET6, - (void*)this, err) >= 0; + std::string query_host; + if ( request_type == T_PTR ) + query_host = addr.PtrName(); + else + query_host = host; + + std::unique_ptr query_str; + int len = 0; + int status = ares_create_query( + query_host.c_str(), C_IN, request_type, DNS_Request::request_id, 1, + out_ptr(query_str), &len, MAX_UDP_BUFFER_SIZE); + + if ( status != ARES_SUCCESS ) + return; + + // Store this so it can be destroyed when the request is destroyed. + this->query = std::move(query_str); + ares_send(channel, this->query.get(), len, query_cb, req_data); } } -class DNS_Mapping +void DNS_Request::ProcessAsyncResult(bool timed_out, DNS_Mgr* mgr) { -public: - DNS_Mapping(const char* host, struct hostent* h, uint32_t ttl); - DNS_Mapping(const IPAddr& addr, struct hostent* h, uint32_t ttl); - DNS_Mapping(FILE* f); - - bool NoMapping() const { return no_mapping; } - bool InitFailed() const { return init_failed; } - - ~DNS_Mapping(); - - // Returns nil if this was an address request. - const char* ReqHost() const { return req_host; } - IPAddr ReqAddr() const { return req_addr; } - string ReqStr() const { return req_host ? req_host : req_addr.AsString(); } - - ListValPtr Addrs(); - TableValPtr AddrsSet(); // addresses returned as a set - StringValPtr Host(); - - double CreationTime() const { return creation_time; } - - void Save(FILE* f) const; - - bool Failed() const { return failed; } - bool Valid() const { return ! 
failed; } - - bool Expired() const - { - if ( req_host && num_addrs == 0 ) - return false; // nothing to expire - - return util::current_time() > (creation_time + req_ttl); - } - - int Type() const { return map_type; } - -protected: - friend class DNS_Mgr; - - void Init(struct hostent* h); - void Clear(); - - char* req_host; - IPAddr req_addr; - uint32_t req_ttl; - - int num_names; - char** names; - StringValPtr host_val; - - int num_addrs; - IPAddr* addrs; - ListValPtr addrs_val; - - double creation_time; - int map_type; - bool no_mapping; // when initializing from a file, immediately hit EOF - bool init_failed; - bool failed; - }; - -void DNS_Mgr_mapping_delete_func(void* v) - { - delete (DNS_Mapping*)v; - } - -static TableValPtr empty_addr_set() - { - auto addr_t = base_type(TYPE_ADDR); - auto set_index = make_intrusive(addr_t); - set_index->Append(std::move(addr_t)); - auto s = make_intrusive(std::move(set_index), nullptr); - return make_intrusive(std::move(s)); - } - -DNS_Mapping::DNS_Mapping(const char* host, struct hostent* h, uint32_t ttl) - { - Init(h); - req_host = util::copy_string(host); - req_ttl = ttl; - - if ( names && ! names[0] ) - names[0] = util::copy_string(host); - } - -DNS_Mapping::DNS_Mapping(const IPAddr& addr, struct hostent* h, uint32_t ttl) - { - Init(h); - req_addr = addr; - req_host = nullptr; - req_ttl = ttl; - } - -DNS_Mapping::DNS_Mapping(FILE* f) - { - Clear(); - init_failed = true; - - req_host = nullptr; - req_ttl = 0; - creation_time = 0; - - char buf[512]; - - if ( ! fgets(buf, sizeof(buf), f) ) - { - no_mapping = true; - return; - } - - char req_buf[512 + 1], name_buf[512 + 1]; - int is_req_host; - int failed_local; - - if ( sscanf(buf, "%lf %d %512s %d %512s %d %d %" PRIu32, &creation_time, &is_req_host, req_buf, - &failed_local, name_buf, &map_type, &num_addrs, &req_ttl) != 8 ) + if ( ! 
async ) return; - failed = static_cast(failed_local); - - if ( is_req_host ) - req_host = util::copy_string(req_buf); + if ( request_type == T_A ) + mgr->CheckAsyncHostRequest(host, timed_out); + else if ( request_type == T_PTR ) + mgr->CheckAsyncAddrRequest(addr, timed_out); else - req_addr = IPAddr(req_buf); + mgr->CheckAsyncOtherRequest(host, timed_out, request_type); + } - num_names = 1; - names = new char*[num_names]; - names[0] = util::copy_string(name_buf); +/** + * Retrieves the TTL value from the first RR in the response. + * + * This code is adapted from an internal c-ares method called ares__parse_into_addrinfo, + * which is used for ares_getaddrinfo callbacks. It's also the only method that properly + * parses out TTL data currently. This skips over the question and the first bit of the + * response to get to the first RR, and then returns the TTL from that RR. We only use the + * first RR because it's a good approximation for now, at least until the work in c-ares + * lands to add TTL support to the other RR-parsing methods. + * + * @param abuf The buffer containing the entire response from the server. + * @param alen The length of the buffer + * @param ttl An out param for returning the TTL value. + * @return A status code from c-ares. This will be ARES_SUCCESS on success, or some other + * code on failure. 
+ */ +static int get_ttl(unsigned char* abuf, int alen, int* ttl) + { + int status; + long len; + std::unique_ptr hostname; - if ( num_addrs > 0 ) + *ttl = DNS_TIMEOUT; + + unsigned char* aptr = abuf + HFIXEDSZ; + status = ares_expand_name(aptr, abuf, alen, out_ptr(hostname), &len); + if ( status != ARES_SUCCESS ) + return status; + + if ( aptr + len + QFIXEDSZ > abuf + alen ) + return ARES_EBADRESP; + + aptr += len + QFIXEDSZ; + hostname.reset(); + + status = ares_expand_name(aptr, abuf, alen, out_ptr(hostname), &len); + if ( status != ARES_SUCCESS ) + return status; + + if ( aptr + RRFIXEDSZ > abuf + alen ) + return ARES_EBADRESP; + + aptr += len; + *ttl = DNS_RR_TTL(aptr); + + return status; + } + +/** + * Called in response to ares_getaddrinfo requests. Builds a hostent structure from + * the result data and sends it to the DNS manager via AddResult(). + */ +static void addrinfo_cb(void* arg, int status, int timeouts, struct ares_addrinfo* result) + { + auto arg_data = reinterpret_cast(arg); + const auto [req, mgr] = *arg_data; + std::unique_ptr res_ptr(result); + + if ( status != ARES_SUCCESS ) { - addrs = new IPAddr[num_addrs]; - - for ( int i = 0; i < num_addrs; ++i ) + // These two statuses should only ever be sent if we're shutting down everything + // and all of the existing queries are being cancelled. There's no reason to + // store a status that's just going to get deleted, nor is there a reason to log + // anything. + if ( status != ARES_ECANCELLED && status != ARES_EDESTRUCTION ) { - if ( ! fgets(buf, sizeof(buf), f) ) - { - num_addrs = i; - return; - } - - char* newline = strchr(buf, '\n'); - if ( newline ) - *newline = '\0'; - - addrs[i] = IPAddr(buf); + // Insert something into the cache so that the request loop will end correctly. + // We use the DNS_TIMEOUT value as the TTL here since it's small enough that the + // failed response will expire soon, and because we don't have the TTL from the + // response data. 
+ mgr->AddResult(req, nullptr, DNS_TIMEOUT); } } else - addrs = nullptr; - - init_failed = false; - } - -DNS_Mapping::~DNS_Mapping() - { - delete[] req_host; - - if ( names ) { - for ( int i = 0; i < num_names; ++i ) - delete[] names[i]; - delete[] names; + std::vector addrs; + std::vector addrs6; + for ( ares_addrinfo_node* entry = result->nodes; entry != NULL; entry = entry->ai_next ) + { + if ( entry->ai_family == AF_INET ) + { + struct sockaddr_in* addr = reinterpret_cast(entry->ai_addr); + addrs.push_back(&addr->sin_addr); + } + else if ( entry->ai_family == AF_INET6 ) + { + struct sockaddr_in6* addr = (struct sockaddr_in6*)(entry->ai_addr); + addrs6.push_back(&addr->sin6_addr); + } + } + + if ( ! addrs.empty() ) + { + // Push a null on the end so the addr list has a final point during later parsing. + addrs.push_back(NULL); + + struct hostent he + { + }; + he.h_name = util::copy_string(result->name); + he.h_addrtype = AF_INET; + he.h_length = sizeof(in_addr); + he.h_addr_list = reinterpret_cast(addrs.data()); + + mgr->AddResult(req, &he, result->nodes[0].ai_ttl); + + delete[] he.h_name; + } + + if ( ! addrs6.empty() ) + { + // Push a null on the end so the addr list has a final point during later parsing. + addrs6.push_back(NULL); + + struct hostent he + { + }; + he.h_name = util::copy_string(result->name); + he.h_addrtype = AF_INET6; + he.h_length = sizeof(in6_addr); + he.h_addr_list = reinterpret_cast(addrs6.data()); + + mgr->AddResult(req, &he, result->nodes[0].ai_ttl, true); + + delete[] he.h_name; + } } - delete[] addrs; + req->ProcessAsyncResult(timeouts > 0, mgr); + + // TODO: might need to turn these into unique_ptr as well? + delete req; + delete arg_data; } -ListValPtr DNS_Mapping::Addrs() +static void query_cb(void* arg, int status, int timeouts, unsigned char* buf, int len) { - if ( failed ) - return nullptr; + auto arg_data = reinterpret_cast(arg); + const auto [req, mgr] = *arg_data; - if ( ! 
addrs_val ) + if ( status != ARES_SUCCESS ) { - addrs_val = make_intrusive(TYPE_ADDR); - - for ( int i = 0; i < num_addrs; ++i ) - addrs_val->Append(make_intrusive(addrs[i])); - } - - return addrs_val; - } - -TableValPtr DNS_Mapping::AddrsSet() - { - auto l = Addrs(); - - if ( ! l ) - return empty_addr_set(); - - return l->ToSetVal(); - } - -StringValPtr DNS_Mapping::Host() - { - if ( failed || num_names == 0 || ! names[0] ) - return nullptr; - - if ( ! host_val ) - host_val = make_intrusive(names[0]); - - return host_val; - } - -void DNS_Mapping::Init(struct hostent* h) - { - no_mapping = false; - init_failed = false; - creation_time = util::current_time(); - host_val = nullptr; - addrs_val = nullptr; - - if ( ! h ) - { - Clear(); - return; - } - - map_type = h->h_addrtype; - num_names = 1; // for now, just use official name - names = new char*[num_names]; - names[0] = h->h_name ? util::copy_string(h->h_name) : nullptr; - - for ( num_addrs = 0; h->h_addr_list[num_addrs]; ++num_addrs ) - ; - - if ( num_addrs > 0 ) - { - addrs = new IPAddr[num_addrs]; - for ( int i = 0; i < num_addrs; ++i ) - if ( h->h_addrtype == AF_INET ) - addrs[i] = IPAddr(IPv4, (uint32_t*)h->h_addr_list[i], IPAddr::Network); - else if ( h->h_addrtype == AF_INET6 ) - addrs[i] = IPAddr(IPv6, (uint32_t*)h->h_addr_list[i], IPAddr::Network); + // These two statuses should only ever be sent if we're shutting down everything + // and all of the existing queries are being cancelled. There's no reason to + // store a status that's just going to get deleted, nor is there a reason to log + // anything. + if ( status != ARES_ECANCELLED && status != ARES_EDESTRUCTION ) + { + // Insert something into the cache so that the request loop will end correctly. + // We use the DNS_TIMEOUT value as the TTL here since it's small enough that the + // failed response will expire soon, and because we don't have the TTL from the + // response data. 
+ mgr->AddResult(req, nullptr, DNS_TIMEOUT); + } } else - addrs = nullptr; + { + // We don't really care that we couldn't properly parse the TTL here, since the + // later parsing will fail with better error messages. In that case, it's ok + // that we throw away the status value. + int ttl; + get_ttl(buf, len, &ttl); - failed = false; + switch ( req->RequestType() ) + { + case T_PTR: + { + std::unique_ptr he; + if ( req->Addr().GetFamily() == IPv4 ) + { + struct in_addr addr; + req->Addr().CopyIPv4(&addr); + status = ares_parse_ptr_reply(buf, len, &addr, sizeof(addr), AF_INET, + out_ptr(he)); + } + else + { + struct in6_addr addr; + req->Addr().CopyIPv6(&addr); + status = ares_parse_ptr_reply(buf, len, &addr, sizeof(addr), AF_INET6, + out_ptr(he)); + } + + if ( status == ARES_SUCCESS ) + mgr->AddResult(req, he.get(), ttl); + else + { + // See above for why DNS_TIMEOUT here. + mgr->AddResult(req, nullptr, DNS_TIMEOUT); + } + break; + } + case T_TXT: + { + std::unique_ptr reply; + int r = ares_parse_txt_reply(buf, len, out_ptr(reply)); + if ( r == ARES_SUCCESS ) + { + // Use a hostent to send the data into AddResult(). We only care about + // setting the host field, but everything else should be zero just for + // safety. + + // We don't currently handle more than the first response, and throw the + // rest away. There really isn't a good reason for this, we just haven't + // ever done so. It would likely require some changes to the output from + // Lookup(), since right now it only returns one value. + struct hostent he + { + }; + he.h_name = util::copy_string(reinterpret_cast(reply->txt)); + mgr->AddResult(req, &he, ttl); + + delete[] he.h_name; + } + else + { + // See above for why DNS_TIMEOUT here. 
+ mgr->AddResult(req, nullptr, DNS_TIMEOUT); + } + + break; + } + + default: + reporter->Error("Requests of type %d (%s) are unsupported", req->RequestType(), + request_type_string(req->RequestType())); + break; + } + } + + req->ProcessAsyncResult(timeouts > 0, mgr); + delete arg_data; + delete req; } -void DNS_Mapping::Clear() +/** + * Called when the c-ares socket changes state, which indicates that it's connected to + * some source of data (either a host file or a DNS server). This indicates that we're + * able to do lookups against c-ares now and should activate the IOSource. + */ +static void sock_cb(void* data, int s, int read, int write) { - num_names = num_addrs = 0; - names = nullptr; - addrs = nullptr; - host_val = nullptr; - addrs_val = nullptr; - no_mapping = false; - map_type = 0; - failed = true; + auto mgr = reinterpret_cast(data); + mgr->RegisterSocket(s, read == 1, write == 1); } -void DNS_Mapping::Save(FILE* f) const +DNS_Mgr::DNS_Mgr(DNS_MgrMode arg_mode) : IOSource(true), mode(arg_mode) { - fprintf(f, "%.0f %d %s %d %s %d %d %" PRIu32 "\n", creation_time, req_host != nullptr, - req_host ? req_host : req_addr.AsString().c_str(), failed, - (names && names[0]) ? 
names[0] : "*", map_type, num_addrs, req_ttl); - - for ( int i = 0; i < num_addrs; ++i ) - fprintf(f, "%s\n", addrs[i].AsString().c_str()); - } - -DNS_Mgr::DNS_Mgr(DNS_MgrMode arg_mode) - { - did_init = false; - - mode = arg_mode; - - cache_name = dir = nullptr; - - asyncs_pending = 0; - num_requests = 0; - successful = 0; - failed = 0; - nb_dns = nullptr; + ares_library_init(ARES_LIB_INIT_ALL); } DNS_Mgr::~DNS_Mgr() { - if ( nb_dns ) - nb_dns_finish(nb_dns); + Flush(); - delete[] cache_name; - delete[] dir; + ares_cancel(channel); + ares_destroy(channel); + ares_library_cleanup(); + } + +void DNS_Mgr::RegisterSocket(int fd, bool read, bool write) + { + if ( read && socket_fds.count(fd) == 0 ) + { + socket_fds.insert(fd); + iosource_mgr->RegisterFd(fd, this, IOSource::READ); + } + else if ( ! read && socket_fds.count(fd) != 0 ) + { + socket_fds.erase(fd); + iosource_mgr->UnregisterFd(fd, this, IOSource::READ); + } + + if ( write && write_socket_fds.count(fd) == 0 ) + { + write_socket_fds.insert(fd); + iosource_mgr->RegisterFd(fd, this, IOSource::WRITE); + } + else if ( ! write && write_socket_fds.count(fd) != 0 ) + { + write_socket_fds.erase(fd); + iosource_mgr->UnregisterFd(fd, this, IOSource::WRITE); + } } void DNS_Mgr::InitSource() @@ -397,45 +588,70 @@ void DNS_Mgr::InitSource() if ( did_init ) return; + ares_options options; + int optmask = 0; + + // Enable an EDNS option to be sent with the requests. This allows us to set + // a bigger UDP buffer size in the request, which prevents fallback to TCP + // at least up to that size. + options.flags = ARES_FLAG_EDNS; + optmask |= ARES_OPT_FLAGS; + + options.ednspsz = MAX_UDP_BUFFER_SIZE; + optmask |= ARES_OPT_EDNSPSZ; + + options.socket_receive_buffer_size = MAX_UDP_BUFFER_SIZE; + optmask |= ARES_OPT_SOCK_RCVBUF; + + // This option is in milliseconds. + options.timeout = DNS_TIMEOUT * 1000; + optmask |= ARES_OPT_TIMEOUTMS; + + // This causes c-ares to only attempt each server twice before + // giving up. 
+ options.tries = 2; + optmask |= ARES_OPT_TRIES; + + // See the comment on sock_cb for how this gets used. + options.sock_state_cb = sock_cb; + options.sock_state_cb_data = this; + optmask |= ARES_OPT_SOCK_STATE_CB; + + int status = ares_init_options(&channel, &options, optmask); + if ( status != ARES_SUCCESS ) + reporter->FatalError("Failed to initialize c-ares for DNS resolution: %s", + ares_strerror(status)); + // Note that Init() may be called by way of LookupHost() during the act of // parsing a hostname literal (e.g. google.com), so we can't use a // script-layer option to configure the DNS resolver as it may not be // configured to the user's desired address at the time when we need to to // the lookup. auto dns_resolver = getenv("ZEEK_DNS_RESOLVER"); - auto dns_resolver_addr = dns_resolver ? IPAddr(dns_resolver) : IPAddr(); - char err[NB_DNS_ERRSIZE]; - - if ( dns_resolver_addr == IPAddr() ) - nb_dns = nb_dns_init(err); - else + if ( dns_resolver ) { + ares_addr_node servers; + servers.next = NULL; + + auto dns_resolver_addr = IPAddr(dns_resolver); struct sockaddr_storage ss = {0}; if ( dns_resolver_addr.GetFamily() == IPv4 ) { - struct sockaddr_in* sa = (struct sockaddr_in*)&ss; - sa->sin_family = AF_INET; - dns_resolver_addr.CopyIPv4(&sa->sin_addr); + servers.family = AF_INET; + dns_resolver_addr.CopyIPv4(&(servers.addr.addr4)); } else { struct sockaddr_in6* sa = (struct sockaddr_in6*)&ss; sa->sin6_family = AF_INET6; dns_resolver_addr.CopyIPv6(&sa->sin6_addr); + + servers.family = AF_INET6; + memcpy(&(servers.addr.addr6), &sa->sin6_addr, sizeof(ares_in6_addr)); } - nb_dns = nb_dns_init2(err, (struct sockaddr*)&ss); - } - - if ( nb_dns ) - { - if ( ! 
iosource_mgr->RegisterFd(nb_dns_fd(nb_dns), this) ) - reporter->FatalError("Failed to register nb_dns file descriptor with iosource_mgr"); - } - else - { - reporter->Warning("problem initializing NB-DNS: %s", err); + ares_set_servers(channel, &servers); } did_init = true; @@ -443,91 +659,145 @@ void DNS_Mgr::InitSource() void DNS_Mgr::InitPostScript() { - dm_rec = id::find_type("dns_mapping"); + if ( ! doctest::is_running_in_test ) + { + dm_rec = id::find_type("dns_mapping"); - // Registering will call Init() - iosource_mgr->Register(this, true); + // Registering will call InitSource(), which sets up all of the DNS library stuff + iosource_mgr->Register(this, true); + } + else + { + // This would normally be called when registering the iosource above. + InitSource(); + } - const char* cache_dir = dir ? dir : "."; - cache_name = new char[strlen(cache_dir) + 64]; - sprintf(cache_name, "%s/%s", cache_dir, ".zeek-dns-cache"); - LoadCache(fopen(cache_name, "r")); + // Load the DNS cache from disk, if it exists. + std::string cache_dir = dir.empty() ? "." 
: dir; + cache_name = util::fmt("%s/%s", cache_dir.c_str(), ".zeek-dns-cache"); + LoadCache(cache_name); } -static TableValPtr fake_name_lookup_result(const char* name) +static TableValPtr fake_name_lookup_result(const std::string& name) { hash128_t hash; - KeyedHash::StaticHash128(name, strlen(name), &hash); + KeyedHash::StaticHash128(name.c_str(), name.size(), &hash); auto hv = make_intrusive(TYPE_ADDR); hv->Append(make_intrusive(reinterpret_cast(&hash))); return hv->ToSetVal(); } -static const char* fake_text_lookup_result(const char* name) +static std::string fake_lookup_result(const std::string& name, int request_type) { - static char tmp[32 + 256]; - snprintf(tmp, sizeof(tmp), "fake_text_lookup_result_%s", name); - return tmp; + return util::fmt("fake_lookup_result_%s_%s", request_type_string(request_type), name.c_str()); } -static const char* fake_addr_lookup_result(const IPAddr& addr) +static std::string fake_addr_lookup_result(const IPAddr& addr) { - static char tmp[128]; - snprintf(tmp, sizeof(tmp), "fake_addr_lookup_result_%s", addr.AsString().c_str()); - return tmp; + return util::fmt("fake_addr_lookup_result_%s", addr.AsString().c_str()); } -TableValPtr DNS_Mgr::LookupHost(const char* name) +static void resolve_lookup_cb(DNS_Mgr::LookupCallback* callback, TableValPtr result) + { + callback->Resolved(std::move(result)); + delete callback; + } + +static void resolve_lookup_cb(DNS_Mgr::LookupCallback* callback, const std::string& result) + { + callback->Resolved(result); + delete callback; + } + +ValPtr DNS_Mgr::Lookup(const std::string& name, int request_type) + { + if ( request_type == T_A || request_type == T_AAAA ) + return LookupHost(name); + + if ( mode == DNS_FAKE ) + return make_intrusive(fake_lookup_result(name, request_type)); + + InitSource(); + + if ( mode != DNS_PRIME ) + { + if ( auto val = LookupOtherInCache(name, request_type, false) ) + return val; + } + + switch ( mode ) + { + case DNS_PRIME: + { + auto req = new DNS_Request(name, 
request_type); + req->MakeRequest(channel, this); + return empty_addr_set(); + } + + case DNS_FORCE: + reporter->FatalError("can't find DNS entry for %s (req type %d / %s) in cache", + name.c_str(), request_type, request_type_string(request_type)); + return nullptr; + + case DNS_DEFAULT: + { + auto req = new DNS_Request(name, request_type); + req->MakeRequest(channel, this); + Resolve(); + + // Call Lookup() a second time to get the newly stored value out of the cache. + return Lookup(name, request_type); + } + + default: + reporter->InternalError("bad mode %d in DNS_Mgr::Lookup", mode); + return nullptr; + } + + return nullptr; + } + +TableValPtr DNS_Mgr::LookupHost(const std::string& name) { if ( mode == DNS_FAKE ) return fake_name_lookup_result(name); InitSource(); - if ( ! nb_dns ) - return empty_addr_set(); - + // Check the cache before attempting to look up the name remotely. if ( mode != DNS_PRIME ) { - HostMap::iterator it = host_mappings.find(name); - - if ( it != host_mappings.end() ) - { - DNS_Mapping* d4 = it->second.first; - DNS_Mapping* d6 = it->second.second; - - if ( (d4 && d4->Failed()) || (d6 && d6->Failed()) ) - { - reporter->Warning("no such host: %s", name); - return empty_addr_set(); - } - else if ( d4 && d6 ) - { - auto tv4 = d4->AddrsSet(); - auto tv6 = d6->AddrsSet(); - tv4->AddTo(tv6.get(), false); - return tv6; - } - } + if ( auto val = LookupNameInCache(name, false, true) ) + return val; } // Not found, or priming. switch ( mode ) { case DNS_PRIME: - requests.push_back(new DNS_Mgr_Request(name, AF_INET, false)); - requests.push_back(new DNS_Mgr_Request(name, AF_INET6, false)); + { + // We pass T_A here, but DNS_Request::MakeRequest() will special-case that in + // a request that gets both T_A and T_AAAA results at one time. 
+ auto req = new DNS_Request(name, T_A); + req->MakeRequest(channel, this); return empty_addr_set(); + } case DNS_FORCE: - reporter->FatalError("can't find DNS entry for %s in cache", name); + reporter->FatalError("can't find DNS entry for %s in cache", name.c_str()); return nullptr; case DNS_DEFAULT: - requests.push_back(new DNS_Mgr_Request(name, AF_INET, false)); - requests.push_back(new DNS_Mgr_Request(name, AF_INET6, false)); + { + // We pass T_A here, but DNS_Request::MakeRequest() will special-case that in + // a request that gets both T_A and T_AAAA results at one time. + auto req = new DNS_Request(name, T_A); + req->MakeRequest(channel, this); Resolve(); + + // Call LookupHost() a second time to get the newly stored value out of the cache. return LookupHost(name); + } default: reporter->InternalError("bad mode in DNS_Mgr::LookupHost"); @@ -535,43 +805,43 @@ TableValPtr DNS_Mgr::LookupHost(const char* name) } } -ValPtr DNS_Mgr::LookupAddr(const IPAddr& addr) +StringValPtr DNS_Mgr::LookupAddr(const IPAddr& addr) { + if ( mode == DNS_FAKE ) + return make_intrusive(fake_addr_lookup_result(addr)); + InitSource(); + // Check the cache before attempting to look up the name remotely. if ( mode != DNS_PRIME ) { - AddrMap::iterator it = addr_mappings.find(addr); - - if ( it != addr_mappings.end() ) - { - DNS_Mapping* d = it->second; - if ( d->Valid() ) - return d->Host(); - else - { - string s(addr); - reporter->Warning("can't resolve IP address: %s", s.c_str()); - return make_intrusive(s.c_str()); - } - } + if ( auto val = LookupAddrInCache(addr, false, true) ) + return val; } // Not found, or priming. 
switch ( mode ) { case DNS_PRIME: - requests.push_back(new DNS_Mgr_Request(addr)); + { + auto req = new DNS_Request(addr); + req->MakeRequest(channel, this); return make_intrusive(""); + } case DNS_FORCE: reporter->FatalError("can't find DNS entry for %s in cache", addr.AsString().c_str()); return nullptr; case DNS_DEFAULT: - requests.push_back(new DNS_Mgr_Request(addr)); + { + auto req = new DNS_Request(addr); + req->MakeRequest(channel, this); Resolve(); + + // Call LookupAddr() a second time to get the newly stored value out of the cache. return LookupAddr(addr); + } default: reporter->InternalError("bad mode in DNS_Mgr::LookupAddr"); @@ -579,130 +849,168 @@ ValPtr DNS_Mgr::LookupAddr(const IPAddr& addr) } } -void DNS_Mgr::Verify() { } +void DNS_Mgr::LookupHost(const std::string& name, LookupCallback* callback) + { + if ( mode == DNS_FAKE ) + { + resolve_lookup_cb(callback, fake_name_lookup_result(name)); + return; + } -#define MAX_PENDING_REQUESTS 20 + // Do we already know the answer? + if ( auto addrs = LookupNameInCache(name, true, false) ) + { + resolve_lookup_cb(callback, std::move(addrs)); + return; + } + + AsyncRequest* req = nullptr; + + // If we already have a request waiting for this host, we don't need to make + // another one. We can just add the callback to it and it'll get handled + // when the first request comes back. + auto key = std::make_pair(T_A, name); + auto i = asyncs.find(key); + if ( i != asyncs.end() ) + req = i->second; + else + { + // A new one. + req = new AsyncRequest{name, T_A}; + asyncs_queued.push_back(req); + asyncs.emplace_hint(i, std::move(key), req); + } + + req->callbacks.push_back(callback); + + // There may be requests in the queue that haven't been processed yet + // so go ahead and reissue them, even if this method didn't change + // anything. 
+ IssueAsyncRequests(); + } + +void DNS_Mgr::LookupAddr(const IPAddr& addr, LookupCallback* callback) + { + if ( mode == DNS_FAKE ) + { + resolve_lookup_cb(callback, fake_addr_lookup_result(addr)); + return; + } + + // Do we already know the answer? + if ( auto name = LookupAddrInCache(addr, true, false) ) + { + resolve_lookup_cb(callback, name->CheckString()); + return; + } + + AsyncRequest* req = nullptr; + + // If we already have a request waiting for this host, we don't need to make + // another one. We can just add the callback to it and it'll get handled + // when the first request comes back. + auto i = asyncs.find(addr); + if ( i != asyncs.end() ) + req = i->second; + else + { + // A new one. + req = new AsyncRequest{addr}; + asyncs_queued.push_back(req); + asyncs.emplace_hint(i, addr, req); + } + + req->callbacks.push_back(callback); + + // There may be requests in the queue that haven't been processed yet + // so go ahead and reissue them, even if this method didn't change + // anything. + IssueAsyncRequests(); + } + +void DNS_Mgr::Lookup(const std::string& name, int request_type, LookupCallback* callback) + { + if ( mode == DNS_FAKE ) + { + resolve_lookup_cb(callback, fake_lookup_result(name, request_type)); + return; + } + + // Do we already know the answer? + if ( auto txt = LookupOtherInCache(name, request_type, true) ) + { + resolve_lookup_cb(callback, txt->CheckString()); + return; + } + + AsyncRequest* req = nullptr; + + // If we already have a request waiting for this host, we don't need to make + // another one. We can just add the callback to it and it'll get handled + // when the first request comes back. + auto key = std::make_pair(request_type, name); + auto i = asyncs.find(key); + if ( i != asyncs.end() ) + req = i->second; + else + { + // A new one. 
+ req = new AsyncRequest{name, request_type}; + asyncs_queued.push_back(req); + asyncs.emplace_hint(i, std::move(key), req); + } + + req->callbacks.push_back(callback); + + IssueAsyncRequests(); + } void DNS_Mgr::Resolve() { - if ( ! nb_dns ) - return; + int nfds = 0; + struct timeval *tvp, tv; + fd_set read_fds, write_fds; - int i; + tv.tv_sec = DNS_TIMEOUT; + tv.tv_usec = 0; - int first_req = 0; - int num_pending = min(requests.length(), MAX_PENDING_REQUESTS); - int last_req = num_pending - 1; - - // Prime with the initial requests. - for ( i = first_req; i <= last_req; ++i ) - requests[i]->MakeRequest(nb_dns); - - // Start resolving. Each time an answer comes in, we can issue a - // new request, if we have more. - while ( num_pending > 0 ) + for ( int i = 0; i < MAX_PENDING_REQUESTS; i++ ) { - int status = AnswerAvailable(DNS_TIMEOUT); + FD_ZERO(&read_fds); + FD_ZERO(&write_fds); + nfds = ares_fds(channel, &read_fds, &write_fds); + if ( nfds == 0 ) + break; - if ( status <= 0 ) - { - // Error or timeout. Process all pending requests as - // unanswered and reprime. - for ( i = first_req; i <= last_req; ++i ) - { - DNS_Mgr_Request* dr = requests[i]; - if ( dr->RequestPending() ) - { - AddResult(dr, nullptr); - dr->RequestDone(); - } - } - - first_req = last_req + 1; - num_pending = min(requests.length() - first_req, MAX_PENDING_REQUESTS); - last_req = first_req + num_pending - 1; - - for ( i = first_req; i <= last_req; ++i ) - requests[i]->MakeRequest(nb_dns); - - continue; - } - - char err[NB_DNS_ERRSIZE]; - struct nb_dns_result r; - status = nb_dns_activity(nb_dns, &r, err); - if ( status < 0 ) - reporter->Warning("NB-DNS error in DNS_Mgr::WaitForReplies (%s)", err); - else if ( status > 0 ) - { - DNS_Mgr_Request* dr = (DNS_Mgr_Request*)r.cookie; - if ( dr->RequestPending() ) - { - AddResult(dr, &r); - dr->RequestDone(); - } - - // Room for another, if we have it. 
- if ( last_req < requests.length() - 1 ) - { - ++last_req; - requests[last_req]->MakeRequest(nb_dns); - } - else - --num_pending; - } + tvp = ares_timeout(channel, &tv, &tv); + select(nfds, &read_fds, &write_fds, NULL, tvp); + ares_process(channel, &read_fds, &write_fds); } - - // All done with the list of requests. - for ( i = requests.length() - 1; i >= 0; --i ) - delete requests.remove_nth(i); } -bool DNS_Mgr::Save() +void DNS_Mgr::Event(EventHandlerPtr e, const DNS_MappingPtr& dm) { - if ( ! cache_name ) - return false; - - FILE* f = fopen(cache_name, "w"); - - if ( ! f ) - return false; - - Save(f, host_mappings); - Save(f, addr_mappings); - // Save(f, text_mappings); // We don't save the TXT mappings (yet?). - - fclose(f); - - return true; + if ( e ) + event_mgr.Enqueue(e, BuildMappingVal(dm)); } -void DNS_Mgr::Event(EventHandlerPtr e, DNS_Mapping* dm) +void DNS_Mgr::Event(EventHandlerPtr e, const DNS_MappingPtr& dm, ListValPtr l1, ListValPtr l2) { - if ( ! e ) - return; - - event_mgr.Enqueue(e, BuildMappingVal(dm)); + if ( e ) + event_mgr.Enqueue(e, BuildMappingVal(dm), l1->ToSetVal(), l2->ToSetVal()); } -void DNS_Mgr::Event(EventHandlerPtr e, DNS_Mapping* dm, ListValPtr l1, ListValPtr l2) +void DNS_Mgr::Event(EventHandlerPtr e, const DNS_MappingPtr& old_dm, DNS_MappingPtr new_dm) { - if ( ! e ) - return; - - event_mgr.Enqueue(e, BuildMappingVal(dm), l1->ToSetVal(), l2->ToSetVal()); + if ( e ) + event_mgr.Enqueue(e, BuildMappingVal(old_dm), BuildMappingVal(new_dm)); } -void DNS_Mgr::Event(EventHandlerPtr e, DNS_Mapping* old_dm, DNS_Mapping* new_dm) +ValPtr DNS_Mgr::BuildMappingVal(const DNS_MappingPtr& dm) { - if ( ! e ) - return; + if ( ! 
dm_rec ) + return nullptr; - event_mgr.Enqueue(e, BuildMappingVal(old_dm), BuildMappingVal(new_dm)); - } - -ValPtr DNS_Mgr::BuildMappingVal(DNS_Mapping* dm) - { auto r = make_intrusive(dm_rec); r->AssignTime(0, dm->CreationTime()); @@ -717,130 +1025,101 @@ ValPtr DNS_Mgr::BuildMappingVal(DNS_Mapping* dm) return r; } -void DNS_Mgr::AddResult(DNS_Mgr_Request* dr, struct nb_dns_result* r) +void DNS_Mgr::AddResult(DNS_Request* dr, struct hostent* h, uint32_t ttl, bool merge) { - struct hostent* h = (r && r->host_errno == 0) ? r->hostent : nullptr; - u_int32_t ttl = (r && r->host_errno == 0) ? r->ttl : 0; + // TODO: the existing code doesn't handle hostname aliases at all. Should we? - DNS_Mapping* new_dm; - DNS_Mapping* prev_dm; - int keep_prev = 0; + DNS_MappingPtr new_mapping = nullptr; + DNS_MappingPtr prev_mapping = nullptr; + bool keep_prev = true; - if ( dr->ReqHost() ) + MappingMap::iterator it; + if ( dr->RequestType() == T_PTR ) { - new_dm = new DNS_Mapping(dr->ReqHost(), h, ttl); - prev_dm = nullptr; - - if ( dr->ReqIsTxt() ) + new_mapping = std::make_shared(dr->Addr(), h, ttl); + it = all_mappings.find(dr->Addr()); + if ( it == all_mappings.end() ) { - TextMap::iterator it = text_mappings.find(dr->ReqHost()); - - if ( it == text_mappings.end() ) - text_mappings[dr->ReqHost()] = new_dm; - else - { - prev_dm = it->second; - it->second = new_dm; - } - - if ( new_dm->Failed() && prev_dm && prev_dm->Valid() ) - { - text_mappings[dr->ReqHost()] = prev_dm; - ++keep_prev; - } + auto result = all_mappings.emplace(dr->Addr(), new_mapping); + it = result.first; } else + prev_mapping = it->second; + } + else + { + new_mapping = std::make_shared(dr->Host(), h, ttl, dr->RequestType()); + auto key = std::make_pair(dr->RequestType(), dr->Host()); + + it = all_mappings.find(key); + if ( it == all_mappings.end() ) { - HostMap::iterator it = host_mappings.find(dr->ReqHost()); - if ( it == host_mappings.end() ) - { - host_mappings[dr->ReqHost()].first = new_dm->Type() == 
AF_INET ? new_dm : nullptr; + auto result = all_mappings.emplace(std::move(key), new_mapping); + it = result.first; + } + else + prev_mapping = it->second; + } - host_mappings[dr->ReqHost()].second = new_dm->Type() == AF_INET ? nullptr : new_dm; - } - else - { - if ( new_dm->Type() == AF_INET ) - { - prev_dm = it->second.first; - it->second.first = new_dm; - } - else - { - prev_dm = it->second.second; - it->second.second = new_dm; - } - } + if ( prev_mapping && prev_mapping->Valid() ) + { + if ( new_mapping->Valid() ) + { + if ( merge ) + new_mapping->Merge(prev_mapping); - if ( new_dm->Failed() && prev_dm && prev_dm->Valid() ) - { - // Put previous, valid entry back - CompareMappings - // will generate a corresponding warning. - if ( prev_dm->Type() == AF_INET ) - host_mappings[dr->ReqHost()].first = prev_dm; - else - host_mappings[dr->ReqHost()].second = prev_dm; - - ++keep_prev; - } + it->second = new_mapping; + keep_prev = false; } } else { - new_dm = new DNS_Mapping(dr->ReqAddr(), h, ttl); - AddrMap::iterator it = addr_mappings.find(dr->ReqAddr()); - prev_dm = (it == addr_mappings.end()) ? 0 : it->second; - addr_mappings[dr->ReqAddr()] = new_dm; - - if ( new_dm->Failed() && prev_dm && prev_dm->Valid() ) - { - addr_mappings[dr->ReqAddr()] = prev_dm; - ++keep_prev; - } + it->second = new_mapping; + keep_prev = false; } - if ( prev_dm && ! dr->ReqIsTxt() ) - CompareMappings(prev_dm, new_dm); + if ( prev_mapping && ! dr->IsTxt() ) + CompareMappings(prev_mapping, new_mapping); if ( keep_prev ) - delete new_dm; + new_mapping.reset(); else - delete prev_dm; + prev_mapping.reset(); } -void DNS_Mgr::CompareMappings(DNS_Mapping* prev_dm, DNS_Mapping* new_dm) +void DNS_Mgr::CompareMappings(const DNS_MappingPtr& prev_mapping, const DNS_MappingPtr& new_mapping) { - if ( prev_dm->Failed() ) + if ( prev_mapping->Failed() ) { - if ( new_dm->Failed() ) + if ( new_mapping->Failed() ) // Nothing changed. 
return; - Event(dns_mapping_valid, new_dm); + Event(dns_mapping_valid, new_mapping); return; } - else if ( new_dm->Failed() ) + else if ( new_mapping->Failed() ) { - Event(dns_mapping_unverified, prev_dm); + Event(dns_mapping_unverified, prev_mapping); return; } - auto prev_s = prev_dm->Host(); - auto new_s = new_dm->Host(); + auto prev_s = prev_mapping->Host(); + auto new_s = new_mapping->Host(); if ( prev_s || new_s ) { if ( ! prev_s ) - Event(dns_mapping_new_name, new_dm); + Event(dns_mapping_new_name, new_mapping); else if ( ! new_s ) - Event(dns_mapping_lost_name, prev_dm); + Event(dns_mapping_lost_name, prev_mapping); else if ( ! Bstr_eq(new_s->AsString(), prev_s->AsString()) ) - Event(dns_mapping_name_changed, prev_dm, new_dm); + Event(dns_mapping_name_changed, prev_mapping, new_mapping); } - auto prev_a = prev_dm->Addrs(); - auto new_a = new_dm->Addrs(); + auto prev_a = prev_mapping->Addrs(); + auto new_a = new_mapping->Addrs(); if ( ! prev_a || ! new_a ) { @@ -848,14 +1127,14 @@ void DNS_Mgr::CompareMappings(DNS_Mapping* prev_dm, DNS_Mapping* new_dm) return; } - auto prev_delta = AddrListDelta(prev_a.get(), new_a.get()); - auto new_delta = AddrListDelta(new_a.get(), prev_a.get()); + auto prev_delta = AddrListDelta(prev_a, new_a); + auto new_delta = AddrListDelta(new_a, prev_a); if ( prev_delta->Length() > 0 || new_delta->Length() > 0 ) - Event(dns_mapping_altered, new_dm, std::move(prev_delta), std::move(new_delta)); + Event(dns_mapping_altered, new_mapping, std::move(prev_delta), std::move(new_delta)); } -ListValPtr DNS_Mgr::AddrListDelta(ListVal* al1, ListVal* al2) +ListValPtr DNS_Mgr::AddrListDelta(ListValPtr al1, ListValPtr al2) { auto delta = make_intrusive(TYPE_ADDR); @@ -879,570 +1158,676 @@ ListValPtr DNS_Mgr::AddrListDelta(ListVal* al1, ListVal* al2) return delta; } -void DNS_Mgr::DumpAddrList(FILE* f, ListVal* al) +void DNS_Mgr::LoadCache(const std::string& path) { - for ( int i = 0; i < al->Length(); ++i ) - { - const IPAddr& al_i = 
al->Idx(i)->AsAddr(); - fprintf(f, "%s%s", i > 0 ? "," : "", al_i.AsString().c_str()); - } - } + FILE* f = fopen(path.c_str(), "r"); -void DNS_Mgr::LoadCache(FILE* f) - { if ( ! f ) return; - DNS_Mapping* m = new DNS_Mapping(f); - for ( ; ! m->NoMapping() && ! m->InitFailed(); m = new DNS_Mapping(f) ) + if ( ! DNS_Mapping::ValidateCacheVersion(f) ) + return; + + // Loop until we find a mapping that doesn't initialize correctly. + auto m = std::make_shared(f); + for ( ; ! m->NoMapping() && ! m->InitFailed(); m = std::make_shared(f) ) { if ( m->ReqHost() ) - { - if ( host_mappings.find(m->ReqHost()) == host_mappings.end() ) - { - host_mappings[m->ReqHost()].first = 0; - host_mappings[m->ReqHost()].second = 0; - } - if ( m->Type() == AF_INET ) - host_mappings[m->ReqHost()].first = m; - else - host_mappings[m->ReqHost()].second = m; - } + all_mappings.insert_or_assign(std::make_pair(m->ReqType(), m->ReqHost()), m); else - { - addr_mappings[m->ReqAddr()] = m; - } + all_mappings.insert_or_assign(m->ReqAddr(), m); } if ( ! m->NoMapping() ) reporter->FatalError("DNS cache corrupted"); - delete m; fclose(f); } -void DNS_Mgr::Save(FILE* f, const AddrMap& m) +bool DNS_Mgr::Save() { - for ( AddrMap::const_iterator it = m.begin(); it != m.end(); ++it ) + if ( cache_name.empty() ) + return false; + + FILE* f = fopen(cache_name.c_str(), "w"); + + if ( ! 
f ) + return false; + + DNS_Mapping::InitializeCache(f); + Save(f, all_mappings); + + fclose(f); + + return true; + } + +void DNS_Mgr::Save(FILE* f, const MappingMap& m) + { + for ( const auto& [key, mapping] : m ) { - if ( it->second ) - it->second->Save(f); + if ( mapping ) + mapping->Save(f); } } -void DNS_Mgr::Save(FILE* f, const HostMap& m) +TableValPtr DNS_Mgr::LookupNameInCache(const std::string& name, bool cleanup_expired, + bool check_failed) { - HostMap::const_iterator it; - - for ( it = m.begin(); it != m.end(); ++it ) - { - if ( it->second.first ) - it->second.first->Save(f); - - if ( it->second.second ) - it->second.second->Save(f); - } - } - -const char* DNS_Mgr::LookupAddrInCache(const IPAddr& addr) - { - AddrMap::iterator it = addr_mappings.find(addr); - - if ( it == addr_mappings.end() ) + auto it = all_mappings.find(std::make_pair(T_A, name)); + if ( it == all_mappings.end() ) return nullptr; - DNS_Mapping* d = it->second; + auto d = it->second; - if ( d->Expired() ) + if ( ! d || d->names.empty() ) + return nullptr; + + if ( cleanup_expired && (d && d->Expired()) ) { - addr_mappings.erase(it); - delete d; + all_mappings.erase(it); return nullptr; } - // The escapes in the following strings are to avoid having it - // interpreted as a trigraph sequence. - return d->names ? 
d->names[0] : "<\?\?\?>"; + if ( check_failed && (d && d->Failed()) ) + { + reporter->Warning("Can't resolve host: %s", name.c_str()); + return empty_addr_set(); + } + + return d->AddrsSet(); } -TableValPtr DNS_Mgr::LookupNameInCache(const string& name) +StringValPtr DNS_Mgr::LookupAddrInCache(const IPAddr& addr, bool cleanup_expired, bool check_failed) { - HostMap::iterator it = host_mappings.find(name); - if ( it == host_mappings.end() ) + auto it = all_mappings.find(addr); + if ( it == all_mappings.end() ) + return nullptr; + + auto d = it->second; + + if ( cleanup_expired && d->Expired() ) { - it = host_mappings.begin(); + all_mappings.erase(it); + return nullptr; + } + else if ( check_failed && d->Failed() ) + { + std::string s(addr); + reporter->Warning("can't resolve IP address: %s", s.c_str()); + return make_intrusive(s); + } + + if ( d->Host() ) + return d->Host(); + + return make_intrusive("<\?\?\?>"); + } + +StringValPtr DNS_Mgr::LookupOtherInCache(const std::string& name, int request_type, + bool cleanup_expired) + { + auto it = all_mappings.find(std::make_pair(request_type, name)); + if ( it == all_mappings.end() ) + return nullptr; + + auto d = it->second; + + if ( cleanup_expired && d->Expired() ) + { + all_mappings.erase(it); return nullptr; } - DNS_Mapping* d4 = it->second.first; - DNS_Mapping* d6 = it->second.second; + if ( d->Host() ) + return d->Host(); - if ( ! d4 || ! d4->names || ! d6 || ! 
d6->names ) - return nullptr; - - if ( d4->Expired() || d6->Expired() ) - { - host_mappings.erase(it); - delete d4; - delete d6; - return nullptr; - } - - auto tv4 = d4->AddrsSet(); - auto tv6 = d6->AddrsSet(); - tv4->AddTo(tv6.get(), false); - return tv6; - } - -const char* DNS_Mgr::LookupTextInCache(const string& name) - { - TextMap::iterator it = text_mappings.find(name); - if ( it == text_mappings.end() ) - return nullptr; - - DNS_Mapping* d = it->second; - - if ( d->Expired() ) - { - text_mappings.erase(it); - delete d; - return nullptr; - } - - // The escapes in the following strings are to avoid having it - // interpreted as a trigraph sequence. - return d->names ? d->names[0] : "<\?\?\?>"; - } - -static void resolve_lookup_cb(DNS_Mgr::LookupCallback* callback, TableValPtr result) - { - callback->Resolved(result.get()); - delete callback; - } - -static void resolve_lookup_cb(DNS_Mgr::LookupCallback* callback, const char* result) - { - callback->Resolved(result); - delete callback; - } - -void DNS_Mgr::AsyncLookupAddr(const IPAddr& host, LookupCallback* callback) - { - InitSource(); - - if ( mode == DNS_FAKE ) - { - resolve_lookup_cb(callback, fake_addr_lookup_result(host)); - return; - } - - // Do we already know the answer? - const char* name = LookupAddrInCache(host); - if ( name ) - { - resolve_lookup_cb(callback, name); - return; - } - - AsyncRequest* req = nullptr; - - // Have we already a request waiting for this host? - AsyncRequestAddrMap::iterator i = asyncs_addrs.find(host); - if ( i != asyncs_addrs.end() ) - req = i->second; - else - { - // A new one. 
- req = new AsyncRequest; - req->host = host; - asyncs_queued.push_back(req); - asyncs_addrs.insert(AsyncRequestAddrMap::value_type(host, req)); - } - - req->callbacks.push_back(callback); - - IssueAsyncRequests(); - } - -void DNS_Mgr::AsyncLookupName(const string& name, LookupCallback* callback) - { - InitSource(); - - if ( mode == DNS_FAKE ) - { - resolve_lookup_cb(callback, fake_name_lookup_result(name.c_str())); - return; - } - - // Do we already know the answer? - auto addrs = LookupNameInCache(name); - if ( addrs ) - { - resolve_lookup_cb(callback, std::move(addrs)); - return; - } - - AsyncRequest* req = nullptr; - - // Have we already a request waiting for this host? - AsyncRequestNameMap::iterator i = asyncs_names.find(name); - if ( i != asyncs_names.end() ) - req = i->second; - else - { - // A new one. - req = new AsyncRequest; - req->name = name; - asyncs_queued.push_back(req); - asyncs_names.insert(AsyncRequestNameMap::value_type(name, req)); - } - - req->callbacks.push_back(callback); - - IssueAsyncRequests(); - } - -void DNS_Mgr::AsyncLookupNameText(const string& name, LookupCallback* callback) - { - InitSource(); - - if ( mode == DNS_FAKE ) - { - resolve_lookup_cb(callback, fake_text_lookup_result(name.c_str())); - return; - } - - // Do we already know the answer? - const char* txt = LookupTextInCache(name); - - if ( txt ) - { - resolve_lookup_cb(callback, txt); - return; - } - - AsyncRequest* req = nullptr; - - // Have we already a request waiting for this host? - AsyncRequestTextMap::iterator i = asyncs_texts.find(name); - if ( i != asyncs_texts.end() ) - req = i->second; - else - { - // A new one. 
- req = new AsyncRequest; - req->name = name; - req->is_txt = true; - asyncs_queued.push_back(req); - asyncs_texts.insert(AsyncRequestTextMap::value_type(name, req)); - } - - req->callbacks.push_back(callback); - - IssueAsyncRequests(); - } - -static bool DoRequest(nb_dns_info* nb_dns, DNS_Mgr_Request* dr) - { - if ( dr->MakeRequest(nb_dns) ) - // dr stored in nb_dns cookie and deleted later when results available. - return true; - - reporter->Warning("can't issue DNS request"); - delete dr; - return false; + return make_intrusive("<\?\?\?>"); } void DNS_Mgr::IssueAsyncRequests() { - while ( asyncs_queued.size() && asyncs_pending < MAX_PENDING_REQUESTS ) + while ( ! asyncs_queued.empty() && asyncs_pending < MAX_PENDING_REQUESTS ) { + DNS_Request* dns_req = nullptr; AsyncRequest* req = asyncs_queued.front(); asyncs_queued.pop_front(); ++num_requests; - - bool success; - - if ( req->IsAddrReq() ) - success = DoRequest(nb_dns, new DNS_Mgr_Request(req->host)); - else if ( req->is_txt ) - success = DoRequest(nb_dns, - new DNS_Mgr_Request(req->name.c_str(), AF_INET, req->is_txt)); - else - { - // If only one request type succeeds, don't consider it a failure. - success = DoRequest(nb_dns, - new DNS_Mgr_Request(req->name.c_str(), AF_INET, req->is_txt)); - success = DoRequest(nb_dns, - new DNS_Mgr_Request(req->name.c_str(), AF_INET6, req->is_txt)) || - success; - } - - if ( ! success ) - { - req->Timeout(); - ++failed; - continue; - } - req->time = util::current_time(); - asyncs_timeouts.push(req); + + if ( req->type == T_PTR ) + dns_req = new DNS_Request(req->addr, true); + else if ( req->type == T_A || req->type == T_AAAA ) + // We pass T_A here, but DNSRequest::MakeRequest() will special-case that in + // a request that gets both T_A and T_AAAA results at one time. 
+ dns_req = new DNS_Request(req->host.c_str(), T_A, true); + else + dns_req = new DNS_Request(req->host.c_str(), req->type, true); + + dns_req->MakeRequest(channel, this); ++asyncs_pending; } } +void DNS_Mgr::CheckAsyncHostRequest(const std::string& host, bool timeout) + { + // Note that this code is a mirror of that for CheckAsyncAddrRequest. + auto i = asyncs.find(std::make_pair(T_A, host)); + + if ( i != asyncs.end() ) + { + if ( timeout ) + { + ++failed; + i->second->Timeout(); + } + else if ( auto addrs = LookupNameInCache(host, true, false) ) + { + ++successful; + i->second->Resolved(addrs); + } + else + return; + + delete i->second; + asyncs.erase(i); + --asyncs_pending; + } + } + void DNS_Mgr::CheckAsyncAddrRequest(const IPAddr& addr, bool timeout) { // Note that this code is a mirror of that for CheckAsyncHostRequest. // In the following, if it's not in the respective map anymore, we've // already finished it earlier and don't have anything to do. - AsyncRequestAddrMap::iterator i = asyncs_addrs.find(addr); + auto i = asyncs.find(addr); - if ( i != asyncs_addrs.end() ) + if ( i != asyncs.end() ) { - const char* name = LookupAddrInCache(addr); - if ( name ) - { - ++successful; - i->second->Resolved(name); - } - - else if ( timeout ) + if ( timeout ) { ++failed; i->second->Timeout(); } - + else if ( auto name = LookupAddrInCache(addr, true, false) ) + { + ++successful; + i->second->Resolved(name->CheckString()); + } else return; - asyncs_addrs.erase(i); + delete i->second; + asyncs.erase(i); --asyncs_pending; - - // Don't delete the request. That will be done once it - // eventually times out. } } -void DNS_Mgr::CheckAsyncTextRequest(const char* host, bool timeout) +void DNS_Mgr::CheckAsyncOtherRequest(const std::string& host, bool timeout, int request_type) { // Note that this code is a mirror of that for CheckAsyncAddrRequest. 
- AsyncRequestTextMap::iterator i = asyncs_texts.find(host); - if ( i != asyncs_texts.end() ) + auto i = asyncs.find(std::make_pair(request_type, host)); + if ( i != asyncs.end() ) { - const char* name = LookupTextInCache(host); - if ( name ) - { - ++successful; - i->second->Resolved(name); - } - - else if ( timeout ) - { - AsyncRequestTextMap::iterator it = asyncs_texts.begin(); - ++failed; - i->second->Timeout(); - } - - else - return; - - asyncs_texts.erase(i); - --asyncs_pending; - - // Don't delete the request. That will be done once it - // eventually times out. - } - } - -void DNS_Mgr::CheckAsyncHostRequest(const char* host, bool timeout) - { - // Note that this code is a mirror of that for CheckAsyncAddrRequest. - - AsyncRequestNameMap::iterator i = asyncs_names.find(host); - - if ( i != asyncs_names.end() ) - { - auto addrs = LookupNameInCache(host); - - if ( addrs ) - { - ++successful; - i->second->Resolved(addrs.get()); - } - - else if ( timeout ) + if ( timeout ) { ++failed; i->second->Timeout(); } - + else if ( auto name = LookupOtherInCache(host, request_type, true) ) + { + ++successful; + i->second->Resolved(name->CheckString()); + } else return; - asyncs_names.erase(i); + delete i->second; + asyncs.erase(i); --asyncs_pending; - - // Don't delete the request. That will be done once it - // eventually times out. 
} } void DNS_Mgr::Flush() { - Process(); - - HostMap::iterator it; - for ( it = host_mappings.begin(); it != host_mappings.end(); ++it ) - { - delete it->second.first; - delete it->second.second; - } - - for ( AddrMap::iterator it2 = addr_mappings.begin(); it2 != addr_mappings.end(); ++it2 ) - delete it2->second; - - for ( TextMap::iterator it3 = text_mappings.begin(); it3 != text_mappings.end(); ++it3 ) - delete it3->second; - - host_mappings.clear(); - addr_mappings.clear(); - text_mappings.clear(); + Resolve(); + all_mappings.clear(); } double DNS_Mgr::GetNextTimeout() { - if ( asyncs_timeouts.empty() ) + if ( asyncs_pending == 0 ) return -1; - return run_state::network_time + DNS_TIMEOUT; - } - -void DNS_Mgr::Process() - { - if ( ! nb_dns ) - return; - - while ( asyncs_timeouts.size() > 0 ) - { - AsyncRequest* req = asyncs_timeouts.top(); - - if ( req->time + DNS_TIMEOUT > util::current_time() && ! run_state::terminating ) - break; - - if ( ! req->processed ) - { - if ( req->IsAddrReq() ) - CheckAsyncAddrRequest(req->host, true); - else if ( req->is_txt ) - CheckAsyncTextRequest(req->name.c_str(), true); - else - CheckAsyncHostRequest(req->name.c_str(), true); - } - - asyncs_timeouts.pop(); - delete req; - } - - if ( AnswerAvailable(0) <= 0 ) - return; - - char err[NB_DNS_ERRSIZE]; - struct nb_dns_result r; - - int status = nb_dns_activity(nb_dns, &r, err); - - if ( status < 0 ) - reporter->Warning("NB-DNS error in DNS_Mgr::Process (%s)", err); - - else if ( status > 0 ) - { - DNS_Mgr_Request* dr = (DNS_Mgr_Request*)r.cookie; - - bool do_host_timeout = true; - if ( dr->ReqHost() && host_mappings.find(dr->ReqHost()) == host_mappings.end() ) - // Don't timeout when this is the first result in an expected pair - // (one result each for A and AAAA queries). - do_host_timeout = false; - - if ( dr->RequestPending() ) - { - AddResult(dr, &r); - dr->RequestDone(); - } - - if ( ! 
dr->ReqHost() ) - CheckAsyncAddrRequest(dr->ReqAddr(), true); - else if ( dr->ReqIsTxt() ) - CheckAsyncTextRequest(dr->ReqHost(), do_host_timeout); - else - CheckAsyncHostRequest(dr->ReqHost(), do_host_timeout); - - IssueAsyncRequests(); - - delete dr; - } - } - -int DNS_Mgr::AnswerAvailable(int timeout) - { - if ( ! nb_dns ) - return -1; - - int fd = nb_dns_fd(nb_dns); - if ( fd < 0 ) - { - reporter->Warning("nb_dns_fd() failed in DNS_Mgr::WaitForReplies"); - return -1; - } - - fd_set read_fds; + fd_set read_fds, write_fds; FD_ZERO(&read_fds); - FD_SET(fd, &read_fds); - - struct timeval t; - t.tv_sec = timeout; - t.tv_usec = 0; - - int status = select(fd + 1, &read_fds, 0, 0, &t); - - if ( status < 0 ) - { - if ( errno != EINTR ) - reporter->Warning("problem with DNS select"); - + FD_ZERO(&write_fds); + int nfds = ares_fds(channel, &read_fds, &write_fds); + if ( nfds == 0 ) return -1; + + struct timeval tv; + tv.tv_sec = DNS_TIMEOUT; + tv.tv_usec = 0; + + struct timeval* tvp = ares_timeout(channel, &tv, &tv); + + return run_state::network_time + static_cast(tvp->tv_sec) + + (static_cast(tvp->tv_usec) / 1e6); + } + +void DNS_Mgr::ProcessFd(int fd, int flags) + { + if ( socket_fds.count(fd) != 0 ) + { + int read_fd = (flags & IOSource::ProcessFlags::READ) != 0 ? fd : ARES_SOCKET_BAD; + int write_fd = (flags & IOSource::ProcessFlags::WRITE) != 0 ? fd : ARES_SOCKET_BAD; + ares_process_fd(channel, read_fd, write_fd); } - if ( status > 1 ) - { - reporter->Warning("strange return from DNS select"); - return -1; - } - - return status; + IssueAsyncRequests(); } void DNS_Mgr::GetStats(Stats* stats) { + // TODO: can this use the telemetry framework? 
stats->requests = num_requests; stats->successful = successful; stats->failed = failed; stats->pending = asyncs_pending; - stats->cached_hosts = host_mappings.size(); - stats->cached_addresses = addr_mappings.size(); - stats->cached_texts = text_mappings.size(); + + stats->cached_hosts = 0; + stats->cached_addresses = 0; + stats->cached_texts = 0; + + for ( const auto& [key, mapping] : all_mappings ) + { + if ( mapping->ReqType() == T_PTR ) + stats->cached_addresses++; + else if ( mapping->ReqType() == T_A ) + stats->cached_hosts++; + else + stats->cached_texts++; + } } -void DNS_Mgr::Terminate() +void DNS_Mgr::AsyncRequest::Resolved(const std::string& name) { - if ( nb_dns ) - iosource_mgr->UnregisterFd(nb_dns_fd(nb_dns), this); + for ( const auto& cb : callbacks ) + { + cb->Resolved(name); + if ( ! doctest::is_running_in_test ) + delete cb; + } + + callbacks.clear(); + processed = true; + } + +void DNS_Mgr::AsyncRequest::Resolved(TableValPtr addrs) + { + for ( const auto& cb : callbacks ) + { + cb->Resolved(addrs); + if ( ! doctest::is_running_in_test ) + delete cb; + } + + callbacks.clear(); + processed = true; + } + +void DNS_Mgr::AsyncRequest::Timeout() + { + for ( const auto& cb : callbacks ) + { + cb->Timeout(); + if ( ! doctest::is_running_in_test ) + delete cb; + } + + callbacks.clear(); + processed = true; + } + +TableValPtr DNS_Mgr::empty_addr_set() + { + // TODO: can this be returned statically as well? Does the result get used in a way + // that would modify the same value being returned repeatedly? 
+ auto addr_t = base_type(TYPE_ADDR); + auto set_index = make_intrusive(addr_t); + set_index->Append(std::move(addr_t)); + auto s = make_intrusive(std::move(set_index), nullptr); + return make_intrusive(std::move(s)); + } + +////////////////////////////////////////////////////////////////////////////////////////// +////////////////////////////////////////////////////////////////////////////////////////// +////////////////////////////////////////////////////////////////////////////////////////// + +static std::vector get_result_addresses(TableValPtr addrs) + { + std::vector results; + + auto m = addrs->ToMap(); + for ( const auto& [k, v] : m ) + { + auto lv = cast_intrusive(k); + auto lvv = lv->Vals(); + for ( const auto& addr : lvv ) + { + auto addr_ptr = cast_intrusive(addr); + results.push_back(addr_ptr->Get()); + } + } + + return results; + } + +class TestCallback : public DNS_Mgr::LookupCallback + { +public: + TestCallback() { } + void Resolved(const std::string& name) override + { + host_result = name; + done = true; + } + void Resolved(TableValPtr addrs) override + { + addr_results = get_result_addresses(addrs); + done = true; + } + void Timeout() override + { + timeout = true; + done = true; + } + + std::string host_result; + std::vector addr_results; + bool done = false; + bool timeout = false; + }; + +/** + * Derived testing version of DNS_Mgr so that the Process() method can be exposed + * publically. If new unit tests are added, this class should be used over using + * DNS_Mgr directly. + */ +class TestDNS_Mgr final : public DNS_Mgr + { +public: + explicit TestDNS_Mgr(DNS_MgrMode mode) : DNS_Mgr(mode) { } + void Process(); + }; + +void TestDNS_Mgr::Process() + { + // Only allow usage of this method when running unit tests. 
+ assert(doctest::is_running_in_test); + Resolve(); + IssueAsyncRequests(); + } + +TEST_CASE("dns_mgr priming") + { + char prefix[] = "/tmp/zeek-unit-test-XXXXXX"; + auto tmpdir = mkdtemp(prefix); + + // Create a manager to prime the cache, make a few requests, and the save + // the result. This tests that the priming code will create the requests but + // wait for Resolve() to actually make the requests. + TestDNS_Mgr mgr(DNS_PRIME); + mgr.SetDir(tmpdir); + mgr.InitPostScript(); + + auto host_result = mgr.LookupHost("one.one.one.one"); + REQUIRE(host_result != nullptr); + CHECK(host_result->EqualTo(TestDNS_Mgr::empty_addr_set())); + + IPAddr ones("1.1.1.1"); + auto addr_result = mgr.LookupAddr(ones); + CHECK(strcmp(addr_result->CheckString(), "") == 0); + + // This should wait until we have all of the results back from the above + // requests. + mgr.Resolve(); + + // Save off the resulting values from Resolve() into a file on disk + // in the tmpdir created by mkdtemp. + REQUIRE(mgr.Save()); + + // Make a second DNS manager and reload the cache that we just saved. + TestDNS_Mgr mgr2(DNS_FORCE); + dns_mgr = &mgr2; + mgr2.SetDir(tmpdir); + mgr2.InitPostScript(); + + // Make the same two requests, but verify that we're correctly getting + // data out of the cache. 
+ host_result = mgr2.LookupHost("one.one.one.one"); + REQUIRE(host_result != nullptr); + CHECK_FALSE(host_result->EqualTo(TestDNS_Mgr::empty_addr_set())); + + addr_result = mgr2.LookupAddr(ones); + REQUIRE(addr_result != nullptr); + CHECK(strcmp(addr_result->CheckString(), "one.one.one.one") == 0); + + // Clean up cache file and the temp directory + unlink(mgr2.CacheFile().c_str()); + rmdir(tmpdir); + } + +TEST_CASE("dns_mgr alternate server") + { + char* old_server = getenv("ZEEK_DNS_RESOLVER"); + + setenv("ZEEK_DNS_RESOLVER", "1.1.1.1", 1); + TestDNS_Mgr mgr(DNS_DEFAULT); + + mgr.InitPostScript(); + + auto result = mgr.LookupAddr("1.1.1.1"); + REQUIRE(result != nullptr); + CHECK(strcmp(result->CheckString(), "one.one.one.one") == 0); + + // FIXME: This won't run on systems without IPv6 connectivity. + // setenv("ZEEK_DNS_RESOLVER", "2606:4700:4700::1111", 1); + // TestDNS_Mgr mgr2(DNS_DEFAULT, true); + // mgr2.InitPostScript(); + // result = mgr2.LookupAddr("1.1.1.1"); + // mgr2.Resolve(); + + // result = mgr2.LookupAddr("1.1.1.1"); + // CHECK(strcmp(result->CheckString(), "one.one.one.one") == 0); + + if ( old_server ) + setenv("ZEEK_DNS_RESOLVER", old_server, 1); + else + unsetenv("ZEEK_DNS_RESOLVER"); + } + +TEST_CASE("dns_mgr default mode") + { + TestDNS_Mgr mgr(DNS_DEFAULT); + mgr.InitPostScript(); + + IPAddr ones4("1.1.1.1"); + IPAddr ones6("2606:4700:4700::1111"); + + auto host_result = mgr.LookupHost("one.one.one.one"); + REQUIRE(host_result != nullptr); + CHECK_FALSE(host_result->EqualTo(TestDNS_Mgr::empty_addr_set())); + + auto addrs_from_request = get_result_addresses(host_result); + auto it = std::find(addrs_from_request.begin(), addrs_from_request.end(), ones4); + CHECK(it != addrs_from_request.end()); + it = std::find(addrs_from_request.begin(), addrs_from_request.end(), ones6); + CHECK(it != addrs_from_request.end()); + + auto addr_result = mgr.LookupAddr(ones4); + REQUIRE(addr_result != nullptr); + CHECK(strcmp(addr_result->CheckString(), 
"one.one.one.one") == 0); + + addr_result = mgr.LookupAddr(ones6); + REQUIRE(addr_result != nullptr); + CHECK(strcmp(addr_result->CheckString(), "one.one.one.one") == 0); + + IPAddr bad("240.0.0.0"); + addr_result = mgr.LookupAddr(bad); + REQUIRE(addr_result != nullptr); + CHECK(strcmp(addr_result->CheckString(), "240.0.0.0") == 0); + } + +TEST_CASE("dns_mgr async host") + { + TestDNS_Mgr mgr(DNS_DEFAULT); + mgr.InitPostScript(); + + TestCallback cb{}; + mgr.LookupHost("one.one.one.one", &cb); + + // This shouldn't take any longer than DNS_TIMEOUT+1 seconds, so bound it + // just in case of some failure we're not aware of yet. + int count = 0; + while ( ! cb.done && (count < DNS_TIMEOUT + 1) ) + { + mgr.Process(); + sleep(1); + if ( ! cb.timeout ) + count++; + } + + REQUIRE(count < (DNS_TIMEOUT + 1)); + if ( ! cb.timeout ) + { + REQUIRE_FALSE(cb.addr_results.empty()); + IPAddr ones("1.1.1.1"); + auto it = std::find(cb.addr_results.begin(), cb.addr_results.end(), ones); + CHECK(it != cb.addr_results.end()); + } + + mgr.Flush(); + } + +TEST_CASE("dns_mgr async addr") + { + TestDNS_Mgr mgr(DNS_DEFAULT); + mgr.InitPostScript(); + + TestCallback cb{}; + mgr.LookupAddr(IPAddr{"1.1.1.1"}, &cb); + + // This shouldn't take any longer than DNS_TIMEOUT +1 seconds, so bound it + // just in case of some failure we're not aware of yet. + int count = 0; + while ( ! cb.done && (count < DNS_TIMEOUT + 1) ) + { + mgr.Process(); + sleep(1); + if ( ! cb.timeout ) + count++; + } + + REQUIRE(count < (DNS_TIMEOUT + 1)); + if ( ! cb.timeout ) + REQUIRE(cb.host_result == "one.one.one.one"); + + mgr.Flush(); + } + +TEST_CASE("dns_mgr async text") + { + TestDNS_Mgr mgr(DNS_DEFAULT); + mgr.InitPostScript(); + + TestCallback cb{}; + mgr.Lookup("unittest.zeek.org", T_TXT, &cb); + + // This shouldn't take any longer than DNS_TIMEOUT +1 seconds, so bound it + // just in case of some failure we're not aware of yet. + int count = 0; + while ( ! 
cb.done && (count < DNS_TIMEOUT + 1) ) + { + mgr.Process(); + sleep(1); + if ( ! cb.timeout ) + count++; + } + + REQUIRE(count < (DNS_TIMEOUT + 1)); + if ( ! cb.timeout ) + REQUIRE(cb.host_result == "testing dns_mgr"); + + mgr.Flush(); + } + +TEST_CASE("dns_mgr timeouts") + { + char* old_server = getenv("ZEEK_DNS_RESOLVER"); + + // This is the address for blackhole.webpagetest.org, which provides a DNS + // server that lets you connect but never returns any responses, always + // resulting in a timeout. + setenv("ZEEK_DNS_RESOLVER", "3.219.212.117", 1); + TestDNS_Mgr mgr(DNS_DEFAULT); + + mgr.InitPostScript(); + auto addr_result = mgr.LookupAddr("1.1.1.1"); + REQUIRE(addr_result != nullptr); + CHECK(strcmp(addr_result->CheckString(), "1.1.1.1") == 0); + + auto host_result = mgr.LookupHost("one.one.one.one"); + REQUIRE(host_result != nullptr); + auto addresses = get_result_addresses(host_result); + CHECK(addresses.size() == 0); + + if ( old_server ) + setenv("ZEEK_DNS_RESOLVER", old_server, 1); + else + unsetenv("ZEEK_DNS_RESOLVER"); + } + +TEST_CASE("dns_mgr async timeouts") + { + char* old_server = getenv("ZEEK_DNS_RESOLVER"); + + // This is the address for blackhole.webpagetest.org, which provides a DNS + // server that lets you connect but never returns any responses, always + // resulting in a timeout. + setenv("ZEEK_DNS_RESOLVER", "3.219.212.117", 1); + TestDNS_Mgr mgr(DNS_DEFAULT); + mgr.InitPostScript(); + + TestCallback cb{}; + mgr.Lookup("unittest.zeek.org", T_TXT, &cb); + + // This shouldn't take any longer than DNS_TIMEOUT +1 seconds, so bound it + // just in case of some failure we're not aware of yet. + int count = 0; + while ( ! cb.done && (count < DNS_TIMEOUT + 1) ) + { + mgr.Process(); + sleep(1); + if ( ! 
cb.timeout ) + count++; + } + + REQUIRE(count < (DNS_TIMEOUT + 1)); + CHECK(cb.timeout); + + mgr.Flush(); + + if ( old_server ) + setenv("ZEEK_DNS_RESOLVER", old_server, 1); + else + unsetenv("ZEEK_DNS_RESOLVER"); } } // namespace zeek::detail diff --git a/src/DNS_Mgr.h b/src/DNS_Mgr.h index ef9b61db74..2adfc3ac01 100644 --- a/src/DNS_Mgr.h +++ b/src/DNS_Mgr.h @@ -2,10 +2,12 @@ #pragma once +#include #include #include #include #include +#include #include "zeek/EventHandler.h" #include "zeek/IPAddr.h" @@ -13,33 +15,38 @@ #include "zeek/iosource/IOSource.h" #include "zeek/util.h" +// These are defined in ares headers but we don't want to have to include +// those headers here and create install dependencies on them. +struct ares_channeldata; +typedef struct ares_channeldata* ares_channel; +#ifndef T_PTR +#define T_PTR 12 +#endif + +#ifndef T_TXT +#define T_TXT 16 +#endif + namespace zeek { - -class EventHandler; -class RecordType; class Val; class ListVal; class TableVal; +class StringVal; template class IntrusivePtr; using ValPtr = IntrusivePtr; using ListValPtr = IntrusivePtr; using TableValPtr = IntrusivePtr; +using StringValPtr = IntrusivePtr; } // namespace zeek -// Defined in nb_dns.h -struct nb_dns_info; -struct nb_dns_result; - namespace zeek::detail { - -class DNS_Mgr_Request; -using DNS_mgr_request_list = PList; - class DNS_Mapping; +using DNS_MappingPtr = std::shared_ptr; +class DNS_Request; enum DNS_MgrMode { @@ -49,50 +56,144 @@ enum DNS_MgrMode DNS_FAKE, // don't look up names, just return dummy results }; -// Number of seconds we'll wait for a reply. -#define DNS_TIMEOUT 5 - -class DNS_Mgr final : public iosource::IOSource +class DNS_Mgr : public iosource::IOSource { public: - explicit DNS_Mgr(DNS_MgrMode mode); - ~DNS_Mgr() override; - - void InitPostScript(); - void Flush(); - - // Looks up the address or addresses of the given host, and returns - // a set of addr. 
- TableValPtr LookupHost(const char* host); - - ValPtr LookupAddr(const IPAddr& addr); - - // Define the directory where to store the data. - void SetDir(const char* arg_dir) { dir = util::copy_string(arg_dir); } - - void Verify(); - void Resolve(); - bool Save(); - - const char* LookupAddrInCache(const IPAddr& addr); - TableValPtr LookupNameInCache(const std::string& name); - const char* LookupTextInCache(const std::string& name); - - // Support for async lookups. + /** + * Base class for callback handling for asynchronous lookups. + */ class LookupCallback { public: - LookupCallback() { } - virtual ~LookupCallback() { } + virtual ~LookupCallback() = default; - virtual void Resolved(const char* name){}; - virtual void Resolved(TableVal* addrs){}; + /** + * Called when an address lookup finishes. + * + * @param name The resulting name from the lookup. + */ + virtual void Resolved(const std::string& name){}; + + /** + * Called when a name lookup finishes. + * + * @param addrs A table of the resulting addresses from the lookup. + */ + virtual void Resolved(TableValPtr addrs){}; + + /** + * Generic callback method for all request types. + * + * @param val A Val containing the data from the query. + */ + virtual void Resolved(ValPtr data, int request_type) { } + + /** + * Called when a timeout request occurs. + */ virtual void Timeout() = 0; }; - void AsyncLookupAddr(const IPAddr& host, LookupCallback* callback); - void AsyncLookupName(const std::string& name, LookupCallback* callback); - void AsyncLookupNameText(const std::string& name, LookupCallback* callback); + explicit DNS_Mgr(DNS_MgrMode mode); + ~DNS_Mgr() override; + + /** + * Finalizes the manager initialization. This should be called only after all + * of the scripts have been parsed at startup. + */ + void InitPostScript(); + + /** + * Attempts to process one more round of requests and then flushes the + * mapping caches. 
+ */ + void Flush(); + + /** + * Looks up the address(es) of a given host and returns a set of addresses. + * This is a shorthand method for doing A/AAAA requests. This is a + * synchronous request and will block until the request completes or times + * out. + * + * @param host The hostname to lookup an address for. + * @return A set of addresses for the host. + */ + TableValPtr LookupHost(const std::string& host); + + /** + * Looks up the hostname of a given address. This is a shorthand method for + * doing PTR requests. This is a synchronous request and will block until + * the request completes or times out. + * + * @param host The addr to lookup a hostname for. + * @return The hostname for the address. + */ + StringValPtr LookupAddr(const IPAddr& addr); + + /** + * Performs a generic request to the DNS server. This is a synchronous + * request and will block until the request completes or times out. + * + * @param name The name or address to make a request for. If this is an + * address it should be in arpa format (x.x.x.x.in-addr.arpa or x-*.ip6.arpa). + * Note that calling LookupAddr for PTR requests does this conversion + * automatically. + * @param request_type The type of request to make. This should be one of + * the type values defined in arpa/nameser.h or ares_nameser.h. + * @return The requested data. + */ + ValPtr Lookup(const std::string& name, int request_type); + + /** + * Looks up the address(es) of a given host. This is a shorthand method + * for doing A/AAAA requests. This is an asynchronous request. The + * response will be handled via the provided callback object. + * + * @param host The hostname to lookup an address for. + * @param callback A callback object for handling the response. + */ + void LookupHost(const std::string& host, LookupCallback* callback); + + /** + * Looks up the hostname of a given address. This is a shorthand method for + * doing PTR requests. This is an asynchronous request. 
The response will + * be handled via the provided callback object. + * + * @param host The addr to lookup a hostname for. + * @param callback A callback object for handling the response. + */ + void LookupAddr(const IPAddr& addr, LookupCallback* callback); + + /** + * Performs a generic request to the DNS server. This is an asynchronous + * request. The response will be handled via the provided callback + * object. + * + * @param name The name or address to make a request for. If this is an + * address it should be in arpa format (x.x.x.x.in-addr.arpa or x-*.ip6.arpa). + * Note that calling LookupAddr for PTR requests does this conversion + * automatically. + * @param request_type The type of request to make. This should be one of + * the type values defined in arpa/nameser.h or ares_nameser.h. + * @param callback A callback object for handling the response. + */ + void Lookup(const std::string& name, int request_type, LookupCallback* callback); + + /** + * Sets the directory where to store DNS data when Save() is called. + */ + void SetDir(const std::string& arg_dir) { dir = arg_dir; } + + /** + * Waits for responses to become available or a timeout to occur, + * and handles any responses. + */ + void Resolve(); + + /** + * Saves the current name and address caches to disk. + */ + bool Save(); struct Stats { @@ -105,142 +206,137 @@ public: unsigned long cached_texts; }; + /** + * Returns the current statistics for the DNS_Manager. + * + * @param stats A pointer to a stats object to return the data in. + */ void GetStats(Stats* stats); - void Terminate(); + /** + * Adds a result from a request to the caches. This is public so that the + * callback methods can call it from outside of the DNS_Mgr class. + * + * @param dr The request associated with the result. + * @param h A hostent structure containing the actual result data. + * @param ttl A ttl value contained in the response from the server. 
+ * @param merge A flag for whether these results should be merged into + * an existing mapping. If false, AddResult will attempt to replace the + * existing mapping with the new data and delete the old mapping. + */ + void AddResult(DNS_Request* dr, struct hostent* h, uint32_t ttl, bool merge = false); + + /** + * Returns an empty set of addresses, used in various error cases and during + * cache priming. + */ + static TableValPtr empty_addr_set(); + + /** + * Returns the full path to the file used to store the DNS cache. + */ + std::string CacheFile() const { return cache_name; } + + /** + * Used by the c-ares socket call back to register/unregister a socket file descriptor. + */ + void RegisterSocket(int fd, bool read, bool write); + + ares_channel& GetChannel() { return channel; } protected: friend class LookupCallback; - friend class DNS_Mgr_Request; + friend class DNS_Request; - void Event(EventHandlerPtr e, DNS_Mapping* dm); - void Event(EventHandlerPtr e, DNS_Mapping* dm, ListValPtr l1, ListValPtr l2); - void Event(EventHandlerPtr e, DNS_Mapping* old_dm, DNS_Mapping* new_dm); - - ValPtr BuildMappingVal(DNS_Mapping* dm); - - void AddResult(DNS_Mgr_Request* dr, struct nb_dns_result* r); - void CompareMappings(DNS_Mapping* prev_dm, DNS_Mapping* new_dm); - ListValPtr AddrListDelta(ListVal* al1, ListVal* al2); - void DumpAddrList(FILE* f, ListVal* al); - - using HostMap = std::map>; - using AddrMap = std::map; - using TextMap = std::map; - void LoadCache(FILE* f); - void Save(FILE* f, const AddrMap& m); - void Save(FILE* f, const HostMap& m); - - // Selects on the fd to see if there is an answer available (timeout - // is secs). Returns 0 on timeout, -1 on EINTR or other error, and 1 - // if answer is ready. - int AnswerAvailable(int timeout); - - // Issue as many queued async requests as slots are available. 
- void IssueAsyncRequests(); + StringValPtr LookupAddrInCache(const IPAddr& addr, bool cleanup_expired = false, + bool check_failed = false); + TableValPtr LookupNameInCache(const std::string& name, bool cleanup_expired = false, + bool check_failed = false); + StringValPtr LookupOtherInCache(const std::string& name, int request_type, + bool cleanup_expired = false); // Finish the request if we have a result. If not, time it out if // requested. void CheckAsyncAddrRequest(const IPAddr& addr, bool timeout); - void CheckAsyncHostRequest(const char* host, bool timeout); - void CheckAsyncTextRequest(const char* host, bool timeout); + void CheckAsyncHostRequest(const std::string& host, bool timeout); + void CheckAsyncOtherRequest(const std::string& host, bool timeout, int request_type); + + void Event(EventHandlerPtr e, const DNS_MappingPtr& dm); + void Event(EventHandlerPtr e, const DNS_MappingPtr& dm, ListValPtr l1, ListValPtr l2); + void Event(EventHandlerPtr e, const DNS_MappingPtr& old_dm, DNS_MappingPtr new_dm); + + ValPtr BuildMappingVal(const DNS_MappingPtr& dm); + + void CompareMappings(const DNS_MappingPtr& prev_dm, const DNS_MappingPtr& new_dm); + ListValPtr AddrListDelta(ListValPtr al1, ListValPtr al2); + + using MappingKey = std::variant>; + using MappingMap = std::map; + void LoadCache(const std::string& path); + void Save(FILE* f, const MappingMap& m); + + // Issue as many queued async requests as slots are available. + void IssueAsyncRequests(); // IOSource interface. 
- void Process() override; + void Process() override { } + void ProcessFd(int fd, int flags) override; void InitSource() override; const char* Tag() override { return "DNS_Mgr"; } double GetNextTimeout() override; DNS_MgrMode mode; - HostMap host_mappings; - AddrMap addr_mappings; - TextMap text_mappings; + MappingMap all_mappings; - DNS_mgr_request_list requests; + std::string cache_name; + std::string dir; // directory in which cache_name resides - nb_dns_info* nb_dns; - char* cache_name; - char* dir; // directory in which cache_name resides - - bool did_init; - int asyncs_pending; + bool did_init = false; + int asyncs_pending = 0; RecordTypePtr dm_rec; + ares_channel channel{}; + using CallbackList = std::list; struct AsyncRequest { - double time; - IPAddr host; - std::string name; + double time = 0.0; + IPAddr addr; + std::string host; CallbackList callbacks; - bool is_txt; - bool processed; + int type = 0; + bool processed = false; - AsyncRequest() : time(0.0), is_txt(false), processed(false) { } - - bool IsAddrReq() const { return name.empty(); } - - void Resolved(const char* name) + AsyncRequest(std::string host, int request_type) : host(std::move(host)), type(request_type) { - for ( CallbackList::iterator i = callbacks.begin(); i != callbacks.end(); ++i ) - { - (*i)->Resolved(name); - delete *i; - } - callbacks.clear(); - processed = true; } + AsyncRequest(const IPAddr& addr) : addr(addr), type(T_PTR) { } - void Resolved(TableVal* addrs) - { - for ( CallbackList::iterator i = callbacks.begin(); i != callbacks.end(); ++i ) - { - (*i)->Resolved(addrs); - delete *i; - } - callbacks.clear(); - processed = true; - } - - void Timeout() - { - for ( CallbackList::iterator i = callbacks.begin(); i != callbacks.end(); ++i ) - { - (*i)->Timeout(); - delete *i; - } - callbacks.clear(); - processed = true; - } + void Resolved(const std::string& name); + void Resolved(TableValPtr addrs); + void Timeout(); }; - using AsyncRequestAddrMap = std::map; - AsyncRequestAddrMap 
asyncs_addrs; - - using AsyncRequestNameMap = std::map; - AsyncRequestNameMap asyncs_names; - - using AsyncRequestTextMap = std::map; - AsyncRequestTextMap asyncs_texts; - - using QueuedList = std::list; - QueuedList asyncs_queued; - struct AsyncRequestCompare { bool operator()(const AsyncRequest* a, const AsyncRequest* b) { return a->time > b->time; } }; - using TimeoutQueue = - std::priority_queue, AsyncRequestCompare>; - TimeoutQueue asyncs_timeouts; + using AsyncRequestMap = std::map; + AsyncRequestMap asyncs; - unsigned long num_requests; - unsigned long successful; - unsigned long failed; + using QueuedList = std::list; + QueuedList asyncs_queued; + + unsigned long num_requests = 0; + unsigned long successful = 0; + unsigned long failed = 0; + + std::set socket_fds; + std::set write_socket_fds; }; extern DNS_Mgr* dns_mgr; diff --git a/src/DbgBreakpoint.h b/src/DbgBreakpoint.h index 788d19ac18..f5af59d7a8 100644 --- a/src/DbgBreakpoint.h +++ b/src/DbgBreakpoint.h @@ -1,4 +1,4 @@ -// Structures and methods for implementing breakpoints in the Bro debugger. +// Structures and methods for implementing breakpoints in the Zeek debugger. #pragma once diff --git a/src/DbgDisplay.h b/src/DbgDisplay.h index 7b4ed086c4..09af4b230e 100644 --- a/src/DbgDisplay.h +++ b/src/DbgDisplay.h @@ -1,4 +1,4 @@ -// Structures and methods for implementing watches in the Bro debugger. +// Structures and methods for implementing watches in the Zeek debugger. #pragma once diff --git a/src/DbgHelp.cc b/src/DbgHelp.cc index 373f0c0613..7f346b9a14 100644 --- a/src/DbgHelp.cc +++ b/src/DbgHelp.cc @@ -1,4 +1,4 @@ -// Bro Debugger Help +// Zeek Debugger Help #include "zeek/zeek-config.h" diff --git a/src/DbgWatch.h b/src/DbgWatch.h index 8bfda57c16..22715c2195 100644 --- a/src/DbgWatch.h +++ b/src/DbgWatch.h @@ -1,4 +1,4 @@ -// Structures and methods for implementing watches in the Bro debugger. +// Structures and methods for implementing watches in the Zeek debugger. 
#pragma once diff --git a/src/Debug.cc b/src/Debug.cc index 469d016f2d..d63cef512c 100644 --- a/src/Debug.cc +++ b/src/Debug.cc @@ -1,4 +1,4 @@ -// Debugging support for Bro policy files. +// Debugging support for Zeek policy files. #include "zeek/Debug.h" diff --git a/src/Debug.h b/src/Debug.h index a006f35971..06bc483b98 100644 --- a/src/Debug.h +++ b/src/Debug.h @@ -1,4 +1,4 @@ -// Debugging support for Bro policy files. +// Debugging support for Zeek policy files. #pragma once diff --git a/src/DebugCmds.cc b/src/DebugCmds.cc index 3f69bccbd7..dbb150d716 100644 --- a/src/DebugCmds.cc +++ b/src/DebugCmds.cc @@ -1,4 +1,4 @@ -// Support routines to help deal with Bro debugging commands and +// Support routines to help deal with Zeek debugging commands and // implementation of most commands. #include "zeek/DebugCmds.h" diff --git a/src/DebugCmds.h b/src/DebugCmds.h index 4a0d511cae..35a0a773cd 100644 --- a/src/DebugCmds.h +++ b/src/DebugCmds.h @@ -1,4 +1,4 @@ -// Support routines to help deal with Bro debugging commands and +// Support routines to help deal with Zeek debugging commands and // implementation of most commands. #pragma once diff --git a/src/Dict.cc b/src/Dict.cc index 62e4dd0d87..a327b51ad5 100644 --- a/src/Dict.cc +++ b/src/Dict.cc @@ -1299,6 +1299,15 @@ void Dictionary::AdjustOnInsert(IterCookie* c, const detail::DictEntry& entry, i { ASSERT(c); ASSERT_VALID(c); + + // Remove any previous instances of this value that we may have recorded as + // their pointers will get invalid. We won't need that knowledge anymore + // anyways, will update with new information below as needed. 
+ c->inserted->erase(std::remove(c->inserted->begin(), c->inserted->end(), entry), + c->inserted->end()); + c->visited->erase(std::remove(c->visited->begin(), c->visited->end(), entry), + c->visited->end()); + if ( insert_position < c->next ) c->inserted->push_back(entry); if ( insert_position < c->next && c->next <= last_affected_position ) @@ -1314,6 +1323,12 @@ void Dictionary::AdjustOnInsert(IterCookie* c, const detail::DictEntry& entry, i void Dictionary::AdjustOnInsert(RobustDictIterator* c, const detail::DictEntry& entry, int insert_position, int last_affected_position) { + // See note in Dictionary::AdjustOnInsert() above. + c->inserted->erase(std::remove(c->inserted->begin(), c->inserted->end(), entry), + c->inserted->end()); + c->visited->erase(std::remove(c->visited->begin(), c->visited->end(), entry), + c->visited->end()); + if ( insert_position < c->next ) c->inserted->push_back(entry); if ( insert_position < c->next && c->next <= last_affected_position ) @@ -1442,8 +1457,13 @@ void Dictionary::AdjustOnRemove(IterCookie* c, const detail::DictEntry& entry, i int last_affected_position) { ASSERT_VALID(c); + + // See note in Dictionary::AdjustOnInsert() above. c->inserted->erase(std::remove(c->inserted->begin(), c->inserted->end(), entry), c->inserted->end()); + c->visited->erase(std::remove(c->visited->begin(), c->visited->end(), entry), + c->visited->end()); + if ( position < c->next && c->next <= last_affected_position ) { int moved = HeadOfClusterByPosition(c->next - 1); @@ -1462,8 +1482,12 @@ void Dictionary::AdjustOnRemove(IterCookie* c, const detail::DictEntry& entry, i void Dictionary::AdjustOnRemove(RobustDictIterator* c, const detail::DictEntry& entry, int position, int last_affected_position) { + // See note in Dictionary::AdjustOnInsert() above. 
c->inserted->erase(std::remove(c->inserted->begin(), c->inserted->end(), entry), c->inserted->end()); + c->visited->erase(std::remove(c->visited->begin(), c->visited->end(), entry), + c->visited->end()); + if ( position < c->next && c->next <= last_affected_position ) { int moved = HeadOfClusterByPosition(c->next - 1); @@ -1475,6 +1499,14 @@ void Dictionary::AdjustOnRemove(RobustDictIterator* c, const detail::DictEntry& // if not already the end of the dictionary, adjust next to a valid one. if ( c->next < Capacity() && table[c->next].Empty() ) c->next = Next(c->next); + + if ( c->curr == entry ) + { + if ( c->next >= 0 && c->next < Capacity() && ! table[c->next].Empty() ) + c->curr = table[c->next]; + else + c->curr = detail::DictEntry(nullptr); // -> c == end_robust() + } } /////////////////////////////////////////////////////////////////////////////////////////////////// @@ -1808,7 +1840,20 @@ detail::DictEntry Dictionary::GetNextRobustIteration(RobustDictIterator* iter) if ( iter->next < 0 ) iter->next = Next(-1); - ASSERT(iter->next >= Capacity() || ! table[iter->next].Empty()); + if ( iter->next < Capacity() && table[iter->next].Empty() ) + { + // [Robin] I believe this means that the table has resized in a way + // that we're now inside the overflow area where elements are empty, + // because elsewhere empty slots aren't allowed. Assuming that's right, + // then it means we'll always be at the end of the table now and could + // also just set `next` to capacity. However, just to be sure, we + // instead reuse logic from below to move forward "to a valid position" + // and then double check, through an assertion in debug mode, that it's + // actually the end. If this ever triggered, the above assumption would + // be wrong (but the Next() call would probably still be right). + iter->next = Next(iter->next); + ASSERT(iter->next == Capacity()); + } // Filter out visited keys. 
int capacity = Capacity(); diff --git a/src/Dict.h b/src/Dict.h index 635dc0efed..5f905fcfb0 100644 --- a/src/Dict.h +++ b/src/Dict.h @@ -168,7 +168,9 @@ public: DictIterator& operator=(DictIterator&& that); reference operator*() { return *curr; } + reference operator*() const { return *curr; } pointer operator->() { return curr; } + pointer operator->() const { return curr; } DictIterator& operator++(); DictIterator operator++(int) diff --git a/src/Event.cc b/src/Event.cc index c6dfc5365a..0ae98f8fab 100644 --- a/src/Event.cc +++ b/src/Event.cc @@ -136,7 +136,7 @@ void EventMgr::Drain() draining = true; - // Past Bro versions drained as long as there events, including when + // Past Zeek versions drained as long as there events, including when // a handler queued new events during its execution. This could lead // to endless loops in case a handler kept triggering its own event. // We now limit this to just a couple of rounds. We do more than diff --git a/src/EventHandler.h b/src/EventHandler.h index ed981ae7e4..f47ff173d8 100644 --- a/src/EventHandler.h +++ b/src/EventHandler.h @@ -20,7 +20,7 @@ class EventHandler public: explicit EventHandler(std::string name); - const char* Name() { return name.data(); } + const char* Name() const { return name.data(); } const FuncPtr& GetFunc() { return local; } diff --git a/src/EventRegistry.h b/src/EventRegistry.h index 5b48334bbc..a0dd10d69e 100644 --- a/src/EventRegistry.h +++ b/src/EventRegistry.h @@ -1,4 +1,4 @@ -// Each event raised/handled by Bro is registered in the EventRegistry. +// Each event raised/handled by Zeek is registered in the EventRegistry. #pragma once diff --git a/src/EventTrace.cc b/src/EventTrace.cc new file mode 100644 index 0000000000..e55e368e00 --- /dev/null +++ b/src/EventTrace.cc @@ -0,0 +1,1079 @@ +// See the file "COPYING" in the main distribution directory for copyright. 
+ +#include "zeek/EventTrace.h" + +#include + +#include "zeek/Desc.h" +#include "zeek/EventHandler.h" +#include "zeek/Func.h" +#include "zeek/IPAddr.h" +#include "zeek/Reporter.h" +#include "zeek/ZeekString.h" + +namespace zeek::detail + { + +std::unique_ptr etm; + +// Helper function for generating a correct script-level representation +// of a string constant. +static std::string escape_string(const u_char* b, int len) + { + std::string res = "\""; + + for ( int i = 0; i < len; ++i ) + { + unsigned char c = b[i]; + + switch ( c ) + { + case '\a': + res += "\\a"; + break; + case '\b': + res += "\\b"; + break; + case '\f': + res += "\\f"; + break; + case '\n': + res += "\\n"; + break; + case '\r': + res += "\\r"; + break; + case '\t': + res += "\\t"; + break; + case '\v': + res += "\\v"; + break; + + case '\\': + res += "\\\\"; + break; + case '"': + res += "\\\""; + break; + + default: + if ( isprint(c) ) + res += c; + else + { + char buf[8192]; + snprintf(buf, sizeof buf, "%03o", c); + res += "\\"; + res += buf; + } + break; + } + } + + return res + "\""; + } + +ValTrace::ValTrace(const ValPtr& _v) : v(_v) + { + t = v->GetType(); + + switch ( t->Tag() ) + { + case TYPE_LIST: + TraceList(cast_intrusive(v)); + break; + + case TYPE_RECORD: + TraceRecord(cast_intrusive(v)); + break; + + case TYPE_TABLE: + TraceTable(cast_intrusive(v)); + break; + + case TYPE_VECTOR: + TraceVector(cast_intrusive(v)); + break; + + default: + break; + } + } + +ValTrace::~ValTrace() { } + +bool ValTrace::operator==(const ValTrace& vt) const + { + auto& vt_v = vt.GetVal(); + if ( vt_v == v ) + return true; + + auto tag = t->Tag(); + + if ( vt.GetType()->Tag() != tag ) + return false; + + switch ( tag ) + { + case TYPE_BOOL: + case TYPE_INT: + case TYPE_ENUM: + return v->AsInt() == vt_v->AsInt(); + + case TYPE_COUNT: + case TYPE_PORT: + return v->AsCount() == vt_v->AsCount(); + + case TYPE_DOUBLE: + case TYPE_INTERVAL: + case TYPE_TIME: + return v->AsDouble() == vt_v->AsDouble(); + + case 
TYPE_STRING: + return (*v->AsString()) == (*vt_v->AsString()); + + case TYPE_ADDR: + return v->AsAddr() == vt_v->AsAddr(); + + case TYPE_SUBNET: + return v->AsSubNet() == vt_v->AsSubNet(); + + case TYPE_FUNC: + return v->AsFile() == vt_v->AsFile(); + + case TYPE_FILE: + return v->AsFile() == vt_v->AsFile(); + + case TYPE_PATTERN: + return v->AsPattern() == vt_v->AsPattern(); + + case TYPE_ANY: + return v->AsSubNet() == vt_v->AsSubNet(); + + case TYPE_TYPE: + return v->AsType() == vt_v->AsType(); + + case TYPE_OPAQUE: + return false; // needs pointer equivalence + + case TYPE_LIST: + return SameList(vt); + + case TYPE_RECORD: + return SameRecord(vt); + + case TYPE_TABLE: + return SameTable(vt); + + case TYPE_VECTOR: + return SameVector(vt); + + default: + reporter->InternalError("bad type in ValTrace::operator=="); + } + } + +void ValTrace::ComputeDelta(const ValTrace* prev, DeltaVector& deltas) const + { + auto tag = t->Tag(); + + if ( prev ) + { + ASSERT(prev->GetType()->Tag() == tag); + + auto& prev_v = prev->GetVal(); + + if ( prev_v != v ) + { + if ( *this != *prev ) + deltas.emplace_back(std::make_unique(this, v)); + return; + } + } + + switch ( tag ) + { + case TYPE_BOOL: + case TYPE_INT: + case TYPE_ENUM: + case TYPE_COUNT: + case TYPE_PORT: + case TYPE_DOUBLE: + case TYPE_INTERVAL: + case TYPE_TIME: + case TYPE_STRING: + case TYPE_ADDR: + case TYPE_SUBNET: + case TYPE_FUNC: + case TYPE_PATTERN: + case TYPE_TYPE: + // These don't change in place. No need to create + // them as stand-alone variables, since we can just + // use the constant representation instead. + break; + + case TYPE_FILE: + case TYPE_OPAQUE: + case TYPE_ANY: + // These we have no way of creating as constants. + reporter->Error("cannot generate an event trace for an event of type %s", + type_name(tag)); + break; + + case TYPE_LIST: + // We shouldn't see these exposed directly, as they're + // not manipulable at script-level. 
An exception + // might be for "any" types that are then decomposed + // via compound assignment; for now, we don't support + // those. + reporter->InternalError("list type seen in ValTrace::ComputeDelta"); + break; + + case TYPE_RECORD: + if ( prev ) + ComputeRecordDelta(prev, deltas); + else + deltas.emplace_back(std::make_unique(this)); + break; + + case TYPE_TABLE: + if ( prev ) + ComputeTableDelta(prev, deltas); + + else if ( t->Yield() ) + deltas.emplace_back(std::make_unique(this)); + else + deltas.emplace_back(std::make_unique(this)); + break; + + case TYPE_VECTOR: + if ( prev ) + ComputeVectorDelta(prev, deltas); + else + deltas.emplace_back(std::make_unique(this)); + break; + + default: + reporter->InternalError("bad type in ValTrace::ComputeDelta"); + } + } + +void ValTrace::TraceList(const ListValPtr& lv) + { + auto vals = lv->Vals(); + for ( auto& v : vals ) + elems.emplace_back(std::make_shared(v)); + } + +void ValTrace::TraceRecord(const RecordValPtr& rv) + { + auto n = rv->NumFields(); + auto rt = rv->GetType(); + + for ( auto i = 0U; i < n; ++i ) + { + auto f = rv->RawOptField(i); + if ( f ) + { + auto val = f->ToVal(rt->GetFieldType(i)); + elems.emplace_back(std::make_shared(val)); + } + else + elems.emplace_back(nullptr); + } + } + +void ValTrace::TraceTable(const TableValPtr& tv) + { + for ( auto& elem : tv->ToMap() ) + { + auto& key = elem.first; + elems.emplace_back(std::make_shared(key)); + + auto& val = elem.second; + if ( val ) + elems2.emplace_back(std::make_shared(val)); + } + } + +void ValTrace::TraceVector(const VectorValPtr& vv) + { + auto& vec = vv->RawVec(); + auto n = vec->size(); + auto& yt = vv->RawYieldType(); + auto& yts = vv->RawYieldTypes(); + + for ( auto i = 0U; i < n; ++i ) + { + auto& elem = (*vec)[i]; + if ( elem ) + { + auto& t = yts ? 
(*yts)[i] : yt; + auto val = elem->ToVal(t); + elems.emplace_back(std::make_shared(val)); + } + else + elems.emplace_back(nullptr); + } + } + +bool ValTrace::SameList(const ValTrace& vt) const + { + return SameElems(vt); + } + +bool ValTrace::SameRecord(const ValTrace& vt) const + { + return SameElems(vt); + } + +bool ValTrace::SameTable(const ValTrace& vt) const + { + auto& vt_elems = vt.elems; + auto n = elems.size(); + if ( n != vt_elems.size() ) + return false; + + auto& vt_elems2 = vt.elems2; + auto n2 = elems2.size(); + if ( n2 != vt_elems2.size() ) + return false; + + ASSERT(n2 == 0 || n == n2); + + // We accommodate the possibility that keys are out-of-order + // between the two sets of elements. + + // The following is O(N^2), but presumably if tables are somehow + // involved (in fact we can only get here if they're used as + // indices into other tables), then they'll likely be small. + for ( auto i = 0U; i < n; ++i ) + { + auto& elem_i = elems[i]; + + // See if we can find a match for it. If we do, we don't + // have to worry that another entry matched it too, since + // all table/set indices will be distinct. + auto j = 0U; + for ( ; j < n; ++j ) + { + auto& vt_elem_j = vt_elems[j]; + if ( *elem_i == *vt_elem_j ) + break; + } + + if ( j == n ) + // No match for the index. + return false; + + if ( n2 > 0 ) + { + // Need a match for the corresponding yield values. 
+ if ( *elems2[i] != *vt_elems2[j] ) + return false; + } + } + + return true; + } + +bool ValTrace::SameVector(const ValTrace& vt) const + { + return SameElems(vt); + } + +bool ValTrace::SameElems(const ValTrace& vt) const + { + auto& vt_elems = vt.elems; + auto n = elems.size(); + if ( n != vt_elems.size() ) + return false; + + for ( auto i = 0U; i < n; ++i ) + { + auto& trace_i = elems[i]; + auto& vt_trace_i = vt_elems[i]; + + if ( trace_i && vt_trace_i ) + { + if ( *trace_i != *vt_trace_i ) + return false; + } + + else if ( trace_i || vt_trace_i ) + return false; + } + + return true; + } + +bool ValTrace::SameSingleton(const ValTrace& vt) const + { + return ! IsAggr(t) && *this == vt; + } + +void ValTrace::ComputeRecordDelta(const ValTrace* prev, DeltaVector& deltas) const + { + auto& prev_elems = prev->elems; + auto n = elems.size(); + if ( n != prev_elems.size() ) + reporter->InternalError("size inconsistency in ValTrace::ComputeRecordDelta"); + + for ( auto i = 0U; i < n; ++i ) + { + const auto trace_i = elems[i].get(); + const auto prev_trace_i = prev_elems[i].get(); + + if ( trace_i ) + { + if ( prev_trace_i ) + { + auto& v = trace_i->GetVal(); + auto& prev_v = prev_trace_i->GetVal(); + + if ( v == prev_v ) + { + trace_i->ComputeDelta(prev_trace_i, deltas); + continue; + } + + if ( trace_i->SameSingleton(*prev_trace_i) ) + // No further work needed. + continue; + } + + deltas.emplace_back(std::make_unique(this, i, trace_i->GetVal())); + } + + else if ( prev_trace_i ) + deltas.emplace_back(std::make_unique(this, i)); + } + } + +void ValTrace::ComputeTableDelta(const ValTrace* prev, DeltaVector& deltas) const + { + auto& prev_elems = prev->elems; + auto& prev_elems2 = prev->elems2; + + auto n = elems.size(); + auto is_set = elems2.size() == 0; + auto prev_n = prev_elems.size(); + + // We can't compare pointers for the indices because they're + // new objects generated afresh by TableVal::ToMap. 
So we do + // explict full comparisons for equality, distinguishing values + // newly added, common to both, or (implicitly) removed. We'll + // then go through the common to check them further. + // + // Our approach is O(N^2), but presumably these tables aren't + // large, and in any case generating event traces is not something + // requiring high performance, so we opt for conceptual simplicity. + + // Track which index values are newly added: + std::set added_indices; + + // Track which entry traces are in common. Indexed by previous + // trace elem index, yielding current trace elem index. + std::map common_entries; + + for ( auto i = 0U; i < n; ++i ) + { + const auto trace_i = elems[i].get(); + + bool common = false; + + for ( auto j = 0U; j < prev_n; ++j ) + { + const auto prev_trace_j = prev_elems[j].get(); + + if ( *trace_i == *prev_trace_j ) + { + common_entries[j] = i; + common = true; + break; + } + } + + if ( ! common ) + { + auto v = trace_i->GetVal(); + + if ( is_set ) + deltas.emplace_back(std::make_unique(this, v)); + else + { + auto yield = elems2[i]->GetVal(); + deltas.emplace_back(std::make_unique(this, v, yield)); + } + + added_indices.insert(v.get()); + } + } + + for ( auto j = 0U; j < prev_n; ++j ) + { + auto common_pair = common_entries.find(j); + if ( common_pair == common_entries.end() ) + { + auto& prev_trace = prev_elems[j]; + auto& v = prev_trace->GetVal(); + deltas.emplace_back(std::make_unique(this, v)); + continue; + } + + if ( is_set ) + continue; + + // If we get here, we're analyzing a table for which there's + // a common index. The remaining question is whether the + // yield has changed. + auto i = common_pair->second; + auto& trace2 = elems2[i]; + const auto prev_trace2 = prev_elems2[j]; + + auto& yield = trace2->GetVal(); + auto& prev_yield = prev_trace2->GetVal(); + + if ( yield == prev_yield ) + // Same yield, look for differences in its sub-elements. + trace2->ComputeDelta(prev_trace2.get(), deltas); + + else if ( ! 
trace2->SameSingleton(*prev_trace2) ) + deltas.emplace_back( + std::make_unique(this, elems[i]->GetVal(), yield)); + } + } + +void ValTrace::ComputeVectorDelta(const ValTrace* prev, DeltaVector& deltas) const + { + auto& prev_elems = prev->elems; + auto n = elems.size(); + auto prev_n = prev_elems.size(); + + // TODO: The following hasn't been tested for robustness to vector holes. + + if ( n < prev_n ) + { + // The vector shrank in size. Easiest to just build it + // from scratch. + deltas.emplace_back(std::make_unique(this)); + return; + } + + // Look for existing entries that need reassigment. + auto i = 0U; + for ( ; i < prev_n; ++i ) + { + const auto trace_i = elems[i].get(); + const auto prev_trace_i = prev_elems[i].get(); + + auto& elem_i = trace_i->GetVal(); + auto& prev_elem_i = prev_trace_i->GetVal(); + + if ( elem_i == prev_elem_i ) + trace_i->ComputeDelta(prev_trace_i, deltas); + else if ( ! trace_i->SameSingleton(*prev_trace_i) ) + deltas.emplace_back(std::make_unique(this, i, elem_i)); + } + + // Now append any new entries. 
+ for ( ; i < n; ++i ) + { + auto& trace_i = elems[i]; + auto& elem_i = trace_i->GetVal(); + deltas.emplace_back(std::make_unique(this, i, elem_i)); + } + } + +std::string ValDelta::Generate(ValTraceMgr* vtm) const + { + return ""; + } + +std::string DeltaReplaceValue::Generate(ValTraceMgr* vtm) const + { + return std::string(" = ") + vtm->ValName(new_val); + } + +std::string DeltaSetField::Generate(ValTraceMgr* vtm) const + { + auto rt = vt->GetType()->AsRecordType(); + auto f = rt->FieldName(field); + return std::string("$") + f + " = " + vtm->ValName(new_val); + } + +std::string DeltaRemoveField::Generate(ValTraceMgr* vtm) const + { + auto rt = vt->GetType()->AsRecordType(); + auto f = rt->FieldName(field); + return std::string("delete ") + vtm->ValName(vt) + "$" + f; + } + +std::string DeltaRecordCreate::Generate(ValTraceMgr* vtm) const + { + auto rv = cast_intrusive(vt->GetVal()); + auto rt = rv->GetType(); + auto n = rt->NumFields(); + + std::string args; + + for ( auto i = 0; i < n; ++i ) + { + auto v_i = rv->GetField(i); + if ( v_i ) + { + if ( ! 
args.empty() ) + args += ", "; + + args += std::string("$") + rt->FieldName(i) + "=" + vtm->ValName(v_i); + } + } + + auto name = rt->GetName(); + if ( name.empty() ) + name = "record"; + + return std::string(" = ") + name + "(" + args + ")"; + } + +std::string DeltaSetSetEntry::Generate(ValTraceMgr* vtm) const + { + return std::string("add ") + vtm->ValName(vt) + "[" + vtm->ValName(index) + "]"; + } + +std::string DeltaSetTableEntry::Generate(ValTraceMgr* vtm) const + { + return std::string("[") + vtm->ValName(index) + "] = " + vtm->ValName(new_val); + } + +std::string DeltaRemoveTableEntry::Generate(ValTraceMgr* vtm) const + { + return std::string("delete ") + vtm->ValName(vt) + "[" + vtm->ValName(index) + "]"; + } + +std::string DeltaSetCreate::Generate(ValTraceMgr* vtm) const + { + auto sv = cast_intrusive(vt->GetVal()); + auto members = sv->ToMap(); + + std::string args; + + for ( auto& m : members ) + { + if ( ! args.empty() ) + args += ", "; + + args += vtm->ValName(m.first); + } + + auto name = sv->GetType()->GetName(); + if ( name.empty() ) + name = "set"; + + return std::string(" = ") + name + "(" + args + ")"; + } + +std::string DeltaTableCreate::Generate(ValTraceMgr* vtm) const + { + auto tv = cast_intrusive(vt->GetVal()); + auto members = tv->ToMap(); + + std::string args; + + for ( auto& m : members ) + { + if ( ! 
args.empty() ) + args += ", "; + + args += std::string("[") + vtm->ValName(m.first) + "] = " + vtm->ValName(m.second); + } + + auto name = tv->GetType()->GetName(); + if ( name.empty() ) + name = "table"; + + return std::string(" = ") + name + "(" + args + ")"; + } + +std::string DeltaVectorSet::Generate(ValTraceMgr* vtm) const + { + return std::string("[") + std::to_string(index) + "] = " + vtm->ValName(elem); + } + +std::string DeltaVectorAppend::Generate(ValTraceMgr* vtm) const + { + return std::string("[") + std::to_string(index) + "] = " + vtm->ValName(elem); + } + +std::string DeltaVectorCreate::Generate(ValTraceMgr* vtm) const + { + auto& elems = vt->GetElems(); + std::string vec; + + for ( auto& e : elems ) + { + if ( vec.size() > 0 ) + vec += ", "; + + vec += vtm->ValName(e->GetVal()); + } + + return std::string(" = vector(") + vec + ")"; + } + +EventTrace::EventTrace(const ScriptFunc* _ev, double _nt, int event_num) : ev(_ev), nt(_nt) + { + auto ev_name = std::regex_replace(ev->Name(), std::regex(":"), "_"); + + name = ev_name + "_" + std::to_string(event_num) + "__et"; + } + +void EventTrace::Generate(FILE* f, ValTraceMgr& vtm, const DeltaGenVec& dvec, std::string successor, + int num_pre) const + { + int offset = 0; + for ( auto& d : dvec ) + { + auto& val = d.GetVal(); + + if ( d.IsFirstDef() && vtm.IsGlobal(val) ) + { + auto& val_name = vtm.ValName(val); + + std::string type_name; + auto& t = val->GetType(); + auto& tn = t->GetName(); + if ( tn.empty() ) + { + ODesc d; + t->Describe(&d); + type_name = d.Description(); + } + else + type_name = tn; + + auto anno = offset < num_pre ? " # from script" : ""; + + fprintf(f, "global %s: %s;%s\n", val_name.c_str(), type_name.c_str(), anno); + } + + ++offset; + } + + fprintf(f, "event %s()\n", name.c_str()); + fprintf(f, "\t{\n"); + + offset = 0; + for ( auto& d : dvec ) + { + fprintf(f, "\t"); + + auto& val = d.GetVal(); + + if ( d.IsFirstDef() && ! 
vtm.IsGlobal(val) ) + fprintf(f, "local "); + + if ( d.NeedsLHS() ) + fprintf(f, "%s", vtm.ValName(val).c_str()); + + auto anno = offset < num_pre ? " # from script" : ""; + + fprintf(f, "%s;%s\n", d.RHS().c_str(), anno); + + ++offset; + } + + if ( ! dvec.empty() ) + fprintf(f, "\n"); + + fprintf(f, "\tevent %s(%s);\n\n", ev->Name(), args.c_str()); + + if ( successor.empty() ) + { + // The following isn't necessary with our current approach + // to managing chains of events, which avoids having to set + // exit_only_after_terminate=T. + // fprintf(f, "\tterminate();\n"); + } + else + { + fprintf(f, "\tset_network_time(double_to_time(%.06f));\n", nt); + fprintf(f, "\tevent __EventTrace::%s();\n", successor.c_str()); + } + + fprintf(f, "\t}\n"); + } + +void EventTrace::Generate(FILE* f, ValTraceMgr& vtm, const EventTrace* predecessor, + std::string successor) const + { + if ( predecessor ) + { + auto& pre_deltas = predecessor->post_deltas; + int num_pre = pre_deltas.size(); + + if ( num_pre > 0 ) + { + auto total_deltas = pre_deltas; + total_deltas.insert(total_deltas.end(), deltas.begin(), deltas.end()); + Generate(f, vtm, total_deltas, successor, num_pre); + return; + } + } + + Generate(f, vtm, deltas, successor); + } + +void ValTraceMgr::TraceEventValues(std::shared_ptr et, const zeek::Args* args) + { + curr_ev = std::move(et); + + auto num_vals = vals.size(); + + std::string ev_args; + for ( auto& a : *args ) + { + AddVal(a); + + if ( ! ev_args.empty() ) + ev_args += ", "; + + ev_args += ValName(a); + } + + curr_ev->SetArgs(ev_args); + + // Now look for any values newly-processed with this event and + // remember them so we can catch uses of them in future events. 
+ for ( auto i = num_vals; i < vals.size(); ++i ) + { + processed_vals.insert(vals[i].get()); + ASSERT(val_names.count(vals[i].get()) > 0); + } + } + +void ValTraceMgr::FinishCurrentEvent(const zeek::Args* args) + { + auto num_vals = vals.size(); + + curr_ev->SetDoingPost(); + + for ( auto& a : *args ) + AddVal(a); + + for ( auto i = num_vals; i < vals.size(); ++i ) + processed_vals.insert(vals[i].get()); + } + +const std::string& ValTraceMgr::ValName(const ValPtr& v) + { + auto find = val_names.find(v.get()); + if ( find == val_names.end() ) + { + if ( IsAggr(v->GetType()) ) + { // Aggregate shouldn't exist; create it + ASSERT(val_map.count(v.get()) == 0); + NewVal(v); + find = val_names.find(v.get()); + } + + else + { // Non-aggregate can be expressed using a constant + auto tag = v->GetType()->Tag(); + std::string rep; + + if ( tag == TYPE_STRING ) + { + auto s = v->AsStringVal(); + rep = escape_string(s->Bytes(), s->Len()); + } + + else if ( tag == TYPE_LIST ) + { + auto lv = cast_intrusive(v); + for ( auto& v_i : lv->Vals() ) + { + if ( ! 
rep.empty() ) + rep += ", "; + + rep += ValName(v_i); + } + } + + else if ( tag == TYPE_FUNC ) + rep = v->AsFunc()->Name(); + + else if ( tag == TYPE_TIME ) + rep = std::string("double_to_time(") + std::to_string(v->AsDouble()) + ")"; + + else if ( tag == TYPE_INTERVAL ) + rep = std::string("double_to_interval(") + std::to_string(v->AsDouble()) + ")"; + + else + { + ODesc d; + v->Describe(&d); + rep = d.Description(); + } + + val_names[v.get()] = rep; + vals.push_back(v); + find = val_names.find(v.get()); + } + + ASSERT(find != val_names.end()); + } + + ValUsed(v); + + return find->second; + } + +void ValTraceMgr::AddVal(ValPtr v) + { + auto mapping = val_map.find(v.get()); + + if ( mapping == val_map.end() ) + NewVal(v); + else + { + auto vt = std::make_shared(v); + AssessChange(vt.get(), mapping->second.get()); + val_map[v.get()] = vt; + } + } + +void ValTraceMgr::NewVal(ValPtr v) + { + // Make sure the Val sticks around into the future. + vals.push_back(v); + + auto vt = std::make_shared(v); + AssessChange(vt.get(), nullptr); + val_map[v.get()] = vt; + } + +void ValTraceMgr::ValUsed(const ValPtr& v) + { + ASSERT(val_names.count(v.get()) > 0); + if ( processed_vals.count(v.get()) > 0 ) + // We saw this value when processing a previous event. + globals.insert(v.get()); + } + +void ValTraceMgr::AssessChange(const ValTrace* vt, const ValTrace* prev_vt) + { + DeltaVector deltas; + + vt->ComputeDelta(prev_vt, deltas); + + // Used to track deltas across the batch, to suppress redundant ones + // (which can arise due to two aggregates both including the same + // sub-element). 
+ std::unordered_set previous_deltas; + + for ( auto& d : deltas ) + { + auto vp = d->GetValTrace()->GetVal(); + auto v = vp.get(); + auto rhs = d->Generate(this); + + bool needs_lhs = d->NeedsLHS(); + bool is_first_def = false; + + if ( needs_lhs && val_names.count(v) == 0 ) + { + TrackVar(v); + is_first_def = true; + } + + ASSERT(val_names.count(v) > 0); + + // The "/" in the following is just to have a delimiter + // to make sure the string is unambiguous. + auto full_delta = val_names[v] + "/" + rhs; + if ( previous_deltas.count(full_delta) > 0 ) + continue; + + previous_deltas.insert(full_delta); + + ValUsed(vp); + curr_ev->AddDelta(vp, rhs, needs_lhs, is_first_def); + } + + auto& v = vt->GetVal(); + if ( IsAggr(v->GetType()) ) + ValUsed(vt->GetVal()); + } + +void ValTraceMgr::TrackVar(const Val* v) + { + auto val_name = std::string("__val") + std::to_string(num_vars++); + val_names[v] = val_name; + } + +EventTraceMgr::EventTraceMgr(const std::string& trace_file) + { + f = fopen(trace_file.c_str(), "w"); + if ( ! f ) + reporter->FatalError("can't open event trace file %s", trace_file.c_str()); + } + +EventTraceMgr::~EventTraceMgr() + { + if ( events.empty() ) + return; + + fprintf(f, "module __EventTrace;\n\n"); + + for ( auto& e : events ) + fprintf(f, "global %s: event();\n", e->GetName()); + + fprintf(f, "\nevent zeek_init() &priority=-999999\n"); + fprintf(f, "\t{\n"); + fprintf(f, "\tevent __EventTrace::%s();\n", events.front()->GetName()); + fprintf(f, "\t}\n"); + + for ( auto i = 0U; i < events.size(); ++i ) + { + fprintf(f, "\n"); + + auto predecessor = i > 0 ? events[i - 1] : nullptr; + auto successor = i + 1 < events.size() ? 
events[i + 1]->GetName() : ""; + events[i]->Generate(f, vtm, predecessor.get(), successor); + } + + fclose(f); + } + +void EventTraceMgr::StartEvent(const ScriptFunc* ev, const zeek::Args* args) + { + if ( script_events.count(ev->Name()) > 0 ) + return; + + auto nt = run_state::network_time; + if ( nt == 0.0 ) + return; + + auto et = std::make_shared(ev, nt, events.size()); + events.emplace_back(et); + + vtm.TraceEventValues(et, args); + } + +void EventTraceMgr::EndEvent(const ScriptFunc* ev, const zeek::Args* args) + { + if ( script_events.count(ev->Name()) > 0 ) + return; + + if ( run_state::network_time > 0.0 ) + vtm.FinishCurrentEvent(args); + } + +void EventTraceMgr::ScriptEventQueued(const EventHandlerPtr& h) + { + script_events.insert(h->Name()); + } + + } // namespace zeek::detail diff --git a/src/EventTrace.h b/src/EventTrace.h new file mode 100644 index 0000000000..ff28496f6a --- /dev/null +++ b/src/EventTrace.h @@ -0,0 +1,464 @@ +// Classes for tracing/dumping Zeek events. + +#pragma once + +#include "zeek/Val.h" + +namespace zeek::detail + { + +class ValTrace; +class ValTraceMgr; + +// Abstract class for capturing a single difference between two script-level +// values. Includes notions of inserting, changing, or deleting a value. +class ValDelta + { +public: + ValDelta(const ValTrace* _vt) : vt(_vt) { } + virtual ~ValDelta() { } + + // Return a string that performs the update operation, expressed + // as Zeek scripting. Does not include a terminating semicolon. + virtual std::string Generate(ValTraceMgr* vtm) const; + + // Whether the generated string needs the affected value to + // explicitly appear on the left-hand-side. Note that this + // might not be as a simple "LHS = RHS" assignment, but instead + // as "LHS$field = RHS" or "LHS[index] = RHS". + // + // Returns false for generated strings like "delete LHS[index]". 
+ virtual bool NeedsLHS() const { return true; } + + const ValTrace* GetValTrace() const { return vt; } + +protected: + const ValTrace* vt; + }; + +using DeltaVector = std::vector>; + +// Tracks the elements of a value as seen at a given point in execution. +// For non-aggregates, this is simply the Val object, but for aggregates +// it is (recursively) each of the sub-elements, in a manner that can then +// be readily compared against future instances. +class ValTrace + { +public: + ValTrace(const ValPtr& v); + ~ValTrace(); + + const ValPtr& GetVal() const { return v; } + const TypePtr& GetType() const { return t; } + const auto& GetElems() const { return elems; } + + // Returns true if this trace and the given one represent the + // same underlying value. Can involve subelement-by-subelement + // (recursive) comparisons. + bool operator==(const ValTrace& vt) const; + bool operator!=(const ValTrace& vt) const { return ! ((*this) == vt); } + + // Computes the deltas between a previous ValTrace and this one. + // If "prev" is nil then we're creating this value from scratch + // (though if it's an aggregate, we may reuse existing values + // for some of its components). + // + // Returns the accumulated differences in "deltas". If on return + // nothing was added to "deltas" then the two ValTrace's are equivalent + // (no changes between them). + void ComputeDelta(const ValTrace* prev, DeltaVector& deltas) const; + +private: + // Methods for tracing different types of aggregate values. + void TraceList(const ListValPtr& lv); + void TraceRecord(const RecordValPtr& rv); + void TraceTable(const TableValPtr& tv); + void TraceVector(const VectorValPtr& vv); + + // Predicates for comparing different types of aggregates for equality. 
+ bool SameList(const ValTrace& vt) const; + bool SameRecord(const ValTrace& vt) const; + bool SameTable(const ValTrace& vt) const; + bool SameVector(const ValTrace& vt) const; + + // Helper function that knows about the internal vector-of-subelements + // we use for aggregates. + bool SameElems(const ValTrace& vt) const; + + // True if this value is a singleton and it's the same value as + // represented in "vt". + bool SameSingleton(const ValTrace& vt) const; + + // Add to "deltas" the differences needed to turn a previous instance + // of the given type of aggregate to the current instance. + void ComputeRecordDelta(const ValTrace* prev, DeltaVector& deltas) const; + void ComputeTableDelta(const ValTrace* prev, DeltaVector& deltas) const; + void ComputeVectorDelta(const ValTrace* prev, DeltaVector& deltas) const; + + // Holds sub-elements for aggregates. + std::vector> elems; + + // A parallel vector used for the yield values of tables. + std::vector> elems2; + + ValPtr v; + TypePtr t; // v's type, for convenience + }; + +// Captures the basic notion of a new, non-equivalent value being assigned. +class DeltaReplaceValue : public ValDelta + { +public: + DeltaReplaceValue(const ValTrace* _vt, ValPtr _new_val) + : ValDelta(_vt), new_val(std::move(_new_val)) + { + } + + std::string Generate(ValTraceMgr* vtm) const override; + +private: + ValPtr new_val; + }; + +// Captures the notion of setting a record field. +class DeltaSetField : public ValDelta + { +public: + DeltaSetField(const ValTrace* _vt, int _field, ValPtr _new_val) + : ValDelta(_vt), field(_field), new_val(std::move(_new_val)) + { + } + + std::string Generate(ValTraceMgr* vtm) const override; + +private: + int field; + ValPtr new_val; + }; + +// Captures the notion of deleting a record field. 
+class DeltaRemoveField : public ValDelta + { +public: + DeltaRemoveField(const ValTrace* _vt, int _field) : ValDelta(_vt), field(_field) { } + + std::string Generate(ValTraceMgr* vtm) const override; + bool NeedsLHS() const override { return false; } + +private: + int field; + }; + +// Captures the notion of creating a record from scratch. +class DeltaRecordCreate : public ValDelta + { +public: + DeltaRecordCreate(const ValTrace* _vt) : ValDelta(_vt) { } + + std::string Generate(ValTraceMgr* vtm) const override; + }; + +// Captures the notion of adding an element to a set. Use DeltaRemoveTableEntry to +// delete values. +class DeltaSetSetEntry : public ValDelta + { +public: + DeltaSetSetEntry(const ValTrace* _vt, ValPtr _index) : ValDelta(_vt), index(std::move(_index)) { } + + std::string Generate(ValTraceMgr* vtm) const override; + bool NeedsLHS() const override { return false; } + +private: + ValPtr index; + }; + +// Captures the notion of setting a table entry (which includes both changing +// an existing one and adding a new one). Use DeltaRemoveTableEntry to +// delete values. +class DeltaSetTableEntry : public ValDelta + { +public: + DeltaSetTableEntry(const ValTrace* _vt, ValPtr _index, ValPtr _new_val) + : ValDelta(_vt), index(std::move(_index)), new_val(std::move(_new_val)) + { + } + + std::string Generate(ValTraceMgr* vtm) const override; + +private: + ValPtr index; + ValPtr new_val; + }; + +// Captures the notion of removing a table/set entry. +class DeltaRemoveTableEntry : public ValDelta + { +public: + DeltaRemoveTableEntry(const ValTrace* _vt, ValPtr _index) + : ValDelta(_vt), index(std::move(_index)) + { + } + + std::string Generate(ValTraceMgr* vtm) const override; + bool NeedsLHS() const override { return false; } + +private: + ValPtr index; + }; + +// Captures the notion of creating a set from scratch. 
+class DeltaSetCreate : public ValDelta + { +public: + DeltaSetCreate(const ValTrace* _vt) : ValDelta(_vt) { } + + std::string Generate(ValTraceMgr* vtm) const override; + }; + +// Captures the notion of creating a table from scratch. +class DeltaTableCreate : public ValDelta + { +public: + DeltaTableCreate(const ValTrace* _vt) : ValDelta(_vt) { } + + std::string Generate(ValTraceMgr* vtm) const override; + }; + +// Captures the notion of changing an element of a vector. +class DeltaVectorSet : public ValDelta + { +public: + DeltaVectorSet(const ValTrace* _vt, int _index, ValPtr _elem) + : ValDelta(_vt), index(_index), elem(std::move(_elem)) + { + } + + std::string Generate(ValTraceMgr* vtm) const override; + +private: + int index; + ValPtr elem; + }; + +// Captures the notion of adding an entry to the end of a vector. +class DeltaVectorAppend : public ValDelta + { +public: + DeltaVectorAppend(const ValTrace* _vt, int _index, ValPtr _elem) + : ValDelta(_vt), index(_index), elem(std::move(_elem)) + { + } + + std::string Generate(ValTraceMgr* vtm) const override; + +private: + int index; + ValPtr elem; + }; + +// Captures the notion of replacing a vector wholesale. +class DeltaVectorCreate : public ValDelta + { +public: + DeltaVectorCreate(const ValTrace* _vt) : ValDelta(_vt) { } + + std::string Generate(ValTraceMgr* vtm) const override; + +private: + }; + +// Manages the changes to (or creation of) a variable used to represent +// a value. +class DeltaGen + { +public: + DeltaGen(ValPtr _val, std::string _rhs, bool _needs_lhs, bool _is_first_def) + : val(std::move(_val)), rhs(std::move(_rhs)), needs_lhs(_needs_lhs), + is_first_def(_is_first_def) + { + } + + const ValPtr& GetVal() const { return val; } + const std::string& RHS() const { return rhs; } + bool NeedsLHS() const { return needs_lhs; } + bool IsFirstDef() const { return is_first_def; } + +private: + ValPtr val; + + // The expression to set the variable to. 
+ std::string rhs; + + // Whether that expression needs the variable explicitly provided + // on the lefthand side. + bool needs_lhs; + + // Whether this is the first definition of the variable (in which + // case we also need to declare the variable). + bool is_first_def; + }; + +using DeltaGenVec = std::vector; + +// Tracks a single event. +class EventTrace + { +public: + // Constructed in terms of the associated script function, "network + // time" when the event occurred, and the position of this event + // within all of those being traced. + EventTrace(const ScriptFunc* _ev, double _nt, int event_num); + + // Sets a string representation of the arguments (values) being + // passed to the event. + void SetArgs(std::string _args) { args = std::move(_args); } + + // Adds to the trace an update for the given value. + void AddDelta(ValPtr val, std::string rhs, bool needs_lhs, bool is_first_def) + { + auto& d = is_post ? post_deltas : deltas; + d.emplace_back(DeltaGen(val, rhs, needs_lhs, is_first_def)); + } + + // Initially we analyze events pre-execution. When this flag + // is set, we switch to instead analyzing post-execution. The + // difference allows us to annotate the output with "# from script" + // comments that flag changes created by script execution rather + // than event engine activity. + void SetDoingPost() { is_post = true; } + + const char* GetName() const { return name.c_str(); } + + // Generates an internal event handler that sets up the values + // associated with the traced event, followed by queueing the traced + // event, and then queueing the successor internal event handler, + // if any. + // + // "predecessor", if non-nil, gives the event that came just before + // this one (used for "# from script" annotations). "successor", + // if not empty, gives the name of the successor internal event. 
+ void Generate(FILE* f, ValTraceMgr& vtm, const EventTrace* predecessor, + std::string successor) const; + +private: + // "dvec" is either just our deltas, or the "post_deltas" of our + // predecessor plus our deltas. + void Generate(FILE* f, ValTraceMgr& vtm, const DeltaGenVec& dvec, std::string successor, + int num_pre = 0) const; + + const ScriptFunc* ev; + double nt; + bool is_post = false; + + // The deltas needed to construct the values associated with this + // event prior to its execution. + DeltaGenVec deltas; + + // The deltas capturing any changes to the original values as induced + // by executing its event handlers. + DeltaGenVec post_deltas; + + // The event's name and a string representation of its arguments. + std::string name; + std::string args; + }; + +// Manages all of the events and associated values seen during the execution. +class ValTraceMgr + { +public: + // Invoked to trace a new event with the associated arguments. + void TraceEventValues(std::shared_ptr et, const zeek::Args* args); + + // Invoked when the current event finishes execution. The arguments + // are again provided, for convenience so we don't have to remember + // them from the previous method. + void FinishCurrentEvent(const zeek::Args* args); + + // Returns the name of the script variable associated with the + // given value. + const std::string& ValName(const ValPtr& v); + const std::string& ValName(const ValTrace* vt) { return ValName(vt->GetVal()); } + + // Returns true if the script variable associated with the given value + // needs to be global (because it's used across multiple events). + bool IsGlobal(const ValPtr& v) const { return globals.count(v.get()) > 0; } + +private: + // Traces the given value, which we may-or-may-not have seen before. + void AddVal(ValPtr v); + + // Creates a new value, associating a script variable with it. + void NewVal(ValPtr v); + + // Called when the given value is used in an expression that sets + // or updates another value. 
This lets us track which values are + // used across multiple events, and thus need to be global. + void ValUsed(const ValPtr& v); + + // Compares the two value traces to build up deltas capturing + // the difference between the previous one and the current one. + void AssessChange(const ValTrace* vt, const ValTrace* prev_vt); + + // Create and track a script variable associated with the given value. + void TrackVar(const Val* vt); + + // Maps values to their associated traces. + std::unordered_map> val_map; + + // Maps values to the "names" we associated with them. For simple + // values, the name is just a Zeek script constant. For aggregates, + // it's a dedicated script variable. + std::unordered_map val_names; + int num_vars = 0; // the number of dedicated script variables + + // Tracks which values we've processed up through the preceding event. + // Any re-use we then see for the current event (via a ValUsed() call) + // then tells us that the value is used across events, and thus its + // associated script variable needs to be global. + std::unordered_set processed_vals; + + // Tracks which values have associated script variables that need + // to be global. + std::unordered_set globals; + + // The event we're currently tracing. + std::shared_ptr curr_ev; + + // Hang on to values we're tracking to make sure the pointers don't + // get reused when the main use of the value ends. + std::vector vals; + }; + +// Manages tracing of all of the events seen during execution, including +// the final generation of the trace script. +class EventTraceMgr + { +public: + EventTraceMgr(const std::string& trace_file); + ~EventTraceMgr(); + + // Called at the beginning of invoking an event's handlers. + void StartEvent(const ScriptFunc* ev, const zeek::Args* args); + + // Called after finishing with invoking an event's handlers. + void EndEvent(const ScriptFunc* ev, const zeek::Args* args); + + // Used to track events generated at script-level. 
+ void ScriptEventQueued(const EventHandlerPtr& h); + +private: + FILE* f = nullptr; + ValTraceMgr vtm; + + // All of the events we've traced so far. + std::vector> events; + + // The names of all of the script events that have been generated. + std::unordered_set script_events; + }; + +// If non-nil then we're doing event tracing. +extern std::unique_ptr etm; + + } // namespace zeek::detail diff --git a/src/Expr.cc b/src/Expr.cc index 396a860cab..997c1509e9 100644 --- a/src/Expr.cc +++ b/src/Expr.cc @@ -8,16 +8,19 @@ #include "zeek/Desc.h" #include "zeek/Event.h" #include "zeek/EventRegistry.h" +#include "zeek/EventTrace.h" #include "zeek/Frame.h" #include "zeek/Func.h" #include "zeek/Hash.h" #include "zeek/IPAddr.h" #include "zeek/RE.h" +#include "zeek/Reporter.h" #include "zeek/RunState.h" #include "zeek/Scope.h" #include "zeek/Stmt.h" #include "zeek/Traverse.h" #include "zeek/Trigger.h" +#include "zeek/Type.h" #include "zeek/broker/Data.h" #include "zeek/digest.h" #include "zeek/module_util.h" @@ -26,16 +29,6 @@ namespace zeek::detail { -static bool init_tag_check(const Expr* expr, const char* name, TypeTag expect_tag, TypeTag init_tag) - { - if ( expect_tag == init_tag ) - return true; - - auto msg = util::fmt("unexpected use of %s in '%s' initialization", name, type_name(init_tag)); - expr->Error(msg); - return false; - } - const char* expr_name(BroExprTag t) { static const char* expr_names[int(NUM_EXPRS)] = { @@ -263,11 +256,6 @@ bool Expr::InvertSense() return false; } -void Expr::EvalIntoAggregate(const TypePtr& /* t */, ValPtr /* aggr */, Frame* /* f */) const - { - Internal("Expr::EvalIntoAggregate called"); - } - void Expr::Assign(Frame* /* f */, ValPtr /* v */) { Internal("Expr::Assign called"); @@ -411,25 +399,6 @@ bool Expr::IsRecordElement(TypeDecl* /* td */) const return false; } -bool Expr::IsPure() const - { - return true; - } - -ValPtr Expr::InitVal(const TypePtr& t, ValPtr aggr) const - { - if ( aggr ) - { - Error("bad initializer"); - return 
nullptr; - } - - if ( IsError() ) - return nullptr; - - return check_and_promote(Eval(nullptr), t, true); - } - bool Expr::IsError() const { return type && type->Tag() == TYPE_ERROR; @@ -582,11 +551,6 @@ void NameExpr::Assign(Frame* f, ValPtr v) f->SetElement(id, std::move(v)); } -bool NameExpr::IsPure() const - { - return id->IsConst(); - } - TraversalCode NameExpr::Traverse(TraversalCallback* cb) const { TraversalCode tc = cb->PreExpr(this); @@ -838,17 +802,28 @@ void BinaryExpr::ExprDescribe(ODesc* d) const ValPtr BinaryExpr::Fold(Val* v1, Val* v2) const { - InternalTypeTag it = v1->GetType()->InternalType(); + auto& t1 = v1->GetType(); + InternalTypeTag it = t1->InternalType(); if ( it == TYPE_INTERNAL_STRING ) return StringFold(v1, v2); - if ( v1->GetType()->Tag() == TYPE_PATTERN ) + if ( t1->Tag() == TYPE_PATTERN ) return PatternFold(v1, v2); - if ( v1->GetType()->IsSet() ) + if ( t1->IsSet() ) return SetFold(v1, v2); + if ( t1->IsTable() ) + return TableFold(v1, v2); + + if ( t1->Tag() == TYPE_VECTOR ) + { + // We only get here when using a matching vector on the RHS. + v2->AsVectorVal()->AddTo(v1, false); + return {NewRef{}, v1}; + } + if ( it == TYPE_INTERNAL_ADDR ) return AddrFold(v1, v2); @@ -1143,6 +1118,19 @@ ValPtr BinaryExpr::SetFold(Val* v1, Val* v2) const reporter->InternalError("confusion over canonicalization in set comparison"); break; + case EXPR_ADD_TO: + // Avoid doing the AddTo operation if tv2 is empty, + // because then it might not type-check for trivial + // reasons. 
+ if ( tv2->Size() > 0 ) + tv2->AddTo(tv1, false); + return {NewRef{}, tv1}; + + case EXPR_REMOVE_FROM: + if ( tv2->Size() > 0 ) + tv2->RemoveFrom(tv1); + return {NewRef{}, tv1}; + default: BadTag("BinaryExpr::SetFold", expr_name(tag)); return nullptr; @@ -1151,6 +1139,33 @@ ValPtr BinaryExpr::SetFold(Val* v1, Val* v2) const return val_mgr->Bool(res); } +ValPtr BinaryExpr::TableFold(Val* v1, Val* v2) const + { + TableVal* tv1 = v1->AsTableVal(); + TableVal* tv2 = v2->AsTableVal(); + + switch ( tag ) + { + case EXPR_ADD_TO: + // Avoid doing the AddTo operation if tv2 is empty, + // because then it might not type-check for trivial + // reasons. + if ( tv2->Size() > 0 ) + tv2->AddTo(tv1, false); + return {NewRef{}, tv1}; + + case EXPR_REMOVE_FROM: + if ( tv2->Size() > 0 ) + tv2->RemoveFrom(tv1); + return {NewRef{}, tv1}; + + default: + BadTag("BinaryExpr::TableFold", expr_name(tag)); + } + + return nullptr; + } + ValPtr BinaryExpr::AddrFold(Val* v1, Val* v2) const { IPAddr a1 = v1->AsAddr(); @@ -1219,9 +1234,6 @@ void BinaryExpr::PromoteOps(TypeTag t) if ( is_vec2 ) bt2 = op2->GetType()->AsVectorType()->Yield()->Tag(); - if ( (is_vec1 || is_vec2) && ! (is_vec1 && is_vec2) ) - reporter->Warning("mixing vector and scalar operands is deprecated"); - if ( bt1 != t ) op1 = make_intrusive(op1, t); if ( bt2 != t ) @@ -1249,6 +1261,109 @@ void BinaryExpr::PromoteForInterval(ExprPtr& op) op = make_intrusive(op, TYPE_DOUBLE); } +bool BinaryExpr::IsScalarAggregateOp() const + { + const bool is_vec1 = IsAggr(op1->GetType()->Tag()) || is_list(op1); + const bool is_vec2 = IsAggr(op2->GetType()->Tag()) || is_list(op2); + const bool either_vec = is_vec1 || is_vec2; + const bool both_vec = is_vec1 && is_vec2; + + return either_vec && ! both_vec; + } + +void BinaryExpr::CheckScalarAggOp() const + { + if ( ! 
IsError() && IsScalarAggregateOp() ) + { + reporter->Warning("mixing vector and scalar operands is deprecated (%s) (%s)", + type_name(op1->GetType()->Tag()), type_name(op2->GetType()->Tag())); + } + } + +bool BinaryExpr::CheckForRHSList() + { + if ( op2->Tag() != EXPR_LIST ) + return false; + + auto lhs_t = op1->GetType(); + auto rhs = cast_intrusive(op2); + auto& rhs_exprs = rhs->Exprs(); + + if ( lhs_t->Tag() == TYPE_TABLE ) + { + if ( lhs_t->IsSet() && rhs_exprs.size() >= 1 && same_type(lhs_t, rhs_exprs[0]->GetType()) ) + { + // This is potentially the idiom of "set1 += { set2 }" + // or "set1 += { set2, set3, set4 }". + op2 = {NewRef{}, rhs_exprs[0]}; + + for ( auto i = 1U; i < rhs_exprs.size(); ++i ) + { + ExprPtr re_i = {NewRef{}, rhs_exprs[i]}; + op2 = make_intrusive(EXPR_OR, op2, re_i); + } + + SetType(op1->GetType()); + + return true; + } + + if ( lhs_t->IsTable() && rhs_exprs.size() == 1 && + same_type(lhs_t, rhs_exprs[0]->GetType()) ) + { + // This is the idiom of "table1 += { table2 }" (or -=). + // Unlike for sets we don't allow more than one table + // in the RHS list because table "union" isn't + // well-defined. + op2 = {NewRef{}, rhs_exprs[0]}; + SetType(op1->GetType()); + + return true; + } + + if ( lhs_t->IsTable() ) + op2 = make_intrusive(rhs, nullptr, lhs_t); + else + op2 = make_intrusive(rhs, nullptr, lhs_t); + } + + else if ( lhs_t->Tag() == TYPE_VECTOR ) + { + if ( tag == EXPR_REMOVE_FROM ) + { + ExprError("constructor list not allowed for -= operations on vectors"); + return false; + } + + op2 = make_intrusive(rhs, lhs_t); + } + + else + { + ExprError("invalid constructor list on RHS of assignment"); + return false; + } + + if ( op2->IsError() ) + { + // Message should have already been generated, but propagate. + SetError(); + return false; + } + + // Don't bother type-checking for the degenerate case of the RHS + // being empty, since it won't actually matter. + if ( ! rhs_exprs.empty() && ! 
same_type(op1->GetType(), op2->GetType()) ) + { + ExprError("type clash for constructor list on RHS of assignment"); + return false; + } + + SetType(op1->GetType()); + + return true; + } + CloneExpr::CloneExpr(ExprPtr arg_op) : UnaryExpr(EXPR_CLONE, std::move(arg_op)) { if ( IsError() ) @@ -1350,11 +1465,6 @@ ValPtr IncrExpr::Eval(Frame* f) const } } -bool IncrExpr::IsPure() const - { - return false; - } - ComplementExpr::ComplementExpr(ExprPtr arg_op) : UnaryExpr(EXPR_COMPLEMENT, std::move(arg_op)) { if ( IsError() ) @@ -1520,6 +1630,8 @@ AddExpr::AddExpr(ExprPtr arg_op1, ExprPtr arg_op2) else ExprError("requires arithmetic operands"); + CheckScalarAggOp(); + if ( base_result_type ) { if ( is_vector(op1) || is_vector(op2) ) @@ -1538,23 +1650,54 @@ void AddExpr::Canonicize() } AddToExpr::AddToExpr(ExprPtr arg_op1, ExprPtr arg_op2) - : BinaryExpr(EXPR_ADD_TO, is_vector(arg_op1) ? std::move(arg_op1) : arg_op1->MakeLvalue(), - std::move(arg_op2)) + : BinaryExpr(EXPR_ADD_TO, std::move(arg_op1), std::move(arg_op2)) { if ( IsError() ) return; - TypeTag bt1 = op1->GetType()->Tag(); - TypeTag bt2 = op2->GetType()->Tag(); + auto& t1 = op1->GetType(); + auto& t2 = op2->GetType(); + TypeTag bt1 = t1->Tag(); + TypeTag bt2 = t2->Tag(); + + if ( bt1 != TYPE_TABLE && bt1 != TYPE_VECTOR && bt1 != TYPE_PATTERN ) + op1 = op1->MakeLvalue(); if ( BothArithmetic(bt1, bt2) ) PromoteType(max_type(bt1, bt2), is_vector(op1) || is_vector(op2)); else if ( BothString(bt1, bt2) || BothInterval(bt1, bt2) ) SetType(base_type(bt1)); + else if ( bt2 == TYPE_LIST ) + (void)CheckForRHSList(); + + else if ( bt1 == TYPE_TABLE ) + { + if ( same_type(t1, t2) ) + SetType(t1); + else + ExprError("RHS type mismatch for table/set +="); + } + + else if ( bt1 == TYPE_PATTERN ) + { + if ( bt2 != TYPE_PATTERN ) + ExprError("pattern += op requires op to be a pattern"); + else + SetType(t1); + } + else if ( IsVector(bt1) ) { - bt1 = op1->GetType()->AsVectorType()->Yield()->Tag(); + if ( same_type(t1, t2) ) + { + 
SetType(t1); + return; + } + + is_vector_elem_append = true; + + bt1 = t1->AsVectorType()->Yield()->Tag(); if ( IsArithmetic(bt1) ) { @@ -1563,7 +1706,7 @@ AddToExpr::AddToExpr(ExprPtr arg_op1, ExprPtr arg_op2) if ( bt2 != bt1 ) op2 = make_intrusive(std::move(op2), bt1); - SetType(op1->GetType()); + SetType(t1); } else @@ -1575,7 +1718,7 @@ AddToExpr::AddToExpr(ExprPtr arg_op1, ExprPtr arg_op2) util::fmt("incompatible vector append: %s and %s", type_name(bt1), type_name(bt2))); else - SetType(op1->GetType()); + SetType(t1); } else @@ -1594,7 +1737,7 @@ ValPtr AddToExpr::Eval(Frame* f) const if ( ! v2 ) return nullptr; - if ( is_vector(v1) ) + if ( is_vector_elem_append ) { VectorVal* vv = v1->AsVectorVal(); @@ -1604,6 +1747,12 @@ ValPtr AddToExpr::Eval(Frame* f) const return v1; } + if ( type->Tag() == TYPE_PATTERN ) + { + v2->AddTo(v1.get(), false); + return v1; + } + if ( auto result = Fold(v1.get(), v2.get()) ) { op1->Assign(f, result); @@ -1652,6 +1801,8 @@ SubExpr::SubExpr(ExprPtr arg_op1, ExprPtr arg_op2) else ExprError("requires arithmetic operands"); + CheckScalarAggOp(); + if ( base_result_type ) { if ( is_vector(op1) || is_vector(op2) ) @@ -1662,18 +1813,35 @@ SubExpr::SubExpr(ExprPtr arg_op1, ExprPtr arg_op2) } RemoveFromExpr::RemoveFromExpr(ExprPtr arg_op1, ExprPtr arg_op2) - : BinaryExpr(EXPR_REMOVE_FROM, arg_op1->MakeLvalue(), std::move(arg_op2)) + : BinaryExpr(EXPR_REMOVE_FROM, std::move(arg_op1), std::move(arg_op2)) { if ( IsError() ) return; - TypeTag bt1 = op1->GetType()->Tag(); - TypeTag bt2 = op2->GetType()->Tag(); + auto& t1 = op1->GetType(); + auto& t2 = op2->GetType(); + TypeTag bt1 = t1->Tag(); + TypeTag bt2 = t2->Tag(); + + if ( bt1 != TYPE_TABLE ) + op1 = op1->MakeLvalue(); if ( BothArithmetic(bt1, bt2) ) PromoteType(max_type(bt1, bt2), is_vector(op1) || is_vector(op2)); else if ( BothInterval(bt1, bt2) ) SetType(base_type(bt1)); + + else if ( bt2 == TYPE_LIST ) + (void)CheckForRHSList(); + + else if ( bt1 == TYPE_TABLE ) + { + if ( 
same_type(t1, t2) ) + SetType(t1); + else + ExprError("RHS type mismatch for table/set -="); + } + else ExprError("requires two arithmetic operands"); } @@ -1728,6 +1896,8 @@ TimesExpr::TimesExpr(ExprPtr arg_op1, ExprPtr arg_op2) PromoteType(max_type(bt1, bt2), is_vector(op1) || is_vector(op2)); else ExprError("requires arithmetic operands"); + + CheckScalarAggOp(); } void TimesExpr::Canonicize() @@ -1776,6 +1946,8 @@ DivideExpr::DivideExpr(ExprPtr arg_op1, ExprPtr arg_op2) else ExprError("requires arithmetic operands"); + + CheckScalarAggOp(); } ValPtr DivideExpr::AddrFold(Val* v1, Val* v2) const @@ -1823,6 +1995,8 @@ ModExpr::ModExpr(ExprPtr arg_op1, ExprPtr arg_op2) PromoteType(max_type(bt1, bt2), is_vector(op1) || is_vector(op2)); else ExprError("requires integral operands"); + + CheckScalarAggOp(); } BoolExpr::BoolExpr(BroExprTag arg_tag, ExprPtr arg_op1, ExprPtr arg_op2) @@ -2086,6 +2260,8 @@ EqExpr::EqExpr(BroExprTag arg_tag, ExprPtr arg_op1, ExprPtr arg_op2) else ExprError("type clash in comparison"); + + CheckScalarAggOp(); } void EqExpr::Canonicize() @@ -2166,6 +2342,8 @@ RelExpr::RelExpr(BroExprTag arg_tag, ExprPtr arg_op1, ExprPtr arg_op2) else if ( bt1 != TYPE_TIME && bt1 != TYPE_INTERVAL && bt1 != TYPE_PORT && bt1 != TYPE_ADDR && bt1 != TYPE_STRING ) ExprError("illegal comparison"); + + CheckScalarAggOp(); } void RelExpr::Canonicize() @@ -2419,7 +2597,15 @@ AssignExpr::AssignExpr(ExprPtr arg_op1, ExprPtr arg_op2, bool arg_is_init, ValPt return; } - if ( typecheck ) + if ( op2->Tag() == EXPR_LIST && CheckForRHSList() ) + { + if ( op2->Tag() == EXPR_TABLE_CONSTRUCTOR ) + cast_intrusive(op2)->SetAttrs(attrs); + else if ( op2->Tag() == EXPR_SET_CONSTRUCTOR ) + cast_intrusive(op2)->SetAttrs(attrs); + } + + else if ( typecheck ) // We discard the status from TypeCheck since it has already // generated error messages. 
(void)TypeCheck(attrs); @@ -2461,25 +2647,6 @@ bool AssignExpr::TypeCheck(const AttributesPtr& attrs) return true; } - if ( bt1 == TYPE_TABLE && op2->Tag() == EXPR_LIST ) - { - std::unique_ptr> attr_copy; - - if ( attrs ) - attr_copy = std::make_unique>(attrs->GetAttrs()); - - if ( op1->GetType()->IsSet() ) - op2 = make_intrusive(cast_intrusive(op2), - std::move(attr_copy), op1->GetType()); - else - op2 = make_intrusive(cast_intrusive(op2), - std::move(attr_copy), op1->GetType()); - - // The constructor expressions are performing the type - // checking and will set op2 to an error state on failure. - return ! op2->IsError(); - } - if ( bt1 == TYPE_VECTOR ) { if ( bt2 == bt1 && op2->GetType()->AsVectorType()->IsUnspecifiedVector() ) @@ -2490,8 +2657,8 @@ bool AssignExpr::TypeCheck(const AttributesPtr& attrs) if ( op2->Tag() == EXPR_LIST ) { - op2 = make_intrusive( - IntrusivePtr{AdoptRef{}, op2.release()->AsListExpr()}, op1->GetType()); + op2 = make_intrusive(cast_intrusive(op2), + op1->GetType()); return true; } } @@ -2515,21 +2682,11 @@ bool AssignExpr::TypeCheck(const AttributesPtr& attrs) // Some elements in constructor list must not match, see if // we can create a new constructor now that the expected type // of LHS is known and let it do coercions where possible. - SetConstructorExpr* sce = dynamic_cast(op2.get()); - - if ( ! sce ) - { - ExprError("Failed typecast to SetConstructorExpr"); - return false; - } - - ListExpr* ctor_list = dynamic_cast(sce->Op()); + auto sce = cast_intrusive(op2); + auto ctor_list = cast_intrusive(sce->GetOp1()); if ( ! 
ctor_list ) - { - ExprError("Failed typecast to ListExpr"); - return false; - } + Internal("failed typecast to ListExpr"); std::unique_ptr> attr_copy; @@ -2540,8 +2697,8 @@ bool AssignExpr::TypeCheck(const AttributesPtr& attrs) } int errors_before = reporter->Errors(); - op2 = make_intrusive(IntrusivePtr{NewRef{}, ctor_list}, - std::move(attr_copy), op1->GetType()); + op2 = make_intrusive(ctor_list, std::move(attr_copy), + op1->GetType()); int errors_after = reporter->Errors(); if ( errors_after > errors_before ) @@ -2637,163 +2794,6 @@ TypePtr AssignExpr::InitType() const return make_intrusive(IntrusivePtr{NewRef{}, tl->AsTypeList()}, op2->GetType()); } -void AssignExpr::EvalIntoAggregate(const TypePtr& t, ValPtr aggr, Frame* f) const - { - if ( IsError() ) - return; - - TypeDecl td; - - if ( IsRecordElement(&td) ) - { - if ( t->Tag() != TYPE_RECORD ) - { - RuntimeError("not a record initializer"); - return; - } - - const RecordType* rt = t->AsRecordType(); - int field = rt->FieldOffset(td.id); - - if ( field < 0 ) - { - RuntimeError("no such field"); - return; - } - - RecordVal* aggr_r = aggr->AsRecordVal(); - - auto v = op2->Eval(f); - - if ( v ) - aggr_r->Assign(field, std::move(v)); - - return; - } - - if ( op1->Tag() != EXPR_LIST ) - RuntimeError("bad table insertion"); - - TableVal* tv = aggr->AsTableVal(); - - auto index = op1->Eval(f); - auto v = check_and_promote(op2->Eval(f), t->Yield(), true); - - if ( ! index || ! v ) - return; - - if ( ! tv->Assign(std::move(index), std::move(v)) ) - RuntimeError("type clash in table assignment"); - } - -ValPtr AssignExpr::InitVal(const TypePtr& t, ValPtr aggr) const - { - if ( ! 
aggr ) - { - Error("assignment in initialization"); - return nullptr; - } - - if ( IsError() ) - return nullptr; - - TypeDecl td; - - if ( IsRecordElement(&td) ) - { - if ( t->Tag() != TYPE_RECORD ) - { - Error("not a record initializer", t.get()); - return nullptr; - } - - const RecordType* rt = t->AsRecordType(); - int field = rt->FieldOffset(td.id); - - if ( field < 0 ) - { - Error("no such field"); - return nullptr; - } - - if ( aggr->GetType()->Tag() != TYPE_RECORD ) - Internal("bad aggregate in AssignExpr::InitVal"); - - RecordVal* aggr_r = aggr->AsRecordVal(); - - auto v = op2->InitVal(rt->GetFieldType(td.id), nullptr); - - if ( ! v ) - return nullptr; - - aggr_r->Assign(field, v); - return v; - } - - else if ( op1->Tag() == EXPR_LIST ) - { - if ( t->Tag() != TYPE_TABLE ) - { - Error("not a table initialization", t.get()); - return nullptr; - } - - if ( aggr->GetType()->Tag() != TYPE_TABLE ) - Internal("bad aggregate in AssignExpr::InitVal"); - - auto tv = cast_intrusive(std::move(aggr)); - const TableType* tt = tv->GetType()->AsTableType(); - const auto& yt = tv->GetType()->Yield(); - - auto index = op1->InitVal(tt->GetIndices(), nullptr); - - if ( yt->Tag() == TYPE_RECORD ) - { - if ( op2->GetType()->Tag() != TYPE_RECORD ) - { - Error(util::fmt("type mismatch in table value initialization: " - "assigning '%s' to table with values of type '%s'", - type_name(op2->GetType()->Tag()), type_name(yt->Tag()))); - return nullptr; - } - - if ( ! same_type(*yt, *op2->GetType()) && - ! record_promotion_compatible(yt->AsRecordType(), op2->GetType()->AsRecordType()) ) - { - Error("type mismatch in table value initialization: " - "incompatible record types"); - return nullptr; - } - } - else - { - if ( ! 
same_type(*yt, *op2->GetType(), true) ) - { - Error(util::fmt("type mismatch in table value initialization: " - "assigning '%s' to table with values of type '%s'", - type_name(op2->GetType()->Tag()), type_name(yt->Tag()))); - return nullptr; - } - } - - auto v = op2->InitVal(yt, nullptr); - - if ( ! index || ! v ) - return nullptr; - - if ( ! tv->ExpandAndInit(std::move(index), std::move(v)) ) - return nullptr; - - return tv; - } - - else - { - Error("illegal initializer"); - return nullptr; - } - } - bool AssignExpr::IsRecordElement(TypeDecl* td) const { if ( op1->Tag() == EXPR_NAME ) @@ -2811,11 +2811,6 @@ bool AssignExpr::IsRecordElement(TypeDecl* td) const return false; } -bool AssignExpr::IsPure() const - { - return false; - } - IndexSliceAssignExpr::IndexSliceAssignExpr(ExprPtr op1, ExprPtr op2, bool is_init) : AssignExpr(std::move(op1), std::move(op2), is_init) { @@ -3343,6 +3338,8 @@ RecordConstructorExpr::RecordConstructorExpr(RecordTypePtr known_rt, ListExprPtr const auto& exprs = op->AsListExpr()->Exprs(); map = std::vector(exprs.length()); + std::set fields_seen; // used to check for missing fields + int i = 0; for ( const auto& e : exprs ) { @@ -3369,35 +3366,28 @@ RecordConstructorExpr::RecordConstructorExpr(RecordTypePtr known_rt, ListExprPtr SetError(); (*map)[i++] = index; + fields_seen.insert(index); } - } -ValPtr RecordConstructorExpr::InitVal(const TypePtr& t, ValPtr aggr) const - { if ( IsError() ) - { - Error("bad record initializer"); - return nullptr; - } + return; - if ( ! init_tag_check(this, "record constructor", TYPE_RECORD, t->Tag()) ) - return nullptr; + auto n = known_rt->NumFields(); + for ( i = 0; i < n; ++i ) + if ( fields_seen.count(i) == 0 ) + { + const auto td_i = known_rt->FieldDecl(i); + if ( IsAggr(td_i->type) ) + // These are always initialized. 
+ continue; - auto v = Eval(nullptr); - - if ( v ) - { - RecordVal* rv = v->AsRecordVal(); - RecordTypePtr rt{NewRef{}, t->AsRecordType()}; - auto aggr_rec = cast_intrusive(std::move(aggr)); - auto ar = rv->CoerceTo(std::move(rt), std::move(aggr_rec)); - - if ( ar ) - return ar; - } - - Error("bad record initializer"); - return nullptr; + if ( ! td_i->GetAttr(ATTR_OPTIONAL) ) + { + auto err = std::string("mandatory field \"") + known_rt->FieldName(i) + + "\" missing"; + ExprError(err.c_str()); + } + } } ValPtr RecordConstructorExpr::Eval(Frame* f) const @@ -3467,10 +3457,161 @@ TraversalCode RecordConstructorExpr::Traverse(TraversalCallback* cb) const HANDLE_TC_EXPR_POST(tc); } +static ExprPtr expand_one_elem(const ExprPList& index_exprs, ExprPtr yield, ExprPtr elem, + int elem_offset) + { + auto expanded_elem = make_intrusive(); + + for ( int i = 0; i < index_exprs.length(); ++i ) + if ( i == elem_offset ) + expanded_elem->Append(elem); + else + expanded_elem->Append({NewRef{}, index_exprs[i]}); + + if ( yield ) + return make_intrusive(expanded_elem, yield, true); + else + return expanded_elem; + } + +static bool expand_op_elem(ListExprPtr elems, ExprPtr elem, TypePtr t) + { + ExprPtr index; + ExprPtr yield; + + if ( elem->Tag() == EXPR_ASSIGN ) + { + if ( t ) + { + if ( ! t->IsTable() ) + { + elem->Error("table constructor used in a non-table context"); + return false; + } + + t = t->AsTableType()->GetIndices(); + } + + index = elem->GetOp1(); + yield = elem->GetOp2(); + } + else + index = elem; // this is a set - no yield + + // If the index isn't a list, then there's nothing to consider + // expanding. + if ( index->Tag() != EXPR_LIST ) + { + elems->Append(elem); + return false; + } + + // Look inside the index for any sub-lists or sets, and expand those. + // There might be more than one, but we'll pick that up recursively + // later. 
+ auto& index_exprs = index->AsListExpr()->Exprs(); + int index_n = index_exprs.length(); + int list_offset = -1; + int set_offset = -1; + for ( int i = 0; i < index_n; ++i ) + { + auto& ie_i = index_exprs[i]; + + if ( ie_i->Tag() == EXPR_LIST ) + { + list_offset = i; + break; + } + + if ( ie_i->GetType()->IsSet() ) + { + // Check for this set corresponding to what's expected + // in this location, in which case it shouldn't be + // expanded. + const TypeList* tl = nullptr; + if ( t && t->Tag() == TYPE_LIST ) + tl = t->AsTypeList(); + + // So we're good-to-go in expanding if either + // (1) we weren't given a type, or it's not a list, + // or (2) it's a list, but doesn't correspond in + // length to the list of expressions, or (3) it does + // but its corresponding element at this position + // doesn't have the same type as this set. + if ( ! tl || static_cast(tl->GetTypes().size()) != index_n || + ! same_type(tl->GetTypes()[i], ie_i->GetType()) ) + { + set_offset = i; + break; + } + } + } + + if ( set_offset >= 0 ) + { // expand the set + auto s_e = index_exprs[set_offset]; + auto v = s_e->Eval(nullptr); + if ( ! v ) + { + s_e->Error( + "cannot expand constructor elements using a value that depends on local variables"); + elems->SetError(); + return false; + } + + for ( auto& s_elem : v->AsTableVal()->ToMap() ) + { + auto c_elem = make_intrusive(s_elem.first); + elems->Append(expand_one_elem(index_exprs, yield, c_elem, set_offset)); + } + + return true; + } + + if ( list_offset < 0 ) + { // No embedded lists. + elems->Append(elem); + return false; + } + + // Expand the identified list. 
+ auto sub_list = index_exprs[list_offset]->AsListExpr(); + for ( auto& sub_list_i : sub_list->Exprs() ) + { + ExprPtr e = {NewRef{}, sub_list_i}; + elems->Append(expand_one_elem(index_exprs, yield, e, list_offset)); + } + + return true; + } + +ListExprPtr expand_op(ListExprPtr op, const TypePtr& t) + { + auto new_list = make_intrusive(); + bool did_expansion = false; + + for ( auto e : op->Exprs() ) + { + if ( expand_op_elem(new_list, {NewRef{}, e}, t) ) + did_expansion = true; + + if ( new_list->IsError() ) + { + op->SetError(); + return op; + } + } + + if ( did_expansion ) + return expand_op(new_list, t); + else + return op; + } + TableConstructorExpr::TableConstructorExpr(ListExprPtr constructor_list, std::unique_ptr> arg_attrs, TypePtr arg_type, AttributesPtr arg_attrs2) - : UnaryExpr(EXPR_TABLE_CONSTRUCTOR, std::move(constructor_list)) + : UnaryExpr(EXPR_TABLE_CONSTRUCTOR, expand_op(constructor_list, arg_type)) { if ( IsError() ) return; @@ -3493,7 +3634,7 @@ TableConstructorExpr::TableConstructorExpr(ListExprPtr constructor_list, make_intrusive(make_intrusive(base_type(TYPE_ANY)), nullptr)); else { - SetType(init_type(op.get())); + SetType(init_type(op)); if ( ! type ) { @@ -3502,14 +3643,17 @@ TableConstructorExpr::TableConstructorExpr(ListExprPtr constructor_list, } else if ( type->Tag() != TYPE_TABLE || type->AsTableType()->IsSet() ) + { SetError("values in table(...) 
constructor do not specify a table"); + return; + } } } if ( arg_attrs ) - attrs = make_intrusive(std::move(*arg_attrs), type, false, false); + SetAttrs(make_intrusive(std::move(*arg_attrs), type, false, false)); else - attrs = arg_attrs2; + SetAttrs(arg_attrs2); const auto& indices = type->AsTableType()->GetIndices()->GetTypes(); const ExprPList& cle = op->AsListExpr()->Exprs(); @@ -3518,7 +3662,11 @@ TableConstructorExpr::TableConstructorExpr(ListExprPtr constructor_list, for ( const auto& expr : cle ) { if ( expr->Tag() != EXPR_ASSIGN ) - continue; + { + expr->Error("illegal table constructor element"); + SetError(); + return; + } auto idx_expr = expr->AsAssignExpr()->GetOp1(); auto val_expr = expr->AsAssignExpr()->GetOp2(); @@ -3568,35 +3716,30 @@ ValPtr TableConstructorExpr::Eval(Frame* f) const if ( IsError() ) return nullptr; - auto aggr = make_intrusive(GetType(), attrs); + auto tv = make_intrusive(GetType(), attrs); const ExprPList& exprs = op->AsListExpr()->Exprs(); for ( const auto& expr : exprs ) - expr->EvalIntoAggregate(type, aggr, f); + { + auto op1 = expr->GetOp1(); + auto op2 = expr->GetOp2(); - aggr->InitDefaultFunc(f); + if ( ! op1 || ! op2 ) + return nullptr; - return aggr; - } + auto index = op1->Eval(f); + auto v = op2->Eval(f); -ValPtr TableConstructorExpr::InitVal(const TypePtr& t, ValPtr aggr) const - { - if ( IsError() ) - return nullptr; + if ( ! index || ! v ) + return nullptr; - if ( ! init_tag_check(this, "table constructor", TYPE_TABLE, t->Tag()) ) - return nullptr; + if ( ! tv->Assign(std::move(index), std::move(v)) ) + RuntimeError("type clash in table assignment"); + } - auto tt = GetType(); + tv->InitDefaultFunc(f); - auto tval = aggr ? 
TableValPtr{AdoptRef{}, aggr.release()->AsTableVal()} - : make_intrusive(std::move(tt), attrs); - const ExprPList& exprs = op->AsListExpr()->Exprs(); - - for ( const auto& expr : exprs ) - expr->EvalIntoAggregate(t, tval, nullptr); - - return tval; + return tv; } void TableConstructorExpr::ExprDescribe(ODesc* d) const @@ -3609,7 +3752,7 @@ void TableConstructorExpr::ExprDescribe(ODesc* d) const SetConstructorExpr::SetConstructorExpr(ListExprPtr constructor_list, std::unique_ptr> arg_attrs, TypePtr arg_type, AttributesPtr arg_attrs2) - : UnaryExpr(EXPR_SET_CONSTRUCTOR, std::move(constructor_list)) + : UnaryExpr(EXPR_SET_CONSTRUCTOR, expand_op(constructor_list, arg_type)) { if ( IsError() ) return; @@ -3631,7 +3774,7 @@ SetConstructorExpr::SetConstructorExpr(ListExprPtr constructor_list, SetType(make_intrusive(make_intrusive(base_type(TYPE_ANY)), nullptr)); else - SetType(init_type(op.get())); + SetType(init_type(op)); } if ( ! type ) @@ -3641,9 +3784,9 @@ SetConstructorExpr::SetConstructorExpr(ListExprPtr constructor_list, SetError("values in set(...) 
constructor do not specify a set"); if ( arg_attrs ) - attrs = make_intrusive(std::move(*arg_attrs), type, false, false); + SetAttrs(make_intrusive(std::move(*arg_attrs), type, false, false)); else - attrs = arg_attrs2; + SetAttrs(arg_attrs2); const auto& indices = type->AsTableType()->GetIndices()->GetTypes(); ExprPList& cle = op->AsListExpr()->Exprs(); @@ -3660,9 +3803,16 @@ SetConstructorExpr::SetConstructorExpr(ListExprPtr constructor_list, loop_over_list(cle, i) { Expr* ce = cle[i]; + + if ( ce->Tag() != EXPR_LIST ) + { + ce->Error("not a list of indices"); + SetError(); + return; + } + ListExpr* le = ce->AsListExpr(); - assert(ce->Tag() == EXPR_LIST); if ( check_and_promote_exprs(le, type->AsTableType()->GetIndices()) ) { if ( le != cle[i] ) @@ -3693,34 +3843,6 @@ ValPtr SetConstructorExpr::Eval(Frame* f) const return aggr; } -ValPtr SetConstructorExpr::InitVal(const TypePtr& t, ValPtr aggr) const - { - if ( IsError() ) - return nullptr; - - if ( ! init_tag_check(this, "set constructor", TYPE_TABLE, t->Tag()) ) - return nullptr; - - const auto& index_type = t->AsTableType()->GetIndices(); - auto tt = GetType(); - auto tval = aggr ? TableValPtr{AdoptRef{}, aggr.release()->AsTableVal()} - : make_intrusive(std::move(tt), attrs); - const ExprPList& exprs = op->AsListExpr()->Exprs(); - - for ( const auto& e : exprs ) - { - auto element = check_and_promote(e->Eval(nullptr), index_type, true); - - if ( ! element || ! tval->Assign(std::move(element), nullptr) ) - { - Error(util::fmt("initialization type mismatch in set"), e); - return nullptr; - } - } - - return tval; - } - void SetConstructorExpr::ExprDescribe(ODesc* d) const { d->Add("set("); @@ -3791,34 +3913,6 @@ ValPtr VectorConstructorExpr::Eval(Frame* f) const return vec; } -ValPtr VectorConstructorExpr::InitVal(const TypePtr& t, ValPtr aggr) const - { - if ( IsError() ) - return nullptr; - - if ( ! 
init_tag_check(this, "vector constructor", TYPE_VECTOR, t->Tag()) ) - return nullptr; - - auto vt = GetType(); - auto vec = aggr ? VectorValPtr{AdoptRef{}, aggr.release()->AsVectorVal()} - : make_intrusive(std::move(vt)); - const ExprPList& exprs = op->AsListExpr()->Exprs(); - - loop_over_list(exprs, i) - { - Expr* e = exprs[i]; - auto v = check_and_promote(e->Eval(nullptr), t->Yield(), true); - - if ( ! v || ! vec->Assign(i, std::move(v)) ) - { - Error(util::fmt("initialization type mismatch at index %d", i), e); - return nullptr; - } - } - - return vec; - } - void VectorConstructorExpr::ExprDescribe(ODesc* d) const { d->Add("vector("); @@ -3838,25 +3932,6 @@ bool FieldAssignExpr::PromoteTo(TypePtr t) return op != nullptr; } -void FieldAssignExpr::EvalIntoAggregate(const TypePtr& t, ValPtr aggr, Frame* f) const - { - if ( IsError() ) - return; - - if ( auto v = op->Eval(f) ) - { - RecordVal* rec = aggr->AsRecordVal(); - const RecordType* rt = t->AsRecordType(); - - int idx = rt->FieldOffset(field_name.c_str()); - - if ( idx < 0 ) - reporter->InternalError("Missing record field: %s", field_name.c_str()); - - rec->Assign(idx, std::move(v)); - } - } - bool FieldAssignExpr::IsRecordElement(TypeDecl* td) const { if ( td ) @@ -4046,31 +4121,6 @@ RecordCoerceExpr::RecordCoerceExpr(ExprPtr arg_op, RecordTypePtr r) } } -ValPtr RecordCoerceExpr::InitVal(const TypePtr& t, ValPtr aggr) const - { - if ( IsError() ) - { - Error("bad record initializer"); - return nullptr; - } - - if ( ! 
init_tag_check(this, "record", TYPE_RECORD, t->Tag()) ) - return nullptr; - - if ( auto v = Eval(nullptr) ) - { - RecordVal* rv = v->AsRecordVal(); - RecordTypePtr rt{NewRef{}, t->AsRecordType()}; - auto aggr_rec = cast_intrusive(std::move(aggr)); - - if ( auto ar = rv->CoerceTo(std::move(rt), std::move(aggr_rec)) ) - return ar; - } - - Error("bad record initializer"); - return nullptr; - } - ValPtr RecordCoerceExpr::Fold(Val* v) const { if ( same_type(GetType(), Op()->GetType()) ) @@ -4166,13 +4216,23 @@ RecordValPtr coerce_to_record(RecordTypePtr rt, Val* v, const std::vector& return val; } -TableCoerceExpr::TableCoerceExpr(ExprPtr arg_op, TableTypePtr r) +TableCoerceExpr::TableCoerceExpr(ExprPtr arg_op, TableTypePtr tt, bool type_check) : UnaryExpr(EXPR_TABLE_COERCE, std::move(arg_op)) { if ( IsError() ) return; - SetType(std::move(r)); + if ( type_check ) + { + op = check_and_promote_expr(op, tt); + if ( ! op ) + { + SetError(); + return; + } + } + + SetType(std::move(tt)); if ( GetType()->Tag() != TYPE_TABLE ) ExprError("coercion to non-table"); @@ -4243,13 +4303,6 @@ ScheduleExpr::ScheduleExpr(ExprPtr arg_when, EventExprPtr arg_event) if ( bt != TYPE_TIME && bt != TYPE_INTERVAL ) ExprError("schedule expression requires a time or time interval"); - else - SetType(base_type(TYPE_TIMER)); - } - -bool ScheduleExpr::IsPure() const - { - return false; } ValPtr ScheduleExpr::Eval(Frame* f) const @@ -4270,7 +4323,14 @@ ValPtr ScheduleExpr::Eval(Frame* f) const auto args = eval_list(f, event->Args()); if ( args ) - timer_mgr->Add(new ScheduleTimer(event->Handler(), std::move(*args), dt)); + { + auto handler = event->Handler(); + + if ( etm ) + etm->ScriptEventQueued(handler); + + timer_mgr->Add(new ScheduleTimer(handler, std::move(*args), dt)); + } return nullptr; } @@ -4809,7 +4869,12 @@ ValPtr EventExpr::Eval(Frame* f) const auto v = eval_list(f, args.get()); if ( handler ) + { + if ( etm ) + etm->ScriptEventQueued(handler); + event_mgr.Enqueue(handler, 
std::move(*v)); + } return nullptr; } @@ -4946,202 +5011,6 @@ TypePtr ListExpr::InitType() const } } -ValPtr ListExpr::InitVal(const TypePtr& t, ValPtr aggr) const - { - // While fairly similar to the EvalIntoAggregate() code, - // we keep this separate since it also deals with initialization - // idioms such as embedded aggregates and cross-product - // expansion. - if ( IsError() ) - return nullptr; - - // Check whether each element of this list itself matches t, - // in which case we should expand as a ListVal. - if ( ! aggr && type->AsTypeList()->AllMatch(t, true) ) - { - auto v = make_intrusive(TYPE_ANY); - const auto& tl = type->AsTypeList()->GetTypes(); - - if ( exprs.length() != static_cast(tl.size()) ) - { - Error("index mismatch", t.get()); - return nullptr; - } - - loop_over_list(exprs, i) - { - auto vi = exprs[i]->InitVal(tl[i], nullptr); - if ( ! vi ) - return nullptr; - - v->Append(std::move(vi)); - } - - return v; - } - - if ( t->Tag() == TYPE_LIST ) - { - if ( aggr ) - { - Error("bad use of list in initialization", t.get()); - return nullptr; - } - - const auto& tl = t->AsTypeList()->GetTypes(); - - if ( exprs.length() != static_cast(tl.size()) ) - { - Error("index mismatch", t.get()); - return nullptr; - } - - auto v = make_intrusive(TYPE_ANY); - - loop_over_list(exprs, i) - { - auto vi = exprs[i]->InitVal(tl[i], nullptr); - - if ( ! vi ) - return nullptr; - - v->Append(std::move(vi)); - } - - return v; - } - - if ( t->Tag() != TYPE_RECORD && t->Tag() != TYPE_TABLE && t->Tag() != TYPE_VECTOR ) - { - if ( exprs.length() == 1 ) - // Allow "global x:int = { 5 }" - return exprs[0]->InitVal(t, aggr); - else - { - Error("aggregate initializer for scalar type", t.get()); - return nullptr; - } - } - - if ( ! 
aggr ) - Internal("missing aggregate in ListExpr::InitVal"); - - if ( t->IsSet() ) - return AddSetInit(t, std::move(aggr)); - - if ( t->Tag() == TYPE_VECTOR ) - { - // v: vector = [10, 20, 30]; - VectorVal* vec = aggr->AsVectorVal(); - - loop_over_list(exprs, i) - { - ExprPtr e = {NewRef{}, exprs[i]}; - const auto& vyt = vec->GetType()->AsVectorType()->Yield(); - auto promoted_e = check_and_promote_expr(e, vyt); - - if ( promoted_e ) - e = promoted_e; - - if ( ! vec->Assign(i, e->Eval(nullptr)) ) - { - e->Error(util::fmt("type mismatch at index %d", i)); - return nullptr; - } - } - - return aggr; - } - - // If we got this far, then it's either a table or record - // initialization. Both of those involve AssignExpr's, which - // know how to add themselves to a table or record. Another - // possibility is an expression that evaluates itself to a - // table, which we can then add to the aggregate. - for ( const auto& e : exprs ) - { - if ( e->Tag() == EXPR_ASSIGN || e->Tag() == EXPR_FIELD_ASSIGN ) - { - if ( ! e->InitVal(t, aggr) ) - return nullptr; - } - else - { - if ( t->Tag() == TYPE_RECORD ) - { - e->Error("bad record initializer", t.get()); - return nullptr; - } - - auto v = e->Eval(nullptr); - - if ( ! same_type(v->GetType(), t) ) - { - v->GetType()->Error("type clash in table initializer", t.get()); - return nullptr; - } - - if ( ! v->AsTableVal()->AddTo(aggr->AsTableVal(), true) ) - return nullptr; - } - } - - return aggr; - } - -ValPtr ListExpr::AddSetInit(TypePtr t, ValPtr aggr) const - { - if ( aggr->GetType()->Tag() != TYPE_TABLE ) - Internal("bad aggregate in ListExpr::AddSetInit"); - - TableVal* tv = aggr->AsTableVal(); - const TableType* tt = tv->GetType()->AsTableType(); - TypeListPtr it = tt->GetIndices(); - - for ( const auto& expr : exprs ) - { - ValPtr element; - - if ( expr->GetType()->IsSet() ) - // A set to flatten. 
- element = expr->Eval(nullptr); - else if ( expr->GetType()->Tag() == TYPE_LIST ) - element = expr->InitVal(it, nullptr); - else - element = expr->InitVal(it->GetTypes()[0], nullptr); - - if ( ! element ) - return nullptr; - - if ( element->GetType()->IsSet() ) - { - if ( ! same_type(element->GetType(), t) ) - { - element->Error("type clash in set initializer", t.get()); - return nullptr; - } - - if ( ! element->AsTableVal()->AddTo(tv, true) ) - return nullptr; - - continue; - } - - if ( expr->GetType()->Tag() == TYPE_LIST ) - element = check_and_promote(std::move(element), it, true); - else - element = check_and_promote(std::move(element), it->GetTypes()[0], true); - - if ( ! element ) - return nullptr; - - if ( ! tv->ExpandAndInit(std::move(element), nullptr) ) - return nullptr; - } - - return aggr; - } - void ListExpr::ExprDescribe(ODesc* d) const { d->AddCount(exprs.length()); @@ -5391,7 +5260,26 @@ ExprPtr check_and_promote_expr(ExprPtr e, TypePtr t) { if ( t->Tag() == TYPE_TABLE && et->Tag() == TYPE_TABLE && et->AsTableType()->IsUnspecifiedTable() ) - return make_intrusive(e, IntrusivePtr{NewRef{}, t->AsTableType()}); + { + if ( e->Tag() == EXPR_TABLE_CONSTRUCTOR ) + { + auto& attrs = cast_intrusive(e)->GetAttrs(); + auto& def = attrs ? attrs->Find(ATTR_DEFAULT) : nullptr; + if ( def ) + { + std::string err_msg; + if ( ! check_default_attr(def.get(), t, false, false, err_msg) ) + { + if ( ! err_msg.empty() ) + t->Error(err_msg.c_str(), e.get()); + return nullptr; + } + } + } + + return make_intrusive(e, IntrusivePtr{NewRef{}, t->AsTableType()}, + false); + } if ( t->Tag() == TYPE_VECTOR && et->Tag() == TYPE_VECTOR && et->AsVectorType()->IsUnspecifiedVector() ) diff --git a/src/Expr.h b/src/Expr.h index fabf0915b3..a8b90d42dc 100644 --- a/src/Expr.h +++ b/src/Expr.h @@ -164,12 +164,6 @@ public: // or nil if the expression's value isn't fixed. 
virtual ValPtr Eval(Frame* f) const = 0; - // Same, but the context is that we are adding an element - // into the given aggregate of the given type. Note that - // return type is void since it's updating an existing - // value, rather than creating a new one. - virtual void EvalIntoAggregate(const TypePtr& t, ValPtr aggr, Frame* f) const; - // Assign to the given value, if appropriate. virtual void Assign(Frame* f, ValPtr v); @@ -183,15 +177,8 @@ public: // TypeDecl with a description of the element. virtual bool IsRecordElement(TypeDecl* td) const; - // Returns a value corresponding to this expression interpreted - // as an initialization, or nil if the expression is inconsistent - // with the given type. If "aggr" is non-nil, then this expression - // is an element of the given aggregate, and it is added to it - // accordingly. - virtual ValPtr InitVal(const TypePtr& t, ValPtr aggr) const; - // True if the expression has no side effects, false otherwise. - virtual bool IsPure() const; + virtual bool IsPure() const { return true; } // True if the expression is a constant, false otherwise. bool IsConst() const { return tag == EXPR_CONST; } @@ -467,7 +454,6 @@ public: ValPtr Eval(Frame* f) const override; void Assign(Frame* f, ValPtr v) override; ExprPtr MakeLvalue() override; - bool IsPure() const override; TraversalCode Traverse(TraversalCallback* cb) const override; @@ -599,6 +585,9 @@ protected: // Same for when the constants are sets. virtual ValPtr SetFold(Val* v1, Val* v2) const; + // Same for when the constants are tables. + virtual ValPtr TableFold(Val* v1, Val* v2) const; + // Same for when the constants are addresses or subnets. virtual ValPtr AddrFold(Val* v1, Val* v2) const; virtual ValPtr SubNetFold(Val* v1, Val* v2) const; @@ -622,6 +611,20 @@ protected: void ExprDescribe(ODesc* d) const override; + // Reports on if this BinaryExpr involves a scalar and aggregate + // type (vec, list, table, record). 
+ bool IsScalarAggregateOp() const; + + // Warns about deprecated scalar vector operations like + // `[1, 2, 3] == 1` or `["a", "b", "c"] + "a"`. + void CheckScalarAggOp() const; + + // For assignment operations (=, +=, -=) checks for a valid + // expression-list on the RHS (op2), potentially transforming + // op2 in the process. Returns true if the list is present + // and type-checks correctly, false otherwise. + bool CheckForRHSList(); + ExprPtr op1; ExprPtr op2; }; @@ -646,7 +649,7 @@ public: ValPtr Eval(Frame* f) const override; ValPtr DoSingleEval(Frame* f, Val* v) const; - bool IsPure() const override; + bool IsPure() const override { return false; } // Optimization-related: ExprPtr Duplicate() override; @@ -749,21 +752,33 @@ public: ValPtr Eval(Frame* f) const override; // Optimization-related: + bool IsPure() const override { return false; } ExprPtr Duplicate() override; + bool HasReducedOps(Reducer* c) const override { return false; } bool WillTransform(Reducer* c) const override { return true; } + bool IsReduced(Reducer* c) const override; ExprPtr Reduce(Reducer* c, StmtPtr& red_stmt) override; + ExprPtr ReduceToSingleton(Reducer* c, StmtPtr& red_stmt) override; + +private: + // Whether this operation is appending a single element to a vector. 
+ bool is_vector_elem_append = false; }; class RemoveFromExpr final : public BinaryExpr { public: + bool IsPure() const override { return false; } RemoveFromExpr(ExprPtr op1, ExprPtr op2); ValPtr Eval(Frame* f) const override; // Optimization-related: ExprPtr Duplicate() override; + bool HasReducedOps(Reducer* c) const override { return false; } bool WillTransform(Reducer* c) const override { return true; } + bool IsReduced(Reducer* c) const override; ExprPtr Reduce(Reducer* c, StmtPtr& red_stmt) override; + ExprPtr ReduceToSingleton(Reducer* c, StmtPtr& red_stmt) override; }; class SubExpr final : public BinaryExpr @@ -940,11 +955,9 @@ public: const AttributesPtr& attrs = nullptr, bool type_check = true); ValPtr Eval(Frame* f) const override; - void EvalIntoAggregate(const TypePtr& t, ValPtr aggr, Frame* f) const override; TypePtr InitType() const override; bool IsRecordElement(TypeDecl* td) const override; - ValPtr InitVal(const TypePtr& t, ValPtr aggr) const override; - bool IsPure() const override; + bool IsPure() const override { return false; } // Optimization-related: ExprPtr Duplicate() override; @@ -1153,14 +1166,13 @@ public: // Optimization-related: ExprPtr Duplicate() override; + ExprPtr Inline(Inliner* inl) override; bool HasReducedOps(Reducer* c) const override; ExprPtr Reduce(Reducer* c, StmtPtr& red_stmt) override; StmtPtr ReduceToSingletons(Reducer* c) override; protected: - ValPtr InitVal(const TypePtr& t, ValPtr aggr) const override; - void ExprDescribe(ODesc* d) const override; ListExprPtr op; @@ -1173,6 +1185,7 @@ public: TableConstructorExpr(ListExprPtr constructor_list, std::unique_ptr> attrs, TypePtr arg_type = nullptr, AttributesPtr arg_attrs = nullptr); + void SetAttrs(AttributesPtr _attrs) { attrs = std::move(_attrs); } const AttributesPtr& GetAttrs() const { return attrs; } ValPtr Eval(Frame* f) const override; @@ -1185,8 +1198,6 @@ public: StmtPtr ReduceToSingletons(Reducer* c) override; protected: - ValPtr InitVal(const TypePtr& t, 
ValPtr aggr) const override; - void ExprDescribe(ODesc* d) const override; AttributesPtr attrs; @@ -1198,6 +1209,7 @@ public: SetConstructorExpr(ListExprPtr constructor_list, std::unique_ptr> attrs, TypePtr arg_type = nullptr, AttributesPtr arg_attrs = nullptr); + void SetAttrs(AttributesPtr _attrs) { attrs = std::move(_attrs); } const AttributesPtr& GetAttrs() const { return attrs; } ValPtr Eval(Frame* f) const override; @@ -1210,8 +1222,6 @@ public: StmtPtr ReduceToSingletons(Reducer* c) override; protected: - ValPtr InitVal(const TypePtr& t, ValPtr aggr) const override; - void ExprDescribe(ODesc* d) const override; AttributesPtr attrs; @@ -1230,8 +1240,6 @@ public: bool HasReducedOps(Reducer* c) const override; protected: - ValPtr InitVal(const TypePtr& t, ValPtr aggr) const override; - void ExprDescribe(ODesc* d) const override; }; @@ -1250,12 +1258,10 @@ public: // (in which case an error is reported). bool PromoteTo(TypePtr t); - void EvalIntoAggregate(const TypePtr& t, ValPtr aggr, Frame* f) const override; bool IsRecordElement(TypeDecl* td) const override; // Optimization-related: ExprPtr Duplicate() override; - bool WillTransform(Reducer* c) const override { return true; } ExprPtr Reduce(Reducer* c, StmtPtr& red_stmt) override; @@ -1292,7 +1298,6 @@ public: const std::vector& Map() const { return map; } protected: - ValPtr InitVal(const TypePtr& t, ValPtr aggr) const override; ValPtr Fold(Val* v) const override; // For each super-record slot, gives subrecord slot with which to @@ -1305,7 +1310,7 @@ extern RecordValPtr coerce_to_record(RecordTypePtr rt, Val* v, const std::vector class TableCoerceExpr final : public UnaryExpr { public: - TableCoerceExpr(ExprPtr op, TableTypePtr r); + TableCoerceExpr(ExprPtr op, TableTypePtr r, bool type_check = true); ~TableCoerceExpr() override; // Optimization-related: @@ -1346,7 +1351,7 @@ class ScheduleExpr final : public Expr public: ScheduleExpr(ExprPtr when, EventExprPtr event); - bool IsPure() const override; + bool 
IsPure() const override { return false; } ValPtr Eval(Frame* f) const override; @@ -1481,7 +1486,6 @@ public: ValPtr Eval(Frame* f) const override; TypePtr InitType() const override; - ValPtr InitVal(const TypePtr& t, ValPtr aggr) const override; ExprPtr MakeLvalue() override; void Assign(Frame* f, ValPtr v) override; @@ -1497,8 +1501,6 @@ public: StmtPtr ReduceToSingletons(Reducer* c) override; protected: - ValPtr AddSetInit(TypePtr t, ValPtr aggr) const; - void ExprDescribe(ODesc* d) const override; ExprPList exprs; @@ -1616,10 +1618,12 @@ public: AppendToExpr(ExprPtr op1, ExprPtr op2); ValPtr Eval(Frame* f) const override; + ExprPtr Duplicate() override; + + bool IsPure() const override { return false; } bool IsReduced(Reducer* c) const override; ExprPtr Reduce(Reducer* c, StmtPtr& red_stmt) override; - - ExprPtr Duplicate() override; + ExprPtr ReduceToSingleton(Reducer* c, StmtPtr& red_stmt) override; }; // An internal class for reduced form. @@ -1633,6 +1637,7 @@ public: ExprPtr Duplicate() override; + bool IsPure() const override { return false; } bool IsReduced(Reducer* c) const override; bool HasReducedOps(Reducer* c) const override; ExprPtr Reduce(Reducer* c, StmtPtr& red_stmt) override; @@ -1664,6 +1669,7 @@ public: ExprPtr Duplicate() override; + bool IsPure() const override { return false; } bool IsReduced(Reducer* c) const override; bool HasReducedOps(Reducer* c) const override; ExprPtr Reduce(Reducer* c, StmtPtr& red_stmt) override; @@ -1763,7 +1769,14 @@ inline Val* Expr::ExprVal() const } // Decides whether to return an AssignExpr or a RecordAssignExpr. -ExprPtr get_assign_expr(ExprPtr op1, ExprPtr op2, bool is_init); +extern ExprPtr get_assign_expr(ExprPtr op1, ExprPtr op2, bool is_init); + +// Takes a RHS constructor list and returns a version with any embedded +// indices within it (used to concisely represent multiple set/table entries) +// expanded. +// +// Second argument gives the type that the list will expand to, if known. 
+extern ListExprPtr expand_op(ListExprPtr op, const TypePtr& t); /** * Type-check the given expression(s) against the given type(s). Complain @@ -1784,7 +1797,7 @@ extern bool check_and_promote_exprs_to_type(ListExpr* elements, TypePtr type); // Returns a ListExpr simplified down to a list a values, or nil // if they couldn't all be reduced. -std::optional> eval_list(Frame* f, const ListExpr* l); +extern std::optional> eval_list(Frame* f, const ListExpr* l); // Returns true if e1 is "greater" than e2 - here "greater" is just // a heuristic, used with commutative operators to put them into @@ -1801,5 +1814,16 @@ inline bool is_vector(const ExprPtr& e) return is_vector(e.get()); } +// True if the given Expr* has a list type +inline bool is_list(Expr* e) + { + return e->GetType()->Tag() == TYPE_LIST; + } + +inline bool is_list(const ExprPtr& e) + { + return is_list(e.get()); + } + } // namespace detail } // namespace zeek diff --git a/src/Func.cc b/src/Func.cc index 282c65dde8..600c9aaabd 100644 --- a/src/Func.cc +++ b/src/Func.cc @@ -33,6 +33,7 @@ #include "zeek/Debug.h" #include "zeek/Desc.h" #include "zeek/Event.h" +#include "zeek/EventTrace.h" #include "zeek/Expr.h" #include "zeek/File.h" #include "zeek/Frame.h" @@ -132,20 +133,6 @@ std::string render_call_stack() return rval; } -Func::Func() - { - unique_id = unique_ids.size(); - unique_ids.push_back({NewRef{}, this}); - } - -Func::Func(Kind arg_kind) : kind(arg_kind) - { - unique_id = unique_ids.size(); - unique_ids.push_back({NewRef{}, this}); - } - -Func::~Func() = default; - void Func::AddBody(detail::StmtPtr /* new_body */, const std::vector& /* new_inits */, size_t /* new_frame_size */, int /* priority */) @@ -237,7 +224,6 @@ void Func::CopyStateInto(Func* other) const other->type = type; other->name = name; - other->unique_id = unique_id; } void Func::CheckPluginResult(bool handled, const ValPtr& hook_result, FunctionFlavor flavor) const @@ -401,6 +387,9 @@ ValPtr ScriptFunc::Invoke(zeek::Args* args, 
Frame* parent) const const CallExpr* call_expr = parent ? parent->GetCall() : nullptr; call_stack.emplace_back(CallInfo{call_expr, this, *args}); + if ( etm && Flavor() == FUNC_FLAVOR_EVENT ) + etm->StartEvent(this, args); + if ( g_trace_state.DoTrace() ) { ODesc d; @@ -481,6 +470,9 @@ ValPtr ScriptFunc::Invoke(zeek::Args* args, Frame* parent) const result = val_mgr->True(); } + else if ( etm && Flavor() == FUNC_FLAVOR_EVENT ) + etm->EndEvent(this, args); + // Warn if the function returns something, but we returned from // the function without an explicit return, or without a value. else if ( GetType()->Yield() && GetType()->Yield()->Tag() != TYPE_VOID && diff --git a/src/Func.h b/src/Func.h index 713a34fb67..7d89f88609 100644 --- a/src/Func.h +++ b/src/Func.h @@ -18,16 +18,11 @@ #include "zeek/ZeekArgs.h" #include "zeek/ZeekList.h" -namespace caf - { -template class expected; - } - namespace broker { class data; using vector = std::vector; -using caf::expected; +template class expected; } namespace zeek @@ -66,9 +61,7 @@ public: BUILTIN_FUNC }; - explicit Func(Kind arg_kind); - - ~Func() override; + explicit Func(Kind arg_kind) : kind(arg_kind) { } virtual bool IsPure() const = 0; FunctionFlavor Flavor() const { return GetType()->Flavor(); } @@ -127,14 +120,8 @@ public: virtual detail::TraversalCode Traverse(detail::TraversalCallback* cb) const; - uint32_t GetUniqueFuncID() const { return unique_id; } - static const FuncPtr& GetFuncPtrByID(uint32_t id) - { - return id >= unique_ids.size() ? Func::nil : unique_ids[id]; - } - protected: - Func(); + Func() = default; // Copies this function's state into other. 
void CopyStateInto(Func* other) const; @@ -144,11 +131,9 @@ protected: std::vector bodies; detail::ScopePtr scope; - Kind kind; - uint32_t unique_id; + Kind kind = SCRIPT_FUNC; FuncTypePtr type; std::string name; - static inline std::vector unique_ids; }; namespace detail @@ -303,7 +288,7 @@ protected: virtual void SetCaptures(Frame* f); private: - size_t frame_size; + size_t frame_size = 0; // List of the outer IDs used in the function. IDPList outer_ids; @@ -374,8 +359,8 @@ struct function_ingredients IDPtr id; StmtPtr body; std::vector inits; - int frame_size; - int priority; + int frame_size = 0; + int priority = 0; ScopePtr scope; }; diff --git a/src/ID.cc b/src/ID.cc index b58cf2e872..dd9b39a73d 100644 --- a/src/ID.cc +++ b/src/ID.cc @@ -248,13 +248,9 @@ void ID::UpdateValAttrs() if ( ! attrs ) return; - if ( val && val->GetType()->Tag() == TYPE_TABLE ) - val->AsTableVal()->SetAttrs(attrs); + auto tag = GetType()->Tag(); - if ( val && val->GetType()->Tag() == TYPE_FILE ) - val->AsFile()->SetAttrs(attrs.get()); - - if ( GetType()->Tag() == TYPE_FUNC ) + if ( tag == TYPE_FUNC ) { const auto& attr = attrs->Find(ATTR_ERROR_HANDLER); @@ -262,7 +258,7 @@ void ID::UpdateValAttrs() event_registry->SetErrorHandler(Name()); } - if ( GetType()->Tag() == TYPE_RECORD ) + if ( tag == TYPE_RECORD ) { const auto& attr = attrs->Find(ATTR_LOG); @@ -281,6 +277,17 @@ void ID::UpdateValAttrs() } } } + + if ( ! 
val ) + return; + + auto vtag = val->GetType()->Tag(); + + if ( vtag == TYPE_TABLE ) + val->AsTableVal()->SetAttrs(attrs); + + else if ( vtag == TYPE_FILE ) + val->AsFile()->SetAttrs(attrs.get()); } const AttrPtr& ID::GetAttr(AttrTag t) const diff --git a/src/IPAddr.h b/src/IPAddr.h index ea4ed9ac08..91170d73a0 100644 --- a/src/IPAddr.h +++ b/src/IPAddr.h @@ -504,7 +504,6 @@ inline void IPAddr::ConvertToThreadingValue(threading::Value::addr_t* v) const switch ( v->family ) { - case IPv4: CopyIPv4(&v->in.in4); return; @@ -512,9 +511,6 @@ inline void IPAddr::ConvertToThreadingValue(threading::Value::addr_t* v) const case IPv6: CopyIPv6(&v->in.in6); return; - - // Can't be reached. - abort(); } } diff --git a/src/NetVar.cc b/src/NetVar.cc index e9084f3085..3d66da0990 100644 --- a/src/NetVar.cc +++ b/src/NetVar.cc @@ -189,8 +189,6 @@ int dpd_ignore_ports; int check_for_unused_event_handlers; -double timer_mgr_inactivity_timeout; - int record_all_packets; bro_uint_t bits_per_uid; @@ -345,8 +343,6 @@ void init_net_var() dpd_match_only_beginning = id::find_val("dpd_match_only_beginning")->AsBool(); dpd_late_match_stop = id::find_val("dpd_late_match_stop")->AsBool(); dpd_ignore_ports = id::find_val("dpd_ignore_ports")->AsBool(); - - timer_mgr_inactivity_timeout = id::find_val("timer_mgr_inactivity_timeout")->AsInterval(); } } // namespace zeek::detail diff --git a/src/NetVar.h b/src/NetVar.h index b79e80d2b1..3f8e11bac0 100644 --- a/src/NetVar.h +++ b/src/NetVar.h @@ -90,8 +90,6 @@ extern int dpd_ignore_ports; extern int check_for_unused_event_handlers; -extern double timer_mgr_inactivity_timeout; - extern int record_all_packets; extern bro_uint_t bits_per_uid; diff --git a/src/Obj.h b/src/Obj.h index 0926e23512..a62ca4886a 100644 --- a/src/Obj.h +++ b/src/Obj.h @@ -38,6 +38,7 @@ public: #define YYLTYPE zeek::detail::yyltype using yyltype = Location; YYLTYPE GetCurrentLocation(); +void SetCurrentLocation(YYLTYPE currloc); // Used to mean "no location associated with this 
object". inline constexpr Location no_location("", 0, 0, 0, 0); diff --git a/src/OpaqueVal.cc b/src/OpaqueVal.cc index b43306e0e2..07d2d3899d 100644 --- a/src/OpaqueVal.cc +++ b/src/OpaqueVal.cc @@ -33,7 +33,7 @@ inline bool get_vector_idx(const V& v, unsigned int i, D* dst) if ( i >= v.size() ) return false; - auto x = caf::get_if(&v[i]); + auto x = broker::get_if(&v[i]); if ( ! x ) return false; @@ -81,12 +81,12 @@ broker::expected OpaqueVal::Serialize() const OpaqueValPtr OpaqueVal::Unserialize(const broker::data& data) { - auto v = caf::get_if(&data); + auto v = broker::get_if(&data); if ( ! (v && v->size() == 2) ) return nullptr; - auto type = caf::get_if(&(*v)[0]); + auto type = broker::get_if(&(*v)[0]); if ( ! type ) return nullptr; @@ -118,17 +118,17 @@ broker::expected OpaqueVal::SerializeType(const TypePtr& t) TypePtr OpaqueVal::UnserializeType(const broker::data& data) { - auto v = caf::get_if(&data); + auto v = broker::get_if(&data); if ( ! (v && v->size() == 2) ) return nullptr; - auto by_name = caf::get_if(&(*v)[0]); + auto by_name = broker::get_if(&(*v)[0]); if ( ! by_name ) return nullptr; if ( *by_name ) { - auto name = caf::get_if(&(*v)[1]); + auto name = broker::get_if(&(*v)[1]); if ( ! name ) return nullptr; @@ -142,7 +142,7 @@ TypePtr OpaqueVal::UnserializeType(const broker::data& data) return id->GetType(); } - auto tag = caf::get_if(&(*v)[1]); + auto tag = broker::get_if(&(*v)[1]); if ( ! tag ) return nullptr; @@ -215,7 +215,10 @@ HashVal::HashVal(OpaqueTypePtr t) : OpaqueVal(std::move(t)) valid = false; } -MD5Val::MD5Val() : HashVal(md5_type) { } +MD5Val::MD5Val() : HashVal(md5_type) + { + memset(&ctx, 0, sizeof(ctx)); + } MD5Val::~MD5Val() { } @@ -295,11 +298,11 @@ broker::expected MD5Val::DoSerialize() const bool MD5Val::DoUnserialize(const broker::data& data) { - auto d = caf::get_if(&data); + auto d = broker::get_if(&data); if ( ! d ) return false; - auto valid = caf::get_if(&(*d)[0]); + auto valid = broker::get_if(&(*d)[0]); if ( ! 
valid ) return false; @@ -312,7 +315,7 @@ bool MD5Val::DoUnserialize(const broker::data& data) if ( (*d).size() != 2 ) return false; - auto s = caf::get_if(&(*d)[1]); + auto s = broker::get_if(&(*d)[1]); if ( ! s ) return false; @@ -324,7 +327,10 @@ bool MD5Val::DoUnserialize(const broker::data& data) return true; } -SHA1Val::SHA1Val() : HashVal(sha1_type) { } +SHA1Val::SHA1Val() : HashVal(sha1_type) + { + memset(&ctx, 0, sizeof(ctx)); + } SHA1Val::~SHA1Val() { } @@ -385,11 +391,11 @@ broker::expected SHA1Val::DoSerialize() const bool SHA1Val::DoUnserialize(const broker::data& data) { - auto d = caf::get_if(&data); + auto d = broker::get_if(&data); if ( ! d ) return false; - auto valid = caf::get_if(&(*d)[0]); + auto valid = broker::get_if(&(*d)[0]); if ( ! valid ) return false; @@ -402,7 +408,7 @@ bool SHA1Val::DoUnserialize(const broker::data& data) if ( (*d).size() != 2 ) return false; - auto s = caf::get_if(&(*d)[1]); + auto s = broker::get_if(&(*d)[1]); if ( ! s ) return false; @@ -414,7 +420,10 @@ bool SHA1Val::DoUnserialize(const broker::data& data) return true; } -SHA256Val::SHA256Val() : HashVal(sha256_type) { } +SHA256Val::SHA256Val() : HashVal(sha256_type) + { + memset(&ctx, 0, sizeof(ctx)); + } SHA256Val::~SHA256Val() { } @@ -475,11 +484,11 @@ broker::expected SHA256Val::DoSerialize() const bool SHA256Val::DoUnserialize(const broker::data& data) { - auto d = caf::get_if(&data); + auto d = broker::get_if(&data); if ( ! d ) return false; - auto valid = caf::get_if(&(*d)[0]); + auto valid = broker::get_if(&(*d)[0]); if ( ! valid ) return false; @@ -492,7 +501,7 @@ bool SHA256Val::DoUnserialize(const broker::data& data) if ( (*d).size() != 2 ) return false; - auto s = caf::get_if(&(*d)[1]); + auto s = broker::get_if(&(*d)[1]); if ( ! s ) return false; @@ -546,7 +555,7 @@ broker::expected EntropyVal::DoSerialize() const bool EntropyVal::DoUnserialize(const broker::data& data) { - auto d = caf::get_if(&data); + auto d = broker::get_if(&data); if ( ! 
d ) return false; @@ -780,12 +789,12 @@ broker::expected BloomFilterVal::DoSerialize() const bool BloomFilterVal::DoUnserialize(const broker::data& data) { - auto v = caf::get_if(&data); + auto v = broker::get_if(&data); if ( ! (v && v->size() == 2) ) return false; - auto no_type = caf::get_if(&(*v)[0]); + auto no_type = broker::get_if(&(*v)[0]); if ( ! no_type ) { auto t = UnserializeType((*v)[0]); @@ -874,12 +883,12 @@ broker::expected CardinalityVal::DoSerialize() const bool CardinalityVal::DoUnserialize(const broker::data& data) { - auto v = caf::get_if(&data); + auto v = broker::get_if(&data); if ( ! (v && v->size() == 2) ) return false; - auto no_type = caf::get_if(&(*v)[0]); + auto no_type = broker::get_if(&(*v)[0]); if ( ! no_type ) { auto t = UnserializeType((*v)[0]); @@ -931,7 +940,7 @@ broker::expected ParaglobVal::DoSerialize() const bool ParaglobVal::DoUnserialize(const broker::data& data) { - auto d = caf::get_if(&data); + auto d = broker::get_if(&data); if ( ! d ) return false; diff --git a/src/Options.cc b/src/Options.cc index 9ddeed5b0c..0cf23a4fd6 100644 --- a/src/Options.cc +++ b/src/Options.cc @@ -114,6 +114,8 @@ void usage(const char* prog, int code) #endif fprintf(stderr, " -C|--no-checksums | ignore checksums\n"); fprintf(stderr, " -D|--deterministic | initialize random seeds to zero\n"); + fprintf(stderr, " -E|--event-trace | generate a replayable event trace to " + "the given file\n"); fprintf(stderr, " -F|--force-dns | force DNS\n"); fprintf(stderr, " -G|--load-seeds | load seeds from given file\n"); fprintf(stderr, " -H|--save-seeds | save seeds to given file\n"); @@ -193,7 +195,8 @@ static void print_analysis_help() fprintf(stderr, " no-ZAM-opt omit low-level ZAM optimization\n"); fprintf(stderr, " optimize-all optimize all scripts, even inlined ones\n"); fprintf(stderr, " optimize-AST optimize the (transformed) AST; implies xform\n"); - fprintf(stderr, " profile-ZAM generate to stdout a ZAM execution profile\n"); + fprintf(stderr, + " 
profile-ZAM generate to stdout a ZAM execution profile; implies -O ZAM\n"); fprintf(stderr, " report-recursive report on recursive functions and exit\n"); fprintf(stderr, " xform transform scripts to \"reduced\" form\n"); @@ -248,7 +251,7 @@ static void set_analysis_option(const char* opt, Options& opts) else if ( util::streq(opt, "optimize-AST") ) a_o.activate = a_o.optimize_AST = true; else if ( util::streq(opt, "profile-ZAM") ) - a_o.activate = a_o.profile_ZAM = true; + a_o.activate = a_o.gen_ZAM_code = a_o.profile_ZAM = true; else if ( util::streq(opt, "report-C++") ) a_o.report_CPP = true; else if ( util::streq(opt, "report-recursive") ) @@ -379,6 +382,7 @@ Options parse_cmdline(int argc, char** argv) {"no-checksums", no_argument, nullptr, 'C'}, {"force-dns", no_argument, nullptr, 'F'}, {"deterministic", no_argument, nullptr, 'D'}, + {"event-trace", required_argument, nullptr, 'E'}, {"load-seeds", required_argument, nullptr, 'G'}, {"save-seeds", required_argument, nullptr, 'H'}, {"print-plugins", no_argument, nullptr, 'N'}, @@ -399,7 +403,7 @@ Options parse_cmdline(int argc, char** argv) {"mem-profile", no_argument, nullptr, 'M'}, #endif - {"pseudo-realtime", optional_argument, nullptr, 'E'}, + {"pseudo-realtime", optional_argument, nullptr, '~'}, {"jobs", optional_argument, nullptr, 'j'}, {"test", no_argument, nullptr, '#'}, @@ -407,7 +411,7 @@ Options parse_cmdline(int argc, char** argv) }; char opts[256]; - util::safe_strncpy(opts, "B:c:e:f:G:H:I:i:j::n:O:0:o:p:r:s:T:t:U:w:X:CDFMNPQSWabdhmuv", + util::safe_strncpy(opts, "B:c:E:e:f:G:H:I:i:j::n:O:0:o:p:r:s:T:t:U:w:X:CDFMNPQSWabdhmuv", sizeof(opts)); int op; @@ -522,9 +526,7 @@ Options parse_cmdline(int argc, char** argv) rval.deterministic_mode = true; break; case 'E': - rval.pseudo_realtime = 1.0; - if ( optarg ) - rval.pseudo_realtime = atof(optarg); + rval.event_trace_file = optarg; break; case 'F': if ( rval.dns_mode != detail::DNS_DEFAULT ) @@ -585,6 +587,12 @@ Options parse_cmdline(int argc, char** 
argv) break; #endif + case '~': + rval.pseudo_realtime = 1.0; + if ( optarg ) + rval.pseudo_realtime = atof(optarg); + break; + case '#': fprintf(stderr, "ERROR: --test only allowed as first argument.\n"); usage(zargs[0], 1); diff --git a/src/Options.h b/src/Options.h index b9bd1c5e9b..0065b527b9 100644 --- a/src/Options.h +++ b/src/Options.h @@ -73,6 +73,7 @@ struct Options std::optional process_status_file; std::optional zeekygen_config_file; std::optional unprocessed_output_file; + std::optional event_trace_file; std::set plugins_to_load; std::vector scripts_to_load; diff --git a/src/PolicyFile.h b/src/PolicyFile.h index d85883cb0b..8631bc3c55 100644 --- a/src/PolicyFile.h +++ b/src/PolicyFile.h @@ -8,7 +8,7 @@ // (probably in the lexer). Then later any function that so desires // can call a relevant function. Note that since it caches the contents, // changes to the policy files will not be reflected until restart, -// which is probably good since it'll always display the code that Bro +// which is probably good since it'll always display the code that Zeek // is actually using. // policy_filename arguments should be absolute or relative paths; diff --git a/src/RandTest.cc b/src/RandTest.cc index 1422993695..5062c6968d 100644 --- a/src/RandTest.cc +++ b/src/RandTest.cc @@ -9,7 +9,7 @@ without fee is hereby granted, without any conditions or restrictions. This software is provided “as is” without express or implied warranty. 
- Modified for Bro by Seth Hall - July 2010 + Modified for Zeek/Bro by Seth Hall - July 2010 */ #include "zeek/RandTest.h" diff --git a/src/Reporter.cc b/src/Reporter.cc index 3bbb121208..0ed84562e7 100644 --- a/src/Reporter.cc +++ b/src/Reporter.cc @@ -9,6 +9,7 @@ #include #include +#include "zeek/3rdparty/doctest.h" #include "zeek/Conn.h" #include "zeek/Desc.h" #include "zeek/Event.h" @@ -676,10 +677,29 @@ void Reporter::DoLog(const char* prefix, EventHandlerPtr event, FILE* out, Conne } s += buffer; - s += "\n"; - if ( out ) +#ifdef ENABLE_ZEEK_UNIT_TESTS + if ( doctest::is_running_in_test ) + { + try + { + MESSAGE(s); + } + catch ( const doctest::detail::TestFailureException& e ) + { + // If doctest throws an exception, just write the string out to stdout + // like normal, just so it's captured somewhere. + fprintf(out, "%s\n", s.c_str()); + } + } + else + { +#endif + s += "\n"; fprintf(out, "%s", s.c_str()); +#ifdef ENABLE_ZEEK_UNIT_TESTS + } +#endif } if ( alloced ) diff --git a/src/RuleCondition.h b/src/RuleCondition.h index c83e712b3e..e32a4553cc 100644 --- a/src/RuleCondition.h +++ b/src/RuleCondition.h @@ -122,7 +122,7 @@ private: Comp comp; }; -// Implements "eval" which evaluates the given Bro identifier. +// Implements "eval" which evaluates the given Zeek identifier. class RuleConditionEval : public RuleCondition { public: diff --git a/src/RuleMatcher.cc b/src/RuleMatcher.cc index 42c3c568b0..a418c30cc6 100644 --- a/src/RuleMatcher.cc +++ b/src/RuleMatcher.cc @@ -266,6 +266,13 @@ bool RuleMatcher::ReadFiles(const std::vector& files) if ( ! f.full_path ) f.full_path = util::find_file(f.file, util::zeek_path(), ".sig"); + // We mimic previous Zeek versions by temporarily setting the current + // script location to the place where the loading happened. This + // behavior was never documented, but seems worth not breaking as some + // plugins ended up relying on it. 
+ Location orig_location = detail::GetCurrentLocation(); + detail::SetCurrentLocation(f.load_location); + std::pair> rc = {-1, std::nullopt}; rc.first = PLUGIN_HOOK_WITH_RESULT( HOOK_LOAD_FILE, HookLoadFile(zeek::plugin::Plugin::SIGNATURES, f.file, *f.full_path), @@ -277,6 +284,9 @@ bool RuleMatcher::ReadFiles(const std::vector& files) HookLoadFileExtended(zeek::plugin::Plugin::SIGNATURES, f.file, *f.full_path), std::make_pair(-1, std::nullopt)); + // Restore original location information. + detail::SetCurrentLocation(orig_location); + switch ( rc.first ) { case -1: diff --git a/src/RuleMatcher.h b/src/RuleMatcher.h index 76599c117a..6713a623c0 100644 --- a/src/RuleMatcher.h +++ b/src/RuleMatcher.h @@ -74,7 +74,7 @@ using maskedvalue_list = PList; using string_list = PList; using bstr_list = PList; -// Get values from Bro's script-level variables. +// Get values from Zeek's script-level variables. extern void id_to_maskedvallist(const char* id, maskedvalue_list* append_to, std::vector* prefix_vector = nullptr); extern char* id_to_str(const char* id); diff --git a/src/RunState.cc b/src/RunState.cc index 3a8e367714..76ac99e79d 100644 --- a/src/RunState.cc +++ b/src/RunState.cc @@ -190,7 +190,7 @@ void init_run(const std::optional& interface, if ( const auto& id = zeek::detail::global_scope()->Find("trace_output_file") ) id->SetVal(make_intrusive(writefile)); else - reporter->Error("trace_output_file not defined in bro.init"); + reporter->Error("trace_output_file not defined"); } zeek::detail::init_ip_addr_anonymizers(); @@ -283,7 +283,7 @@ void run_loop() { util::detail::set_processing_status("RUNNING", "run_loop"); - std::vector ready; + iosource::Manager::ReadySources ready; ready.reserve(iosource_mgr->TotalSize()); while ( iosource_mgr->Size() || (BifConst::exit_only_after_terminate && ! terminating) ) @@ -310,11 +310,16 @@ void run_loop() if ( ! 
ready.empty() ) { - for ( auto src : ready ) + for ( const auto& src : ready ) { - DBG_LOG(DBG_MAINLOOP, "processing source %s", src->Tag()); - current_iosrc = src; - src->Process(); + auto* iosrc = src.src; + + DBG_LOG(DBG_MAINLOOP, "processing source %s", iosrc->Tag()); + current_iosrc = iosrc; + if ( iosrc->ImplementsProcessFd() && src.fd != -1 ) + iosrc->ProcessFd(src.fd, src.flags); + else + iosrc->Process(); } } else if ( (have_pending_timers || communication_enabled || @@ -472,7 +477,7 @@ double pseudo_realtime = 0.0; double network_time = 0.0; // time according to last packet timestamp // (or current time) double processing_start_time = 0.0; // time started working on current pkt -double zeek_start_time = 0.0; // time Bro started. +double zeek_start_time = 0.0; // time Zeek started. double zeek_start_network_time; // timestamp of first packet bool terminating = false; // whether we're done reading and finishing up bool is_parsing = false; diff --git a/src/RunState.h b/src/RunState.h index 6bba345b74..3b9171c483 100644 --- a/src/RunState.h +++ b/src/RunState.h @@ -93,10 +93,10 @@ extern double pseudo_realtime; // queue. extern double processing_start_time; -// When the Bro process was started. +// When the Zeek process was started. extern double zeek_start_time; -// Time at which the Bro process was started with respect to network time, +// Time at which the Zeek process was started with respect to network time, // i.e. the timestamp of the first packet. extern double zeek_start_network_time; @@ -106,7 +106,7 @@ extern double network_time; // True if we're a in the process of cleaning-up just before termination. extern bool terminating; -// True if Bro is currently parsing scripts. +// True if Zeek is currently parsing scripts. 
extern bool is_parsing; extern const zeek::Packet* current_pkt; diff --git a/src/ScannedFile.cc b/src/ScannedFile.cc index 590511050e..0e25b47a27 100644 --- a/src/ScannedFile.cc +++ b/src/ScannedFile.cc @@ -49,8 +49,9 @@ bool ScannedFile::AlreadyScanned() const SignatureFile::SignatureFile(std::string file) : file(std::move(file)) { } -SignatureFile::SignatureFile(std::string file, std::string full_path) - : file(std::move(file)), full_path(std::move(full_path)) +SignatureFile::SignatureFile(std::string file, std::string full_path, Location load_location) + : file(std::move(file)), full_path(std::move(full_path)), + load_location(std::move(load_location)) { } diff --git a/src/ScannedFile.h b/src/ScannedFile.h index 9829b2c1d2..b51369ef8f 100644 --- a/src/ScannedFile.h +++ b/src/ScannedFile.h @@ -2,6 +2,7 @@ #pragma once +#include #include #include #include @@ -40,9 +41,10 @@ struct SignatureFile { std::string file; std::optional full_path; + Location load_location; SignatureFile(std::string file); - SignatureFile(std::string file, std::string full_path); + SignatureFile(std::string file, std::string full_path, Location load_location); }; extern std::vector sig_files; diff --git a/src/ScriptCoverageManager.h b/src/ScriptCoverageManager.h index 702137d9ed..1cc74bb43d 100644 --- a/src/ScriptCoverageManager.h +++ b/src/ScriptCoverageManager.h @@ -13,7 +13,7 @@ namespace zeek::detail class Stmt; /** - * A simple class for managing stats of Bro script coverage across Bro runs. + * A simple class for managing stats of Zeek script coverage across Zeek runs. */ class ScriptCoverageManager { @@ -22,7 +22,7 @@ public: virtual ~ScriptCoverageManager(); /** - * Imports Bro script Stmt usage information from file pointed to by + * Imports Zeek script Stmt usage information from file pointed to by * environment variable ZEEK_PROFILER_FILE. * * @return: true if usage info was read, otherwise false. 
diff --git a/src/Stmt.cc b/src/Stmt.cc index 7cbb13d7da..23b3faea4c 100644 --- a/src/Stmt.cc +++ b/src/Stmt.cc @@ -8,6 +8,7 @@ #include "zeek/Debug.h" #include "zeek/Desc.h" #include "zeek/Event.h" +#include "zeek/EventTrace.h" #include "zeek/Expr.h" #include "zeek/File.h" #include "zeek/Frame.h" @@ -396,7 +397,7 @@ void do_print_stmt(const std::vector& vals) ExprStmt::ExprStmt(ExprPtr arg_e) : Stmt(STMT_EXPR), e(std::move(arg_e)) { - if ( e && e->IsPure() && e->GetType()->Tag() != TYPE_ERROR ) + if ( e && e->Tag() != EXPR_CALL && e->IsPure() && e->GetType()->Tag() != TYPE_ERROR ) Warn("expression value ignored"); SetLocationInfo(e->GetLocationInfo()); @@ -1076,11 +1077,17 @@ EventStmt::EventStmt(EventExprPtr arg_e) : ExprStmt(STMT_EVENT, arg_e), event_ex ValPtr EventStmt::Exec(Frame* f, StmtFlowType& flow) { RegisterAccess(); + auto args = eval_list(f, event_expr->Args()); auto h = event_expr->Handler(); if ( args && h ) + { + if ( etm ) + etm->ScriptEventQueued(h); + event_mgr.Enqueue(h, std::move(*args)); + } flow = FLOW_NEXT; return nullptr; diff --git a/src/Trigger.cc b/src/Trigger.cc index 0ce780ef38..9be2bed8d0 100644 --- a/src/Trigger.cc +++ b/src/Trigger.cc @@ -166,7 +166,7 @@ void Trigger::Init(ExprPtr arg_cond, StmtPtr arg_body, StmtPtr arg_timeout_stmts DBG_LOG(DBG_NOTIFIERS, "%s: instantiating", Name()); - if ( is_return ) + if ( is_return && frame && arg_frame ) { Trigger* parent = frame->GetTrigger(); if ( ! 
parent ) diff --git a/src/Type.cc b/src/Type.cc index cb459533a6..808fff7bff 100644 --- a/src/Type.cc +++ b/src/Type.cc @@ -42,21 +42,19 @@ const char* type_name(TypeTag t) "string", // 7 "pattern", // 8 "enum", // 9 - "timer", // 10 - "port", // 11 - "addr", // 12 - "subnet", // 13 - "any", // 14 - "table", // 15 - "union", // 16 - "record", // 17 - "types", // 18 - "func", // 19 - "file", // 20 - "vector", // 21 - "opaque", // 22 - "type", // 23 - "error", // 24 + "port", // 10 + "addr", // 11 + "subnet", // 12 + "any", // 13 + "table", // 14 + "record", // 15 + "types", // 16 + "func", // 17 + "file", // 18 + "vector", // 19 + "opaque", // 20 + "type", // 21 + "error", // 22 }; if ( int(t) >= NUM_TYPES ) @@ -208,7 +206,6 @@ TypePtr Type::ShallowClone() case TYPE_INTERVAL: case TYPE_STRING: case TYPE_PATTERN: - case TYPE_TIMER: case TYPE_PORT: case TYPE_ADDR: case TYPE_SUBNET: @@ -518,6 +515,29 @@ TableType::TableType(TypeListPtr ind, TypePtr yield) } bool TableType::CheckExpireFuncCompatibility(const detail::AttrPtr& attr) + { + if ( reported_error ) + return false; + + bool success = DoExpireCheck(attr); + if ( ! success ) + reported_error = true; + + return success; + } + +TypePtr TableType::ShallowClone() + { + return make_intrusive(indices, yield_type); + } + +bool TableType::IsUnspecifiedTable() const + { + // Unspecified types have an empty list of indices. + return indices->GetTypes().empty(); + } + +bool TableType::DoExpireCheck(const detail::AttrPtr& attr) { assert(attr->Tag() == detail::ATTR_EXPIRE_FUNC); @@ -572,17 +592,6 @@ bool TableType::CheckExpireFuncCompatibility(const detail::AttrPtr& attr) return true; } -TypePtr TableType::ShallowClone() - { - return make_intrusive(indices, yield_type); - } - -bool TableType::IsUnspecifiedTable() const - { - // Unspecified types have an empty list of indices. 
- return indices->GetTypes().empty(); - } - SetType::SetType(TypeListPtr ind, detail::ListExprPtr arg_elements) : TableType(std::move(ind), nullptr), elements(std::move(arg_elements)) { @@ -719,6 +728,9 @@ bool FuncType::CheckArgs(const TypePList* args, bool is_init, bool do_warn) cons bool FuncType::CheckArgs(const std::vector& args, bool is_init, bool do_warn) const { + if ( reported_error ) + return false; + const auto& my_args = arg_types->GetTypes(); if ( my_args.size() != args.size() ) @@ -726,6 +738,7 @@ bool FuncType::CheckArgs(const std::vector& args, bool is_init, bool do if ( do_warn ) Warn(util::fmt("Wrong number of arguments for function. Expected %zu, got %zu.", args.size(), my_args.size())); + const_cast(this)->reported_error = true; return false; } @@ -740,6 +753,8 @@ bool FuncType::CheckArgs(const std::vector& args, bool is_init, bool do success = false; } + const_cast(this)->reported_error = ! success; + return success; } @@ -1870,7 +1885,6 @@ bool same_type(const Type& arg_t1, const Type& arg_t2, bool is_init, bool match_ case TYPE_INTERVAL: case TYPE_STRING: case TYPE_PATTERN: - case TYPE_TIMER: case TYPE_PORT: case TYPE_ADDR: case TYPE_SUBNET: @@ -1971,9 +1985,6 @@ bool same_type(const Type& arg_t1, const Type& arg_t2, bool is_init, bool match_ case TYPE_FILE: case TYPE_TYPE: break; - - case TYPE_UNION: - reporter->Error("union type in same_type()"); } // If we get to here, then we're dealing with a type with @@ -2190,7 +2201,6 @@ bool is_assignable(TypeTag t) case TYPE_STRING: case TYPE_PATTERN: case TYPE_ENUM: - case TYPE_TIMER: case TYPE_PORT: case TYPE_ADDR: case TYPE_SUBNET: @@ -2210,9 +2220,6 @@ bool is_assignable(TypeTag t) case TYPE_VOID: return false; - - case TYPE_UNION: - reporter->Error("union type in is_assignable()"); } return false; @@ -2244,12 +2251,218 @@ TypeTag max_type(TypeTag t1, TypeTag t2) } } +TypePtr merge_enum_types(const Type* t1, const Type* t2) + { + // Could compare pointers t1 == t2, but maybe there's someone out 
+ // there creating clones of the type, so safer to compare name. + if ( t1->GetName() != t2->GetName() ) + { + std::string msg = util::fmt("incompatible enum types: '%s' and '%s'", t1->GetName().data(), + t2->GetName().data()); + + t1->Error(msg.data(), t2); + return nullptr; + } + + // Doing a lookup here as a roundabout way of ref-ing t1, without + // changing the function params which has t1 as const and also + // (potentially) avoiding a pitfall mentioned earlier about clones. + const auto& id = detail::global_scope()->Find(t1->GetName()); + + if ( id && id->IsType() && id->GetType()->Tag() == TYPE_ENUM ) + // It should make most sense to return the real type here rather + // than a copy since it may be redef'd later in parsing. If we + // return a copy, then whoever is using this return value won't + // actually see those changes from the redef. + return id->GetType(); + + std::string msg = util::fmt("incompatible enum types: '%s' and '%s'" + " ('%s' enum type ID is invalid)", + t1->GetName().data(), t2->GetName().data(), t1->GetName().data()); + t1->Error(msg.data(), t2); + return nullptr; + } + +TypePtr merge_table_types(const Type* t1, const Type* t2) + { + const IndexType* it1 = (const IndexType*)t1; + const IndexType* it2 = (const IndexType*)t2; + + const auto& tl1 = it1->GetIndexTypes(); + const auto& tl2 = it2->GetIndexTypes(); + TypeListPtr tl3; + + if ( tl1.size() != tl2.size() ) + { + t1->Error("incompatible types", t2); + return nullptr; + } + + tl3 = make_intrusive(); + + for ( auto i = 0u; i < tl1.size(); ++i ) + { + auto tl3_i = merge_types(tl1[i], tl2[i]); + if ( ! tl3_i ) + return nullptr; + + tl3->Append(std::move(tl3_i)); + } + + const auto& y1 = t1->Yield(); + const auto& y2 = t2->Yield(); + TypePtr y3; + + if ( y1 || y2 ) + { + if ( ! y1 || ! y2 ) + { + t1->Error("incompatible types", t2); + return nullptr; + } + + y3 = merge_types(y1, y2); + if ( ! 
y3 ) + return nullptr; + } + + if ( t1->IsSet() ) + return make_intrusive(std::move(tl3), nullptr); + else + return make_intrusive(std::move(tl3), std::move(y3)); + } + +TypePtr merge_func_types(const Type* t1, const Type* t2) + { + if ( ! same_type(t1, t2) ) + { + t1->Error("incompatible types", t2); + return nullptr; + } + + const FuncType* ft1 = (const FuncType*)t1; + const FuncType* ft2 = (const FuncType*)t1; + auto args = cast_intrusive(merge_types(ft1->Params(), ft2->Params())); + auto yield = t1->Yield() ? merge_types(t1->Yield(), t2->Yield()) : nullptr; + + return make_intrusive(std::move(args), std::move(yield), ft1->Flavor()); + } + +TypePtr merge_record_types(const Type* t1, const Type* t2) + { + const RecordType* rt1 = (const RecordType*)t1; + const RecordType* rt2 = (const RecordType*)t2; + + // We allow the records to have different numbers of fields. + // We first go through all of the fields in rt1, and then we + // check for whether rt2 has any additional fields. + + type_decl_list* tdl3 = new type_decl_list(); + + for ( int i = 0; i < rt1->NumFields(); ++i ) + { + auto td1 = rt1->FieldDecl(i); + auto td2_offset_i = rt2->FieldOffset(rt1->FieldName(i)); + + TypePtr tdl3_i; + auto attrs3 = make_intrusive(nullptr, true, false); + + if ( td1->attrs ) + attrs3->AddAttrs(td1->attrs); + + if ( td2_offset_i >= 0 ) + { + auto td2 = rt2->FieldDecl(td2_offset_i); + tdl3_i = merge_types(td1->type, td2->type); + + if ( td2->attrs ) + attrs3->AddAttrs(td2->attrs); + + if ( ! util::streq(td1->id, td2->id) || ! tdl3_i ) + { + t1->Error("incompatible record fields", t2); + delete tdl3; + return nullptr; + } + } + else + { + tdl3_i = td1->type; + attrs3->AddAttr(make_intrusive(detail::ATTR_OPTIONAL)); + } + + if ( attrs3->GetAttrs().empty() ) + attrs3 = nullptr; + + auto td3 = new TypeDecl(util::copy_string(td1->id), std::move(tdl3_i), attrs3); + + tdl3->push_back(td3); + } + + // Now add in any extras from rt2. 
+ for ( int i = 0; i < rt2->NumFields(); ++i ) + { + auto td2 = rt2->FieldDecl(i); + auto td1_offset_i = rt1->FieldOffset(rt2->FieldName(i)); + + if ( td1_offset_i < 0 ) + { + auto attrs3 = make_intrusive(nullptr, true, false); + if ( td2->attrs ) + attrs3->AddAttrs(td2->attrs); + + attrs3->AddAttr(make_intrusive(detail::ATTR_OPTIONAL)); + auto td_merge = new TypeDecl(util::copy_string(td2->id), std::move(td2->type), attrs3); + tdl3->push_back(td_merge); + } + } + + return make_intrusive(tdl3); + } + +TypePtr merge_list_types(const Type* t1, const Type* t2) + { + const TypeList* tl1 = t1->AsTypeList(); + const TypeList* tl2 = t2->AsTypeList(); + + if ( tl1->IsPure() != tl2->IsPure() ) + { + tl1->Error("incompatible lists", tl2); + return nullptr; + } + + const auto& l1 = tl1->GetTypes(); + const auto& l2 = tl2->GetTypes(); + + if ( l1.size() == 0 || l2.size() == 0 ) + { + if ( l1.size() == 0 ) + tl1->Error("empty list"); + else + tl2->Error("empty list"); + return nullptr; + } + + if ( l1.size() != l2.size() ) + { + tl1->Error("different number of indices", tl2); + return nullptr; + } + + auto tl3 = make_intrusive(); + + for ( auto i = 0u; i < l1.size(); ++i ) + tl3->Append(merge_types(l1[i], l2[i])); + + return tl3; + } + TypePtr merge_types(const TypePtr& arg_t1, const TypePtr& arg_t2) { auto t1 = arg_t1.get(); auto t2 = arg_t2.get(); - t1 = flatten_type(t1); - t2 = flatten_type(t2); + // t1 = flatten_type(t1); + // t2 = flatten_type(t2); TypeTag tg1 = t1->Tag(); TypeTag tg2 = t2->Tag(); @@ -2269,7 +2482,6 @@ TypePtr merge_types(const TypePtr& arg_t1, const TypePtr& arg_t2) case TYPE_INTERVAL: case TYPE_STRING: case TYPE_PATTERN: - case TYPE_TIMER: case TYPE_PORT: case TYPE_ADDR: case TYPE_SUBNET: @@ -2279,179 +2491,19 @@ TypePtr merge_types(const TypePtr& arg_t1, const TypePtr& arg_t2) return base_type(tg1); case TYPE_ENUM: - { - // Could compare pointers t1 == t2, but maybe there's someone out - // there creating clones of the type, so safer to compare name. 
- if ( t1->GetName() != t2->GetName() ) - { - std::string msg = util::fmt("incompatible enum types: '%s' and '%s'", - t1->GetName().data(), t2->GetName().data()); - - t1->Error(msg.data(), t2); - return nullptr; - } - - // Doing a lookup here as a roundabout way of ref-ing t1, without - // changing the function params which has t1 as const and also - // (potentially) avoiding a pitfall mentioned earlier about clones. - const auto& id = detail::global_scope()->Find(t1->GetName()); - - if ( id && id->IsType() && id->GetType()->Tag() == TYPE_ENUM ) - // It should make most sense to return the real type here rather - // than a copy since it may be redef'd later in parsing. If we - // return a copy, then whoever is using this return value won't - // actually see those changes from the redef. - return id->GetType(); - - std::string msg = util::fmt("incompatible enum types: '%s' and '%s'" - " ('%s' enum type ID is invalid)", - t1->GetName().data(), t2->GetName().data(), - t1->GetName().data()); - t1->Error(msg.data(), t2); - return nullptr; - } + return merge_enum_types(t1, t2); case TYPE_TABLE: - { - const IndexType* it1 = (const IndexType*)t1; - const IndexType* it2 = (const IndexType*)t2; - - const auto& tl1 = it1->GetIndexTypes(); - const auto& tl2 = it2->GetIndexTypes(); - TypeListPtr tl3; - - if ( tl1.size() != tl2.size() ) - { - t1->Error("incompatible types", t2); - return nullptr; - } - - tl3 = make_intrusive(); - - for ( auto i = 0u; i < tl1.size(); ++i ) - { - auto tl3_i = merge_types(tl1[i], tl2[i]); - if ( ! tl3_i ) - return nullptr; - - tl3->Append(std::move(tl3_i)); - } - - const auto& y1 = t1->Yield(); - const auto& y2 = t2->Yield(); - TypePtr y3; - - if ( y1 || y2 ) - { - if ( ! y1 || ! y2 ) - { - t1->Error("incompatible types", t2); - return nullptr; - } - - y3 = merge_types(y1, y2); - if ( ! 
y3 ) - return nullptr; - } - - if ( t1->IsSet() ) - return make_intrusive(std::move(tl3), nullptr); - else - return make_intrusive(std::move(tl3), std::move(y3)); - } + return merge_table_types(t1, t2); case TYPE_FUNC: - { - if ( ! same_type(t1, t2) ) - { - t1->Error("incompatible types", t2); - return nullptr; - } - - const FuncType* ft1 = (const FuncType*)t1; - const FuncType* ft2 = (const FuncType*)t1; - auto args = cast_intrusive(merge_types(ft1->Params(), ft2->Params())); - auto yield = t1->Yield() ? merge_types(t1->Yield(), t2->Yield()) : nullptr; - - return make_intrusive(std::move(args), std::move(yield), ft1->Flavor()); - } + return merge_func_types(t1, t2); case TYPE_RECORD: - { - const RecordType* rt1 = (const RecordType*)t1; - const RecordType* rt2 = (const RecordType*)t2; - - if ( rt1->NumFields() != rt2->NumFields() ) - return nullptr; - - type_decl_list* tdl3 = new type_decl_list(rt1->NumFields()); - - for ( int i = 0; i < rt1->NumFields(); ++i ) - { - const TypeDecl* td1 = rt1->FieldDecl(i); - const TypeDecl* td2 = rt2->FieldDecl(i); - auto tdl3_i = merge_types(td1->type, td2->type); - - if ( ! util::streq(td1->id, td2->id) || ! 
tdl3_i ) - { - t1->Error("incompatible record fields", t2); - delete tdl3; - return nullptr; - } - - tdl3->push_back(new TypeDecl(util::copy_string(td1->id), std::move(tdl3_i))); - } - - return make_intrusive(tdl3); - } + return merge_record_types(t1, t2); case TYPE_LIST: - { - const TypeList* tl1 = t1->AsTypeList(); - const TypeList* tl2 = t2->AsTypeList(); - - if ( tl1->IsPure() != tl2->IsPure() ) - { - tl1->Error("incompatible lists", tl2); - return nullptr; - } - - const auto& l1 = tl1->GetTypes(); - const auto& l2 = tl2->GetTypes(); - - if ( l1.size() == 0 || l2.size() == 0 ) - { - if ( l1.size() == 0 ) - tl1->Error("empty list"); - else - tl2->Error("empty list"); - return nullptr; - } - - if ( tl1->IsPure() ) - { - // We will be expanding the pure list when converting - // the initialization expression into a set of values. - // So the merge type of the list is the type of one - // of the elements, providing they're consistent. - return merge_types(l1[0], l2[0]); - } - - // Impure lists - must have the same size and match element - // by element. - if ( l1.size() != l2.size() ) - { - tl1->Error("different number of indices", tl2); - return nullptr; - } - - auto tl3 = make_intrusive(); - - for ( auto i = 0u; i < l1.size(); ++i ) - tl3->Append(merge_types(l1[i], l2[i])); - - return tl3; - } + return merge_list_types(t1, t2); case TYPE_VECTOR: if ( ! 
same_type(t1->Yield(), t2->Yield()) ) @@ -2471,10 +2523,6 @@ TypePtr merge_types(const TypePtr& arg_t1, const TypePtr& arg_t2) return make_intrusive(merge_types(t1->Yield(), t2->Yield())); - case TYPE_UNION: - reporter->InternalError("union type in merge_types()"); - return nullptr; - default: reporter->InternalError("bad type in merge_types()"); return nullptr; @@ -2526,26 +2574,96 @@ static Type* reduce_type(Type* t) return t; } -TypePtr init_type(detail::Expr* init) +static TableTypePtr init_table_type(detail::ListExpr* l) + { + auto& elems = l->Exprs(); + TypePtr index; + TypePtr yield; + + for ( auto e : elems ) + { + if ( e->Tag() != detail::EXPR_ASSIGN ) + { + e->Error("table constructor element lacks '=' structure"); + return nullptr; + } + + auto& ind = e->GetOp1()->GetType(); + auto& y = e->GetOp2()->GetType(); + + if ( ! index ) + { + index = ind; + yield = y; + continue; + } + + index = merge_types(index, ind); + yield = merge_types(yield, y); + + if ( ! index || ! yield ) + // Error message already generated. + return nullptr; + } + + if ( index->Tag() != TYPE_LIST ) + return nullptr; + + return make_intrusive(cast_intrusive(index), yield); + } + +static SetTypePtr init_set_type(detail::ListExpr* l) + { + auto& elems = l->Exprs(); + TypePtr index; + + for ( auto e : elems ) + { + auto& ind = e->GetType(); + + if ( ! index ) + { + index = ind; + continue; + } + + index = merge_types(index, ind); + + if ( ! index ) + return nullptr; + } + + TypeListPtr ind_list; + + if ( index->Tag() == TYPE_LIST ) + ind_list = cast_intrusive(index); + else + { + ind_list = make_intrusive(index); + ind_list->Append(index); + } + + return make_intrusive(ind_list, nullptr); + } + +TypePtr init_type(const detail::ExprPtr& init) { if ( init->Tag() != detail::EXPR_LIST ) { auto t = init->InitType(); - if ( ! 
t ) - return nullptr; - - if ( t->Tag() == TYPE_LIST && t->AsTypeList()->GetTypes().size() != 1 ) + if ( (t->Tag() == TYPE_TABLE && cast_intrusive(t)->IsUnspecifiedTable()) || + (t->Tag() == TYPE_VECTOR && cast_intrusive(t)->IsUnspecifiedVector()) ) { - init->Error("list used in scalar initialization"); + init->Error("empty constructor in untyped initialization"); return nullptr; } return t; } - detail::ListExpr* init_list = init->AsListExpr(); - const ExprPList& el = init_list->Exprs(); + auto init_list = init->AsListExpr(); + const auto& el = init_list->Exprs(); if ( el.length() == 0 ) { @@ -2554,58 +2672,16 @@ TypePtr init_type(detail::Expr* init) } // Could be a record, a set, or a list of table elements. - detail::Expr* e0 = el[0]; + auto e0 = el[0]; if ( e0->IsRecordElement(nullptr) ) - // ListExpr's know how to build a record from their - // components. + // ListExpr's know how to build a record from their components. return init_list->InitType(); - auto t = e0->InitType(); - - if ( t ) - t = {NewRef{}, reduce_type(t.get())}; - - if ( ! t ) - return nullptr; - - for ( int i = 1; t && i < el.length(); ++i ) - { - auto el_t = el[i]->InitType(); - TypePtr ti; - - if ( el_t ) - ti = {NewRef{}, reduce_type(el_t.get())}; - - if ( ! ti ) - return nullptr; - - if ( same_type(t, ti) ) - continue; - - t = merge_types(t, ti); - } - - if ( ! t ) - { - init->Error("type error in initialization"); - return nullptr; - } - - if ( t->Tag() == TYPE_TABLE && ! t->AsTableType()->IsSet() ) - // A list of table elements. - return t; - - // A set. If the index type isn't yet a type list, make - // it one, as that's what's required for creating a set type. 
- if ( t->Tag() != TYPE_LIST ) - { - auto tl = make_intrusive(t); - tl->Append(std::move(t)); - t = std::move(tl); - } - - return make_intrusive(cast_intrusive(std::move(t)), nullptr); + if ( e0->Tag() == detail::EXPR_ASSIGN ) + return init_table_type(init_list); + else + return init_set_type(init_list); } bool is_atomic_type(const Type& t) diff --git a/src/Type.h b/src/Type.h index f29180fb0b..dd4dd80b91 100644 --- a/src/Type.h +++ b/src/Type.h @@ -49,21 +49,19 @@ enum TypeTag TYPE_STRING, // 7 TYPE_PATTERN, // 8 TYPE_ENUM, // 9 - TYPE_TIMER, // 10 - TYPE_PORT, // 11 - TYPE_ADDR, // 12 - TYPE_SUBNET, // 13 - TYPE_ANY, // 14 - TYPE_TABLE, // 15 - TYPE_UNION, // 16 - TYPE_RECORD, // 17 - TYPE_LIST, // 18 - TYPE_FUNC, // 19 - TYPE_FILE, // 20 - TYPE_VECTOR, // 21 - TYPE_OPAQUE, // 22 - TYPE_TYPE, // 23 - TYPE_ERROR // 24 + TYPE_PORT, // 10 + TYPE_ADDR, // 11 + TYPE_SUBNET, // 12 + TYPE_ANY, // 13 + TYPE_TABLE, // 14 + TYPE_RECORD, // 15 + TYPE_LIST, // 16 + TYPE_FUNC, // 17 + TYPE_FILE, // 18 + TYPE_VECTOR, // 19 + TYPE_OPAQUE, // 20 + TYPE_TYPE, // 21 + TYPE_ERROR // 22 #define NUM_TYPES (int(TYPE_ERROR) + 1) }; @@ -126,10 +124,8 @@ constexpr InternalTypeTag to_internal_type_tag(TypeTag tag) noexcept return TYPE_INTERNAL_SUBNET; case TYPE_PATTERN: - case TYPE_TIMER: case TYPE_ANY: case TYPE_TABLE: - case TYPE_UNION: case TYPE_RECORD: case TYPE_LIST: case TYPE_FUNC: @@ -410,6 +406,12 @@ public: // Returns true if this table type is "unspecified", which is // what one gets using an empty "set()" or "table()" constructor. bool IsUnspecifiedTable() const; + +private: + bool DoExpireCheck(const detail::AttrPtr& attr); + + // Used to prevent repeated error messages. + bool reported_error = false; }; class SetType final : public TableType @@ -542,6 +544,9 @@ protected: std::optional captures; // if nil then no captures specified // Used for internal lambdas built for "when" statements: bool expressionless_return_okay = false; + + // Used to prevent repeated error messages. 
+ bool reported_error = false; }; class TypeType final : public Type @@ -904,7 +909,7 @@ TypePtr merge_types(const TypePtr& t1, const TypePtr& t2); TypePtr merge_type_list(detail::ListExpr* elements); // Given an expression, infer its type when used for an initialization. -TypePtr init_type(detail::Expr* init); +TypePtr init_type(const detail::ExprPtr& init); // Returns true if argument is an atomic type. bool is_atomic_type(const Type& t); @@ -951,7 +956,7 @@ inline bool IsInterval(TypeTag t) // True if the given type tag corresponds to a record type. inline bool IsRecord(TypeTag t) { - return (t == TYPE_RECORD || t == TYPE_UNION); + return (t == TYPE_RECORD); } // True if the given type tag corresponds to a function type. diff --git a/src/Val.cc b/src/Val.cc index d476f32a6c..1d0055f71b 100644 --- a/src/Val.cc +++ b/src/Val.cc @@ -420,8 +420,9 @@ static void BuildJSON(threading::formatter::JSON::NullDoubleWriter& writer, Val* } rapidjson::Value j; + auto tag = val->GetType()->Tag(); - switch ( val->GetType()->Tag() ) + switch ( tag ) { case TYPE_BOOL: writer.Bool(val->AsBool()); @@ -475,8 +476,15 @@ static void BuildJSON(threading::formatter::JSON::NullDoubleWriter& writer, Val* ODesc d; d.SetStyle(RAW_STYLE); val->Describe(&d); - writer.String(util::json_escape_utf8( - std::string(reinterpret_cast(d.Bytes()), d.Len()))); + std::string desc(reinterpret_cast(d.Bytes()), d.Len()); + + // None of our function types should have surrounding + // whitespace, but ODesc might produce it due to its + // many output modes and flags. Strip it. 
+ if ( tag == TYPE_FUNC ) + desc = util::strstrip(desc); + + writer.String(util::json_escape_utf8(desc)); break; } @@ -939,9 +947,7 @@ StringVal::StringVal(int length, const char* s) { } -StringVal::StringVal(const char* s) : StringVal(new String(s)) { } - -StringVal::StringVal(const string& s) : StringVal(s.length(), s.data()) { } +StringVal::StringVal(std::string_view s) : StringVal(s.length(), s.data()) { } StringVal::~StringVal() { @@ -1770,57 +1776,6 @@ bool TableVal::IsSubsetOf(const TableVal& tv) const return true; } -bool TableVal::ExpandAndInit(ValPtr index, ValPtr new_val) - { - const auto& index_type = index->GetType(); - - if ( index_type->IsSet() ) - { - index = index->AsTableVal()->ToListVal(); - return ExpandAndInit(std::move(index), std::move(new_val)); - } - - if ( index_type->Tag() != TYPE_LIST ) - // Nothing to expand. - return CheckAndAssign(std::move(index), std::move(new_val)); - - ListVal* iv = index->AsListVal(); - if ( iv->BaseTag() != TYPE_ANY ) - { - if ( table_type->GetIndices()->GetTypes().size() != 1 ) - reporter->InternalError("bad singleton list index"); - - for ( int i = 0; i < iv->Length(); ++i ) - if ( ! ExpandAndInit(iv->Idx(i), new_val) ) - return false; - - return true; - } - - else - { // Compound table. - int i; - - for ( i = 0; i < iv->Length(); ++i ) - { - const auto& v = iv->Idx(i); - // ### if CompositeHash::ComputeHash did flattening - // of 1-element lists (like ComputeSingletonHash does), - // then we could optimize here. - const auto& t = v->GetType(); - - if ( t->IsSet() || t->Tag() == TYPE_LIST ) - break; - } - - if ( i >= iv->Length() ) - // Nothing to expand. 
- return CheckAndAssign(std::move(index), std::move(new_val)); - else - return ExpandCompoundAndInit(iv, i, std::move(new_val)); - } - } - ValPtr TableVal::Default(const ValPtr& index) { const auto& def_attr = GetAttr(detail::ATTR_DEFAULT); @@ -2145,15 +2100,7 @@ void TableVal::SendToStore(const Val* index, const TableEntryVal* new_entry_val, case ELEMENT_NEW: case ELEMENT_CHANGED: { -#ifndef __clang__ -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wmaybe-uninitialized" -#endif - broker::optional expiry; -#ifndef __clang__ -#pragma GCC diagnostic pop -#endif - + std::optional expiry; auto expire_time = GetExpireTime(); if ( expire_time == 0 ) // Entry is set to immediately expire. Let's not forward it. @@ -2456,49 +2403,6 @@ void TableVal::Describe(ODesc* d) const } } -bool TableVal::ExpandCompoundAndInit(ListVal* lv, int k, ValPtr new_val) - { - Val* ind_k_v = lv->Idx(k).get(); - auto ind_k = ind_k_v->GetType()->IsSet() ? ind_k_v->AsTableVal()->ToListVal() - : ListValPtr{NewRef{}, ind_k_v->AsListVal()}; - - for ( int i = 0; i < ind_k->Length(); ++i ) - { - const auto& ind_k_i = ind_k->Idx(i); - auto expd = make_intrusive(TYPE_ANY); - - for ( auto j = 0; j < lv->Length(); ++j ) - { - const auto& v = lv->Idx(j); - - if ( j == k ) - expd->Append(ind_k_i); - else - expd->Append(v); - } - - if ( ! ExpandAndInit(std::move(expd), new_val) ) - return false; - } - - return true; - } - -bool TableVal::CheckAndAssign(ValPtr index, ValPtr new_val) - { - Val* v = nullptr; - if ( subnets ) - // We need an exact match here. - v = (Val*)subnets->Lookup(index.get(), true); - else - v = Find(index).get(); - - if ( v ) - index->Warn("multiple initializations for index"); - - return Assign(std::move(index), std::move(new_val)); - } - void TableVal::InitDefaultFunc(detail::Frame* f) { // Value aready initialized. @@ -2511,6 +2415,14 @@ void TableVal::InitDefaultFunc(detail::Frame* f) return; const auto& ytype = GetType()->Yield(); + + if ( ! 
ytype ) + // This happens for empty table() constructors. Don't + // instantiate a default value at this point, as we'll + // first need to type-check the attribute when the value + // is finally used. + return; + const auto& dtype = def_attr->GetExpr()->GetType(); if ( dtype->Tag() == TYPE_RECORD && ytype->Tag() == TYPE_RECORD && ! same_type(dtype, ytype) && @@ -2969,13 +2881,12 @@ ValPtr RecordVal::GetFieldOrDefault(const char* field) const return GetFieldOrDefault(idx); } -RecordValPtr RecordVal::CoerceTo(RecordTypePtr t, RecordValPtr aggr, bool allow_orphaning) const +RecordValPtr RecordVal::DoCoerceTo(RecordTypePtr t, bool allow_orphaning) const { if ( ! record_promotion_compatible(t.get(), GetType()->AsRecordType()) ) return nullptr; - if ( ! aggr ) - aggr = make_intrusive(std::move(t)); + auto aggr = make_intrusive(std::move(t)); RecordType* ar_t = aggr->GetType()->AsRecordType(); const RecordType* rv_t = GetType()->AsRecordType(); @@ -3033,7 +2944,7 @@ RecordValPtr RecordVal::CoerceTo(RecordTypePtr t, bool allow_orphaning) if ( same_type(GetType(), t) ) return {NewRef{}, this}; - return CoerceTo(std::move(t), nullptr, allow_orphaning); + return DoCoerceTo(std::move(t), allow_orphaning); } TableValPtr RecordVal::GetRecordFieldsVal() const @@ -3326,19 +3237,10 @@ bool VectorVal::Insert(unsigned int index, ValPtr element) auto n = vector_val->size(); if ( index < n ) - { // May need to delete previous element + { // Find location within existing vector elements. 
it = std::next(vector_val->begin(), index); if ( yield_types ) - { - if ( *it ) - ZVal::DeleteIfManaged(**it, element->GetType()); types_it = std::next(yield_types->begin(), index); - } - else if ( managed_yield ) - { - if ( *it ) - ZVal::DeleteManagedType(**it); - } } else { diff --git a/src/Val.h b/src/Val.h index e5ff5fcb77..b7cae59690 100644 --- a/src/Val.h +++ b/src/Val.h @@ -47,6 +47,7 @@ class PrefixTable; class CompositeHash; class HashKey; +class ValTrace; class ZBody; } // namespace detail @@ -527,8 +528,7 @@ class StringVal final : public Val { public: explicit StringVal(String* s); - explicit StringVal(const char* s); - explicit StringVal(const std::string& s); + StringVal(std::string_view s); StringVal(int length, const char* s); ~StringVal() override; @@ -822,17 +822,14 @@ public: // Returns true if this set contains the same members as the // given set. Note that comparisons are done using hash keys, // so errors can arise for compound sets such as sets-of-sets. - // See https://bro-tracker.atlassian.net/browse/BIT-1949. + // See https://github.com/zeek/zeek/issues/151. bool EqualTo(const TableVal& v) const; + bool EqualTo(const TableValPtr& v) const { return EqualTo(*(v.get())); } // Returns true if this set is a subset (not necessarily proper) // of the given set. bool IsSubsetOf(const TableVal& v) const; - // Expands any lists in the index into multiple initializations. - // Returns true if the initializations typecheck, false if not. - bool ExpandAndInit(ValPtr index, ValPtr new_val); - /** * Finds an index in the table and returns its associated value. * @param index The index to lookup in the table. @@ -939,7 +936,7 @@ public: void InitTimer(double delay); void DoExpire(double t); - // If the &default attribute is not a function, or the functon has + // If the &default attribute is not a function, or the function has // already been initialized, this does nothing. 
Otherwise, evaluates // the function in the frame allowing it to capture its closure. void InitDefaultFunc(detail::Frame* f); @@ -1004,8 +1001,6 @@ protected: void RebuildTable(ParseTimeTableState ptts); void CheckExpireAttr(detail::AttrTag at); - bool ExpandCompoundAndInit(ListVal* lv, int k, ValPtr new_val); - bool CheckAndAssign(ValPtr index, ValPtr new_val); // Calculates default value for index. Returns nullptr if none. ValPtr Default(const ValPtr& index); @@ -1355,23 +1350,21 @@ public: TableValPtr GetRecordFieldsVal() const; // This is an experiment to associate a Obj within the - // event engine to a record value in bro script. + // event engine to a record value in Zeek script. void SetOrigin(Obj* o) { origin = o; } Obj* GetOrigin() const { return origin; } // Returns a new value representing the value coerced to the given - // type. If coercion is not possible, returns 0. The non-const + // type. If coercion is not possible, returns nil. The non-const // version may return the current value ref'ed if its type matches // directly. // - // *aggr* is optional; if non-zero, we add to it. See - // Expr::InitVal(). We leave it out in the non-const version to make - // the choice unambigious. - // // The *allow_orphaning* parameter allows for a record to be demoted // down to a record type that contains less fields. - RecordValPtr CoerceTo(RecordTypePtr other, RecordValPtr aggr, - bool allow_orphaning = false) const; + RecordValPtr CoerceTo(RecordTypePtr other, bool allow_orphaning = false) const + { + return DoCoerceTo(other, allow_orphaning); + } RecordValPtr CoerceTo(RecordTypePtr other, bool allow_orphaning = false); [[deprecated("Remove in v5.1. MemoryAllocation() is deprecated and will be removed. 
See " @@ -1389,8 +1382,11 @@ public: static void DoneParsing(); protected: + friend class zeek::detail::ValTrace; friend class zeek::detail::ZBody; + RecordValPtr DoCoerceTo(RecordTypePtr other, bool allow_orphaning) const; + /** * Appends a value to the record's fields. The caller is responsible * for ensuring that fields are appended in the correct order and @@ -1408,9 +1404,9 @@ protected: record_val->emplace_back(std::nullopt); } - // For use by low-level ZAM instructions. Caller assumes - // responsibility for memory management. The first version - // allows manipulation of whether the field is present at all. + // For internal use by low-level ZAM instructions and event tracing. + // Caller assumes responsibility for memory management. The first + // version allows manipulation of whether the field is present at all. // The second version ensures that the optional value is present. std::optional& RawOptField(int field) { return (*record_val)[field]; } @@ -1621,10 +1617,13 @@ public: } const String* StringAt(unsigned int index) const { return StringValAt(index)->AsString(); } - // Only intended for low-level access by compiled code. + // Only intended for low-level access by internal or compiled code. const auto& RawVec() const { return vector_val; } auto& RawVec() { return vector_val; } + const auto& RawYieldType() const { return yield_type; } + const auto& RawYieldTypes() const { return yield_types; } + protected: /** * Returns the element at a given index or nullptr if it does not exist. 
diff --git a/src/Var.cc b/src/Var.cc index ce1027b77e..dba0e620fb 100644 --- a/src/Var.cc +++ b/src/Var.cc @@ -23,18 +23,6 @@ namespace zeek::detail { -static ValPtr init_val(ExprPtr init, TypePtr t, ValPtr aggr) - { - try - { - return init->InitVal(t, std::move(aggr)); - } - catch ( InterpreterException& e ) - { - return nullptr; - } - } - static bool add_prototype(const IDPtr& id, Type* t, std::vector* attrs, const ExprPtr& init) { @@ -129,9 +117,103 @@ static bool add_prototype(const IDPtr& id, Type* t, std::vector* attrs, return true; } +static void initialize_var(const IDPtr& id, InitClass c, ExprPtr init) + { + if ( ! id->HasVal() ) + { + if ( c == INIT_REMOVE ) + return; + + bool no_init = ! init; + + if ( ! no_init && init->Tag() == EXPR_LIST ) + no_init = init->AsListExpr()->Exprs().empty(); + + if ( no_init ) + { + auto& t = id->GetType(); + + if ( ! IsAggr(t) ) + return; + + ValPtr init_val; + + if ( t->Tag() == TYPE_RECORD ) + { + try + { + init_val = make_intrusive(cast_intrusive(t)); + } + catch ( InterpreterException& ) + { + id->Error("initialization failed"); + return; + } + } + + else if ( t->Tag() == TYPE_TABLE ) + init_val = make_intrusive(cast_intrusive(t), id->GetAttrs()); + + else if ( t->Tag() == TYPE_VECTOR ) + init_val = make_intrusive(cast_intrusive(t)); + + id->SetVal(init_val); + return; + } + + if ( c == INIT_EXTRA ) + c = INIT_FULL; + } + + bool is_const = id->IsConst() || id->IsOption(); + auto lhs = make_intrusive(id, is_const); + ExprPtr assignment; + + if ( c == INIT_FULL ) + assignment = make_intrusive(lhs, init, false); + else if ( c == INIT_EXTRA ) + assignment = make_intrusive(lhs, init); + else if ( c == INIT_REMOVE ) + assignment = make_intrusive(lhs, init); + else + reporter->InternalError("bad InitClass in initialize_var"); + + if ( assignment->IsError() ) + return; + + try + { + (void)assignment->Eval(nullptr); + } + catch ( InterpreterException& ) + { + id->Error("initialization failed"); + } + } + static void 
make_var(const IDPtr& id, TypePtr t, InitClass c, ExprPtr init, std::unique_ptr> attr, DeclType dt, bool do_init) { + if ( c == INIT_NONE && init ) + { + // This can happen because the grammar allows any "init_class", + // including none, to be followed by an expression. + init->Warn("initialization not preceded by =/+=/-= is deprecated"); + + // The historical instances of these, such as the + // language/redef-same-prefixtable-idx.zeek btest, treat + // this as += rather than =, and with the initializer + // implicitly inside a list. + init = make_intrusive(init); + c = INIT_EXTRA; + } + + if ( init && init->Tag() == EXPR_LIST ) + { + auto& init_t = t ? t : id->GetType(); + init = expand_op(cast_intrusive(init), init_t); + } + if ( id->GetType() ) { if ( id->IsRedefinable() || (! init && attr && ! IsFunc(id->GetType()->Tag())) ) @@ -166,7 +248,7 @@ static void make_var(const IDPtr& id, TypePtr t, InitClass c, ExprPtr init, if ( id->GetType() && id->GetType()->Tag() != TYPE_ERROR ) { - if ( dt != VAR_REDEF && (! init || ! do_init || (! t && ! (t = init_type(init.get())))) ) + if ( dt != VAR_REDEF && (! init || ! do_init || (! t && ! (t = init_type(init)))) ) { id->Error("already defined", init.get()); return; @@ -205,7 +287,7 @@ static void make_var(const IDPtr& id, TypePtr t, InitClass c, ExprPtr init, return; } - t = init_type(init.get()); + t = init_type(init); if ( ! 
t ) { id->SetType(error_type()); @@ -228,6 +310,8 @@ static void make_var(const IDPtr& id, TypePtr t, InitClass c, ExprPtr init, auto* ctor = static_cast(init.get()); if ( ctor->GetAttrs() ) id->AddAttrs(ctor->GetAttrs()); + else + ctor->SetAttrs(id->GetAttrs()); } break; @@ -236,6 +320,8 @@ static void make_var(const IDPtr& id, TypePtr t, InitClass c, ExprPtr init, auto* ctor = static_cast(init.get()); if ( ctor->GetAttrs() ) id->AddAttrs(ctor->GetAttrs()); + else + ctor->SetAttrs(id->GetAttrs()); } break; @@ -258,48 +344,7 @@ static void make_var(const IDPtr& id, TypePtr t, InitClass c, ExprPtr init, id->SetVal(init, c); else if ( dt != VAR_REDEF || init || ! attr ) - { - ValPtr aggr; - - if ( t->Tag() == TYPE_RECORD ) - { - try - { - aggr = make_intrusive(cast_intrusive(t)); - } - catch ( InterpreterException& ) - { - id->Error("initialization failed"); - return; - } - - if ( init && t ) - // Have an initialization and type is not deduced. - init = make_intrusive( - std::move(init), IntrusivePtr{NewRef{}, t->AsRecordType()}); - } - - else if ( t->Tag() == TYPE_TABLE ) - aggr = make_intrusive(cast_intrusive(t), id->GetAttrs()); - - else if ( t->Tag() == TYPE_VECTOR ) - aggr = make_intrusive(cast_intrusive(t)); - - ValPtr v; - - if ( init ) - { - v = init_val(init, t, aggr); - - if ( ! 
v ) - return; - } - - if ( aggr ) - id->SetVal(std::move(aggr), c); - else if ( v ) - id->SetVal(std::move(v), c); - } + initialize_var(id, c, init); } if ( dt == VAR_CONST ) diff --git a/src/ZVal.cc b/src/ZVal.cc index d2a1ca684c..4b27d24045 100644 --- a/src/ZVal.cc +++ b/src/ZVal.cc @@ -104,8 +104,6 @@ ZVal::ZVal(ValPtr v, const TypePtr& t) break; case TYPE_ERROR: - case TYPE_TIMER: - case TYPE_UNION: case TYPE_VOID: reporter->InternalError("bad type in ZVal constructor"); } @@ -185,8 +183,6 @@ ZVal::ZVal(const TypePtr& t) break; case TYPE_ERROR: - case TYPE_TIMER: - case TYPE_UNION: case TYPE_VOID: reporter->InternalError("bad type in ZVal constructor"); } @@ -275,8 +271,6 @@ ValPtr ZVal::ToVal(const TypePtr& t) const break; case TYPE_ERROR: - case TYPE_TIMER: - case TYPE_UNION: case TYPE_VOID: default: v = nullptr; diff --git a/src/ZeekString.cc b/src/ZeekString.cc index b62c661b1b..d59c58ff24 100644 --- a/src/ZeekString.cc +++ b/src/ZeekString.cc @@ -47,12 +47,7 @@ String::String(const u_char* str, int arg_n, bool add_NUL) : String() Set(str, arg_n, add_NUL); } -String::String(const char* str) : String() - { - Set(str); - } - -String::String(const std::string& str) : String() +String::String(std::string_view str) : String() { Set(str); } @@ -147,31 +142,21 @@ void String::Set(const u_char* str, int len, bool add_NUL) use_free_to_delete = false; } -void String::Set(const char* str) +void String::Set(std::string_view str) { Reset(); - if ( str ) + if ( str.data() ) { - n = strlen(str); + n = str.size(); b = new u_char[n + 1]; - memcpy(b, str, n + 1); + memcpy(b, str.data(), n); + b[n] = 0; final_NUL = true; use_free_to_delete = false; } } -void String::Set(const std::string& str) - { - Reset(); - - n = str.size(); - b = new u_char[n + 1]; - memcpy(b, str.c_str(), n + 1); - final_NUL = true; - use_free_to_delete = false; - } - void String::Set(const String& str) { *this = str; diff --git a/src/ZeekString.h b/src/ZeekString.h index 9c0845dd0e..e92574c2ec 100644 
--- a/src/ZeekString.h +++ b/src/ZeekString.h @@ -43,8 +43,7 @@ public: // Constructors creating internal copies of the data passed in. String(const u_char* str, int arg_n, bool add_NUL); - explicit String(const char* str); - explicit String(const std::string& str); + String(std::string_view str); String(const String& bs); // Constructor that takes owernship of the vector passed in. @@ -72,8 +71,7 @@ public: // contents to a copy of the string given by the arguments. // void Set(const u_char* str, int len, bool add_NUL = true); - void Set(const char* str); - void Set(const std::string& str); + void Set(std::string_view str); void Set(const String& str); void SetUseFreeToDelete(int use_it) { use_free_to_delete = use_it; } @@ -102,7 +100,7 @@ public: static constexpr int EXPANDED_STRING = // the original style ESC_HEX; - static constexpr int BRO_STRING_LITERAL = // as in a Bro string literal + static constexpr int BRO_STRING_LITERAL = // as in a Bro/Zeek string literal ESC_ESC | ESC_QUOT | ESC_HEX; // Renders a string into a newly allocated character array that @@ -193,7 +191,7 @@ extern int Bstr_cmp(const String* s1, const String* s2); // which would be necessary if String were used. Unlike String, // the string should not be deallocated on destruction. // -// "BroConstString" might be a better name here. +// "ZeekConstString" might be a better name here. struct data_chunk_t { diff --git a/src/analyzer/Analyzer.cc b/src/analyzer/Analyzer.cc index 703d378d7d..19b854a957 100644 --- a/src/analyzer/Analyzer.cc +++ b/src/analyzer/Analyzer.cc @@ -685,20 +685,19 @@ void Analyzer::ProtocolConfirmation(zeek::Tag arg_tag) protocol_confirmed = true; - if ( ! protocol_confirmation ) - return; - const auto& tval = arg_tag ? arg_tag.AsVal() : tag.AsVal(); // Enqueue both of these events. In the base scripts, only the analyzer version is handled. // The protocol remains just for handling scripts that haven't been updated. Once that event // is removed, this method is also removed. 
- event_mgr.Enqueue(analyzer_confirmation, ConnVal(), tval, val_mgr->Count(id)); - event_mgr.Enqueue(protocol_confirmation, ConnVal(), tval, val_mgr->Count(id)); + if ( analyzer_confirmation ) + event_mgr.Enqueue(analyzer_confirmation, ConnVal(), tval, val_mgr->Count(id)); + if ( protocol_confirmation ) + event_mgr.Enqueue(protocol_confirmation, ConnVal(), tval, val_mgr->Count(id)); } void Analyzer::ProtocolViolation(const char* reason, const char* data, int len) { - if ( ! protocol_violation ) + if ( ! protocol_violation && ! analyzer_violation ) return; StringValPtr r; @@ -717,8 +716,10 @@ void Analyzer::ProtocolViolation(const char* reason, const char* data, int len) // Enqueue both of these events. In the base scripts, only the analyzer version is handled. // The protocol remains just for handling scripts that haven't been updated. Once that event // is removed, this method is also removed. - event_mgr.Enqueue(analyzer_violation, ConnVal(), tval, val_mgr->Count(id), std::move(r)); - event_mgr.Enqueue(protocol_violation, ConnVal(), tval, val_mgr->Count(id), std::move(r)); + if ( analyzer_violation ) + event_mgr.Enqueue(analyzer_violation, ConnVal(), tval, val_mgr->Count(id), r); + if ( protocol_violation ) + event_mgr.Enqueue(protocol_violation, ConnVal(), tval, val_mgr->Count(id), std::move(r)); } void Analyzer::AnalyzerConfirmation(zeek::Tag arg_tag) @@ -735,7 +736,7 @@ void Analyzer::AnalyzerConfirmation(zeek::Tag arg_tag) event_mgr.Enqueue(analyzer_confirmation, ConnVal(), tval, val_mgr->Count(id)); } -void Analyzer::AnalyzerViolation(const char* reason, const char* data, int len) +void Analyzer::AnalyzerViolation(const char* reason, const char* data, int len, zeek::Tag arg_tag) { if ( ! analyzer_violation ) return; @@ -752,7 +753,7 @@ void Analyzer::AnalyzerViolation(const char* reason, const char* data, int len) else r = make_intrusive(reason); - const auto& tval = tag.AsVal(); + const auto& tval = arg_tag ? 
arg_tag.AsVal() : tag.AsVal(); event_mgr.Enqueue(analyzer_violation, ConnVal(), tval, val_mgr->Count(id), std::move(r)); } diff --git a/src/analyzer/Analyzer.h b/src/analyzer/Analyzer.h index 3d84632374..12fa44848f 100644 --- a/src/analyzer/Analyzer.h +++ b/src/analyzer/Analyzer.h @@ -523,7 +523,7 @@ public: void RemoveSupportAnalyzer(SupportAnalyzer* analyzer); /** - * Signals Bro's protocol detection that the analyzer has recognized + * Signals Zeek's protocol detection that the analyzer has recognized * the input to indeed conform to the expected protocol. This should * be called as early as possible during a connection's life-time. It * may turn into \c protocol_confirmed event at the script-layer (but @@ -537,7 +537,7 @@ public: ProtocolConfirmation(zeek::Tag tag = zeek::Tag()); /** - * Signals Bro's protocol detection that the analyzer has found a + * Signals Zeek's protocol detection that the analyzer has found a * severe protocol violation that could indicate that it's not * parsing the expected protocol. This turns into \c * protocol_violation events at the script-layer (one such event is @@ -577,7 +577,7 @@ public: virtual void AnalyzerConfirmation(zeek::Tag tag = zeek::Tag()); /** - * Signals Bro's protocol detection that the analyzer has found a + * Signals Zeek's protocol detection that the analyzer has found a * severe protocol violation that could indicate that it's not * parsing the expected protocol. This turns into \c * analyzer_violation events at the script-layer (one such event is @@ -590,8 +590,12 @@ public: * @param data An optional pointer to the malformed data. * * @param len If \a data is given, the length of it. + * + * @param tag If tag is given, it overrides the analyzer tag passed to the + * scripting layer; the default is the one of the analyzer itself. 
*/ - virtual void AnalyzerViolation(const char* reason, const char* data = nullptr, int len = 0); + virtual void AnalyzerViolation(const char* reason, const char* data = nullptr, int len = 0, + zeek::Tag tag = zeek::Tag()); /** * Returns true if ProtocolConfirmation() has been called at least @@ -605,6 +609,8 @@ public: * use this method to attach additional data to the connections. A * call to BuildConnVal() will in turn trigger a call to * UpdateConnVal(). + * TODO: The above comment needs updating, there's no BuildConnVal() + * anymore -VP * * @param conn_val The connenction value being updated. */ @@ -683,7 +689,7 @@ protected: * * @param t The absolute time when the timer will fire. * - * @param do_expire If true, the timer will also fire when Bro + * @param do_expire If true, the timer will also fire when Zeek * terminates even if \a t has not been reache yet. * * @param type The timer's type. @@ -899,7 +905,7 @@ private: SupportAnalyzer* sibling; }; -// The following need to be consistent with bro.init. +// The following need to be consistent with zeek.init. #define CONTENTS_NONE 0 #define CONTENTS_ORIG 1 #define CONTENTS_RESP 2 diff --git a/src/analyzer/Component.h b/src/analyzer/Component.h index 6588c512c9..323d31a823 100644 --- a/src/analyzer/Component.h +++ b/src/analyzer/Component.h @@ -83,7 +83,7 @@ public: /** * Returns whether the analyzer supports partial connections. Partial - * connections are those where Bro starts processing payload + * connections are those where Zeek starts processing payload * mid-stream, after missing the beginning. */ bool Partial() const { return partial; } diff --git a/src/analyzer/Manager.h b/src/analyzer/Manager.h index 74938c8941..c510e2d921 100644 --- a/src/analyzer/Manager.h +++ b/src/analyzer/Manager.h @@ -3,7 +3,7 @@ /** * The central management unit for registering and instantiating analyzers. 
* - * For each protocol that Bro supports, there's one class derived from + * For each protocol that Zeek supports, there's one class derived from * analyzer::Analyzer. Once we have decided that a connection's payload is to * be parsed as a given protocol, we instantiate the corresponding * analyzer-derived class and add the new instance as a child node into the @@ -71,7 +71,7 @@ public: /** * Second-stage initialization of the manager. This is called late - * during Bro's initialization after any scripts are processed. + * during Zeek's initialization after any scripts are processed. */ void InitPostScript(); diff --git a/src/analyzer/protocol/bittorrent/BitTorrent.cc b/src/analyzer/protocol/bittorrent/BitTorrent.cc index 74c6ff269f..143658743e 100644 --- a/src/analyzer/protocol/bittorrent/BitTorrent.cc +++ b/src/analyzer/protocol/bittorrent/BitTorrent.cc @@ -36,10 +36,7 @@ void BitTorrent_Analyzer::DeliverStream(int len, const u_char* data, bool orig) analyzer::tcp::TCP_ApplicationAnalyzer::DeliverStream(len, data, orig); - assert(TCP()); - - if ( TCP()->IsPartial() ) - // punt on partial. + if ( TCP() && TCP()->IsPartial() ) return; if ( this_stop ) diff --git a/src/analyzer/protocol/bittorrent/BitTorrentTracker.cc b/src/analyzer/protocol/bittorrent/BitTorrentTracker.cc index 92de0788c8..09f86a54fa 100644 --- a/src/analyzer/protocol/bittorrent/BitTorrentTracker.cc +++ b/src/analyzer/protocol/bittorrent/BitTorrentTracker.cc @@ -80,10 +80,7 @@ void BitTorrentTracker_Analyzer::DeliverStream(int len, const u_char* data, bool { analyzer::tcp::TCP_ApplicationAnalyzer::DeliverStream(len, data, orig); - assert(TCP()); - - if ( TCP()->IsPartial() ) - // punt on partial. 
+ if ( TCP() && TCP()->IsPartial() ) return; if ( orig ) diff --git a/src/analyzer/protocol/dce-rpc/DCE_RPC.cc b/src/analyzer/protocol/dce-rpc/DCE_RPC.cc index 36678f67ad..109542371e 100644 --- a/src/analyzer/protocol/dce-rpc/DCE_RPC.cc +++ b/src/analyzer/protocol/dce-rpc/DCE_RPC.cc @@ -50,8 +50,6 @@ void DCE_RPC_Analyzer::DeliverStream(int len, const u_char* data, bool orig) { TCP_ApplicationAnalyzer::DeliverStream(len, data, orig); - assert(TCP()); - if ( had_gap ) // If only one side had a content gap, we could still try to // deliver data to the other side if the script layer can handle this. diff --git a/src/analyzer/protocol/ftp/FTP.cc b/src/analyzer/protocol/ftp/FTP.cc index c8b3e6f2d7..594da3606a 100644 --- a/src/analyzer/protocol/ftp/FTP.cc +++ b/src/analyzer/protocol/ftp/FTP.cc @@ -45,11 +45,14 @@ void FTP_Analyzer::Done() { analyzer::tcp::TCP_ApplicationAnalyzer::Done(); - if ( nvt_orig->HasPartialLine() && - (TCP()->OrigState() == analyzer::tcp::TCP_ENDPOINT_CLOSED || - TCP()->OrigPrevState() == analyzer::tcp::TCP_ENDPOINT_CLOSED) ) - // ### should include the partial text - Weird("partial_ftp_request"); + if ( TCP() ) + { + if ( nvt_orig->HasPartialLine() && + (TCP()->OrigState() == analyzer::tcp::TCP_ENDPOINT_CLOSED || + TCP()->OrigPrevState() == analyzer::tcp::TCP_ENDPOINT_CLOSED) ) + // ### should include the partial text + Weird("partial_ftp_request"); + } } static uint32_t get_reply_code(int len, const char* line) diff --git a/src/analyzer/protocol/gssapi/GSSAPI.cc b/src/analyzer/protocol/gssapi/GSSAPI.cc index 6730e6bafd..6201d99f5d 100644 --- a/src/analyzer/protocol/gssapi/GSSAPI.cc +++ b/src/analyzer/protocol/gssapi/GSSAPI.cc @@ -38,8 +38,6 @@ void GSSAPI_Analyzer::DeliverStream(int len, const u_char* data, bool orig) { analyzer::tcp::TCP_ApplicationAnalyzer::DeliverStream(len, data, orig); - assert(TCP()); - try { interp->NewData(orig, data, data + len); diff --git a/src/analyzer/protocol/imap/IMAP.cc b/src/analyzer/protocol/imap/IMAP.cc index 
3d074de875..43ab073b50 100644 --- a/src/analyzer/protocol/imap/IMAP.cc +++ b/src/analyzer/protocol/imap/IMAP.cc @@ -47,8 +47,7 @@ void IMAP_Analyzer::DeliverStream(int len, const u_char* data, bool orig) return; } - assert(TCP()); - if ( TCP()->IsPartial() ) + if ( TCP() && TCP()->IsPartial() ) return; if ( had_gap ) diff --git a/src/analyzer/protocol/krb/KRB_TCP.cc b/src/analyzer/protocol/krb/KRB_TCP.cc index 8dd5124eef..dc03e79f4b 100644 --- a/src/analyzer/protocol/krb/KRB_TCP.cc +++ b/src/analyzer/protocol/krb/KRB_TCP.cc @@ -39,8 +39,7 @@ void KRB_Analyzer::DeliverStream(int len, const u_char* data, bool orig) { analyzer::tcp::TCP_ApplicationAnalyzer::DeliverStream(len, data, orig); - assert(TCP()); - if ( TCP()->IsPartial() ) + if ( TCP() && TCP()->IsPartial() ) return; if ( had_gap ) diff --git a/src/analyzer/protocol/login/Login.cc b/src/analyzer/protocol/login/Login.cc index 8be877acc4..db90dc359c 100644 --- a/src/analyzer/protocol/login/Login.cc +++ b/src/analyzer/protocol/login/Login.cc @@ -123,8 +123,8 @@ void Login_Analyzer::NewLine(bool orig, char* line) if ( state == LOGIN_STATE_AUTHENTICATE ) { - if ( TCP()->OrigState() == analyzer::tcp::TCP_ENDPOINT_PARTIAL || - TCP()->RespState() == analyzer::tcp::TCP_ENDPOINT_PARTIAL ) + if ( TCP() && (TCP()->OrigState() == analyzer::tcp::TCP_ENDPOINT_PARTIAL || + TCP()->RespState() == analyzer::tcp::TCP_ENDPOINT_PARTIAL) ) state = LOGIN_STATE_CONFUSED; // unknown login state else { diff --git a/src/analyzer/protocol/login/RSH.cc b/src/analyzer/protocol/login/RSH.cc index 35cb65b27d..34cdd92ba6 100644 --- a/src/analyzer/protocol/login/RSH.cc +++ b/src/analyzer/protocol/login/RSH.cc @@ -34,10 +34,13 @@ Contents_Rsh_Analyzer::~Contents_Rsh_Analyzer() { } void Contents_Rsh_Analyzer::DoDeliver(int len, const u_char* data) { - auto* tcp = static_cast(Parent())->TCP(); - assert(tcp); + int endp_state; - int endp_state = IsOrig() ? 
tcp->OrigState() : tcp->RespState(); + if ( auto* tcp = static_cast(Parent())->TCP() ) + endp_state = IsOrig() ? tcp->OrigState() : tcp->RespState(); + else + endp_state = tcp::TCP_ENDPOINT_ESTABLISHED; // no TCP parent, assume somebody's feeding us a + // legitimate stream for ( ; len > 0; --len, ++data ) { diff --git a/src/analyzer/protocol/login/Rlogin.cc b/src/analyzer/protocol/login/Rlogin.cc index 7543a437f6..771c121874 100644 --- a/src/analyzer/protocol/login/Rlogin.cc +++ b/src/analyzer/protocol/login/Rlogin.cc @@ -30,10 +30,13 @@ Contents_Rlogin_Analyzer::~Contents_Rlogin_Analyzer() { } void Contents_Rlogin_Analyzer::DoDeliver(int len, const u_char* data) { - auto* tcp = static_cast(Parent())->TCP(); - assert(tcp); + int endp_state; - int endp_state = IsOrig() ? tcp->OrigState() : tcp->RespState(); + if ( auto* tcp = static_cast(Parent())->TCP() ) + endp_state = IsOrig() ? tcp->OrigState() : tcp->RespState(); + else + endp_state = tcp::TCP_ENDPOINT_ESTABLISHED; // no TCP parent, assume somebody's feeding us a + // legitimate stream for ( ; len > 0; --len, ++data ) { diff --git a/src/analyzer/protocol/mime/MIME.cc b/src/analyzer/protocol/mime/MIME.cc index 00517beac7..51870e1c00 100644 --- a/src/analyzer/protocol/mime/MIME.cc +++ b/src/analyzer/protocol/mime/MIME.cc @@ -11,7 +11,7 @@ // Here are a few things to do: // -// 1. Add a Bro internal function 'stop_deliver_data_of_entity' so +// 1. Add a Zeek internal function 'stop_deliver_data_of_entity' so // that the engine does not decode and deliver further data for the // entity (which may speed up the engine by avoiding copying). 
// diff --git a/src/analyzer/protocol/mime/MIME.h b/src/analyzer/protocol/mime/MIME.h index 0fde98a44c..cdde47c5c7 100644 --- a/src/analyzer/protocol/mime/MIME.h +++ b/src/analyzer/protocol/mime/MIME.h @@ -191,7 +191,7 @@ protected: // The reason I separate MIME_Message as an abstract class is to // present the *interface* separated from its implementation to -// generate Bro events. +// generate Zeek events. class MIME_Message { diff --git a/src/analyzer/protocol/mqtt/MQTT.cc b/src/analyzer/protocol/mqtt/MQTT.cc index cf55b04e34..2917b233e3 100644 --- a/src/analyzer/protocol/mqtt/MQTT.cc +++ b/src/analyzer/protocol/mqtt/MQTT.cc @@ -37,8 +37,6 @@ void MQTT_Analyzer::DeliverStream(int len, const u_char* data, bool orig) { analyzer::tcp::TCP_ApplicationAnalyzer::DeliverStream(len, data, orig); - assert(TCP()); - try { interp->NewData(orig, data, data + len); diff --git a/src/analyzer/protocol/mysql/MySQL.cc b/src/analyzer/protocol/mysql/MySQL.cc index 1d4c54a2a2..6e9d319903 100644 --- a/src/analyzer/protocol/mysql/MySQL.cc +++ b/src/analyzer/protocol/mysql/MySQL.cc @@ -38,8 +38,7 @@ void MySQL_Analyzer::DeliverStream(int len, const u_char* data, bool orig) { analyzer::tcp::TCP_ApplicationAnalyzer::DeliverStream(len, data, orig); - assert(TCP()); - if ( TCP()->IsPartial() ) + if ( TCP() && TCP()->IsPartial() ) return; if ( had_gap ) diff --git a/src/analyzer/protocol/ncp/NCP.cc b/src/analyzer/protocol/ncp/NCP.cc index 8d275168bc..d6ecf6d05b 100644 --- a/src/analyzer/protocol/ncp/NCP.cc +++ b/src/analyzer/protocol/ncp/NCP.cc @@ -180,8 +180,12 @@ void Contents_NCP_Analyzer::DeliverStream(int len, const u_char* data, bool orig if ( ! resync_set ) { resync_set = true; - resync = (IsOrig() ? tcp->OrigState() : tcp->RespState()) != - analyzer::tcp::TCP_ENDPOINT_ESTABLISHED; + + if ( tcp ) + resync = (IsOrig() ? 
tcp->OrigState() : tcp->RespState()) != + analyzer::tcp::TCP_ENDPOINT_ESTABLISHED; + else + resync = false; } if ( tcp && tcp->HadGap(orig) ) diff --git a/src/analyzer/protocol/ntlm/NTLM.cc b/src/analyzer/protocol/ntlm/NTLM.cc index 8f623c1230..fe81ddf454 100644 --- a/src/analyzer/protocol/ntlm/NTLM.cc +++ b/src/analyzer/protocol/ntlm/NTLM.cc @@ -37,8 +37,6 @@ void NTLM_Analyzer::DeliverStream(int len, const u_char* data, bool orig) { analyzer::tcp::TCP_ApplicationAnalyzer::DeliverStream(len, data, orig); - assert(TCP()); - try { interp->NewData(orig, data, data + len); diff --git a/src/analyzer/protocol/pop3/POP3.cc b/src/analyzer/protocol/pop3/POP3.cc index 6bf3f64fb3..bc88a279cd 100644 --- a/src/analyzer/protocol/pop3/POP3.cc +++ b/src/analyzer/protocol/pop3/POP3.cc @@ -1,4 +1,4 @@ -// This code contributed to Bro by Florian Schimandl, Hugh Dollman and +// This code contributed to Zeek/Bro by Florian Schimandl, Hugh Dollman and // Robin Sommer. #include "zeek/analyzer/protocol/pop3/POP3.h" diff --git a/src/analyzer/protocol/pop3/POP3.h b/src/analyzer/protocol/pop3/POP3.h index d5cecee222..8628b29a70 100644 --- a/src/analyzer/protocol/pop3/POP3.h +++ b/src/analyzer/protocol/pop3/POP3.h @@ -1,4 +1,4 @@ -// This code contributed to Bro by Florian Schimandl and Hugh Dollman. +// This code contributed to Zeek/Bro by Florian Schimandl and Hugh Dollman. // // An analyser for the POP3 protocol. 
diff --git a/src/analyzer/protocol/rdp/RDP.cc b/src/analyzer/protocol/rdp/RDP.cc index e1531e04ba..b50006e6ca 100644 --- a/src/analyzer/protocol/rdp/RDP.cc +++ b/src/analyzer/protocol/rdp/RDP.cc @@ -39,8 +39,7 @@ void RDP_Analyzer::DeliverStream(int len, const u_char* data, bool orig) { analyzer::tcp::TCP_ApplicationAnalyzer::DeliverStream(len, data, orig); - assert(TCP()); - if ( TCP()->IsPartial() ) + if ( TCP() && TCP()->IsPartial() ) return; if ( had_gap ) diff --git a/src/analyzer/protocol/rfb/RFB.cc b/src/analyzer/protocol/rfb/RFB.cc index af52b7f4d8..486b9879e5 100644 --- a/src/analyzer/protocol/rfb/RFB.cc +++ b/src/analyzer/protocol/rfb/RFB.cc @@ -36,8 +36,8 @@ void RFB_Analyzer::EndpointEOF(bool is_orig) void RFB_Analyzer::DeliverStream(int len, const u_char* data, bool orig) { analyzer::tcp::TCP_ApplicationAnalyzer::DeliverStream(len, data, orig); - assert(TCP()); - if ( TCP()->IsPartial() ) + + if ( TCP() && TCP()->IsPartial() ) return; if ( had_gap ) diff --git a/src/analyzer/protocol/rpc/NFS.h b/src/analyzer/protocol/rpc/NFS.h index 2e54481c0c..3518a6c74e 100644 --- a/src/analyzer/protocol/rpc/NFS.h +++ b/src/analyzer/protocol/rpc/NFS.h @@ -68,7 +68,7 @@ protected: BifEnum::NFS3::status_t status); // Consumes the file data in the RPC message. 
Depending on NFS::return_data* consts - // in bro.init returns NULL or the data as string val: + // in zeek.init returns NULL or the data as string val: // * offset is the offset of the read/write call // * size is the amount of bytes read (or requested to be written), StringValPtr nfs3_file_data(const u_char*& buf, int& n, uint64_t offset, int size); diff --git a/src/analyzer/protocol/rpc/RPC.cc b/src/analyzer/protocol/rpc/RPC.cc index f169ea7854..4628749309 100644 --- a/src/analyzer/protocol/rpc/RPC.cc +++ b/src/analyzer/protocol/rpc/RPC.cc @@ -446,16 +446,16 @@ bool Contents_RPC::CheckResync(int& len, const u_char*& data, bool orig) // is fully established we are in sync (since it's the first chunk // of data after the SYN if its not established we need to // resync. - auto* tcp = static_cast(Parent())->TCP(); - assert(tcp); + resync_state = INSYNC; - if ( (IsOrig() ? tcp->OrigState() : tcp->RespState()) != - analyzer::tcp::TCP_ENDPOINT_ESTABLISHED ) + if ( auto* tcp = static_cast(Parent())->TCP() ) { - NeedResync(); + if ( (IsOrig() ? 
tcp->OrigState() : tcp->RespState()) != + analyzer::tcp::TCP_ENDPOINT_ESTABLISHED ) + { + NeedResync(); + } } - else - resync_state = INSYNC; } if ( resync_state == INSYNC ) diff --git a/src/analyzer/protocol/sip/SIP_TCP.cc b/src/analyzer/protocol/sip/SIP_TCP.cc index d02e0f2252..7011d5df8c 100644 --- a/src/analyzer/protocol/sip/SIP_TCP.cc +++ b/src/analyzer/protocol/sip/SIP_TCP.cc @@ -41,8 +41,7 @@ void SIP_Analyzer::DeliverStream(int len, const u_char* data, bool orig) { analyzer::tcp::TCP_ApplicationAnalyzer::DeliverStream(len, data, orig); - assert(TCP()); - if ( TCP()->IsPartial() ) + if ( TCP() && TCP()->IsPartial() ) return; if ( had_gap ) diff --git a/src/analyzer/protocol/smb/SMB.cc b/src/analyzer/protocol/smb/SMB.cc index be27fa60ce..677e9bcdf9 100644 --- a/src/analyzer/protocol/smb/SMB.cc +++ b/src/analyzer/protocol/smb/SMB.cc @@ -61,8 +61,6 @@ void SMB_Analyzer::DeliverStream(int len, const u_char* data, bool orig) { TCP_ApplicationAnalyzer::DeliverStream(len, data, orig); - assert(TCP()); - // It we need to resync and we don't have an SMB header, bail! if ( need_sync && ! HasSMBHeader(len, data) ) return; diff --git a/src/analyzer/protocol/socks/SOCKS.cc b/src/analyzer/protocol/socks/SOCKS.cc index cace5115dd..49d66490d1 100644 --- a/src/analyzer/protocol/socks/SOCKS.cc +++ b/src/analyzer/protocol/socks/SOCKS.cc @@ -46,10 +46,7 @@ void SOCKS_Analyzer::DeliverStream(int len, const u_char* data, bool orig) { analyzer::tcp::TCP_ApplicationAnalyzer::DeliverStream(len, data, orig); - assert(TCP()); - - if ( TCP()->IsPartial() ) - // punt on partial. 
+ if ( TCP() && TCP()->IsPartial() ) return; if ( orig_done && resp_done ) diff --git a/src/analyzer/protocol/ssh/SSH.cc b/src/analyzer/protocol/ssh/SSH.cc index e62601a858..f2974cce84 100644 --- a/src/analyzer/protocol/ssh/SSH.cc +++ b/src/analyzer/protocol/ssh/SSH.cc @@ -44,8 +44,7 @@ void SSH_Analyzer::DeliverStream(int len, const u_char* data, bool orig) { analyzer::tcp::TCP_ApplicationAnalyzer::DeliverStream(len, data, orig); - assert(TCP()); - if ( TCP()->IsPartial() ) + if ( TCP() && TCP()->IsPartial() ) return; if ( had_gap ) diff --git a/src/analyzer/protocol/ssl/CMakeLists.txt b/src/analyzer/protocol/ssl/CMakeLists.txt index 47093a978e..9a82ecff8a 100644 --- a/src/analyzer/protocol/ssl/CMakeLists.txt +++ b/src/analyzer/protocol/ssl/CMakeLists.txt @@ -10,14 +10,10 @@ zeek_plugin_bif(events.bif) zeek_plugin_bif(functions.bif) zeek_plugin_bif(consts.bif) zeek_plugin_pac(tls-handshake.pac tls-handshake-protocol.pac tls-handshake-analyzer.pac ssl-defs.pac - proc-client-hello.pac - proc-server-hello.pac proc-certificate.pac tls-handshake-signed_certificate_timestamp.pac ) zeek_plugin_pac(ssl.pac ssl-dtls-analyzer.pac ssl-analyzer.pac ssl-dtls-protocol.pac ssl-protocol.pac ssl-defs.pac - proc-client-hello.pac - proc-server-hello.pac proc-certificate.pac ) zeek_plugin_pac(dtls.pac ssl-dtls-analyzer.pac dtls-analyzer.pac ssl-dtls-protocol.pac dtls-protocol.pac ssl-defs.pac) diff --git a/src/analyzer/protocol/ssl/DTLS.cc b/src/analyzer/protocol/ssl/DTLS.cc index 9fe8cbbaa0..7ddd714989 100644 --- a/src/analyzer/protocol/ssl/DTLS.cc +++ b/src/analyzer/protocol/ssl/DTLS.cc @@ -76,4 +76,11 @@ void DTLS_Analyzer::SendHandshake(uint16_t raw_tls_version, uint8_t msg_type, ui } } +bool DTLS_Analyzer::TryDecryptApplicationData(int len, const u_char* data, bool is_orig, + uint8_t content_type, uint16_t raw_tls_version) + { + // noop for now as DTLS decryption is currently not supported + return false; + } + } // namespace zeek::analyzer::dtls diff --git 
a/src/analyzer/protocol/ssl/DTLS.h b/src/analyzer/protocol/ssl/DTLS.h index 497fb1b16e..1642cb4a6c 100644 --- a/src/analyzer/protocol/ssl/DTLS.h +++ b/src/analyzer/protocol/ssl/DTLS.h @@ -39,6 +39,16 @@ public: static analyzer::Analyzer* Instantiate(Connection* conn) { return new DTLS_Analyzer(conn); } + /** + * Try to decrypt TLS application data from a packet. + * + * For DTLS, this operation is not currently implemented and this function will + * always return false. + * + **/ + bool TryDecryptApplicationData(int len, const u_char* data, bool is_orig, uint8_t content_type, + uint16_t raw_tls_version); + protected: binpac::DTLS::SSL_Conn* interp; binpac::TLSHandshake::Handshake_Conn* handshake_interp; diff --git a/src/analyzer/protocol/ssl/SSL.cc b/src/analyzer/protocol/ssl/SSL.cc index d6e07e7b2a..2d8d194e59 100644 --- a/src/analyzer/protocol/ssl/SSL.cc +++ b/src/analyzer/protocol/ssl/SSL.cc @@ -1,20 +1,56 @@ #include "zeek/analyzer/protocol/ssl/SSL.h" +#include +#include +#include + #include "zeek/Reporter.h" +#include "zeek/analyzer/Manager.h" #include "zeek/analyzer/protocol/ssl/events.bif.h" #include "zeek/analyzer/protocol/ssl/ssl_pac.h" #include "zeek/analyzer/protocol/ssl/tls-handshake_pac.h" #include "zeek/analyzer/protocol/tcp/TCP_Reassembler.h" #include "zeek/util.h" +#ifdef OPENSSL_HAVE_KDF_H +#include +#endif + +#if defined(OPENSSL_VERSION_MAJOR) && (OPENSSL_VERSION_MAJOR >= 3) +#include +#endif + namespace zeek::analyzer::ssl { +template static inline T MSB(const T a) + { + return ((a >> 8) & 0xff); + } + +template static inline T LSB(const T a) + { + return (a & 0xff); + } + +static std::basic_string fmt_seq(uint32_t num) + { + std::basic_string out(4, '\0'); + out.reserve(13); + uint32_t netnum = htonl(num); + out.append(reinterpret_cast(&netnum), 4); + out.append(5, '\0'); + return out; + } + SSL_Analyzer::SSL_Analyzer(Connection* c) : analyzer::tcp::TCP_ApplicationAnalyzer("SSL", c) { interp = new binpac::SSL::SSL_Conn(this); handshake_interp = 
new binpac::TLSHandshake::Handshake_Conn(this); had_gap = false; + c_seq = 0; + s_seq = 0; + pia = nullptr; } SSL_Analyzer::~SSL_Analyzer() @@ -98,4 +134,283 @@ void SSL_Analyzer::Undelivered(uint64_t seq, int len, bool orig) interp->NewGap(orig, len); } +void SSL_Analyzer::SetSecret(const zeek::StringVal& secret) + { + SetSecret(secret.Len(), secret.Bytes()); + } + +void SSL_Analyzer::SetSecret(size_t len, const u_char* data) + { + secret.clear(); + secret.append((const char*)data, len); + } + +void SSL_Analyzer::SetKeys(const zeek::StringVal& nkeys) + { + keys.clear(); + keys.reserve(nkeys.Len()); + std::copy(nkeys.Bytes(), nkeys.Bytes() + nkeys.Len(), std::back_inserter(keys)); + } + +void SSL_Analyzer::SetKeys(std::vector newkeys) + { + keys = std::move(newkeys); + } + +std::optional> +SSL_Analyzer::TLS12_PRF(const std::string& secret, const std::string& label, + const std::string& rnd1, const std::string& rnd2, size_t requested_len) + { +#ifdef OPENSSL_HAVE_KDF_H +#if defined(OPENSSL_VERSION_MAJOR) && (OPENSSL_VERSION_MAJOR >= 3) + // alloc context + params + EVP_KDF* kdf = EVP_KDF_fetch(NULL, "TLS1-PRF", NULL); + EVP_KDF_CTX* kctx = EVP_KDF_CTX_new(kdf); + OSSL_PARAM params[4], *p = params; + EVP_KDF_free(kdf); +#else /* OSSL 3 */ + // alloc buffers + EVP_PKEY_CTX* pctx = EVP_PKEY_CTX_new_id(EVP_PKEY_TLS1_PRF, NULL); +#endif /* OSSL 3 */ + + // prepare seed: seed = label + rnd1 + rnd2 + std::string seed{}; + seed.reserve(label.size() + rnd1.size() + rnd2.size()); + + seed.append(label); + seed.append(rnd1); + seed.append(rnd2); + +#if defined(OPENSSL_VERSION_MAJOR) && (OPENSSL_VERSION_MAJOR >= 3) + // setup OSSL_PARAM array: digest, secret, seed + // FIXME: sha384 should not be hardcoded + // The const-cast is a bit ugly - but otherwise we have to copy the static string. 
+ *p++ = OSSL_PARAM_construct_utf8_string(OSSL_KDF_PARAM_DIGEST, const_cast(SN_sha384), 0); + *p++ = OSSL_PARAM_construct_octet_string(OSSL_KDF_PARAM_SECRET, (void*)secret.data(), + secret.size()); + *p++ = OSSL_PARAM_construct_octet_string(OSSL_KDF_PARAM_SEED, (void*)seed.data(), seed.size()); + *p = OSSL_PARAM_construct_end(); + + auto keybuf = std::vector(requested_len); + + // set OSSL params + if ( EVP_KDF_CTX_set_params(kctx, params) <= 0 ) + goto abort; + // derive key material + if ( EVP_KDF_derive(kctx, keybuf.data(), requested_len, nullptr) <= 0 ) + goto abort; + + EVP_KDF_CTX_free(kctx); + return keybuf; + +abort: + EVP_KDF_CTX_free(kctx); + return {}; +#else /* OSSL 3 */ + auto keybuf = std::vector(requested_len); + if ( EVP_PKEY_derive_init(pctx) <= 0 ) + goto abort; /* Error */ + // setup PKEY params: digest, secret, seed + // FIXME: sha384 should not be hardcoded + if ( EVP_PKEY_CTX_set_tls1_prf_md(pctx, EVP_sha384()) <= 0 ) + goto abort; /* Error */ + if ( EVP_PKEY_CTX_set1_tls1_prf_secret(pctx, secret.data(), secret.size()) <= 0 ) + goto abort; /* Error */ + if ( EVP_PKEY_CTX_add1_tls1_prf_seed(pctx, seed.data(), seed.size()) <= 0 ) + goto abort; /* Error */ + if ( EVP_PKEY_derive(pctx, keybuf.data(), &requested_len) <= 0 ) + goto abort; /* Error */ + + EVP_PKEY_CTX_free(pctx); + return keybuf; + +abort: + EVP_PKEY_CTX_free(pctx); +#endif /* OSSL 3 */ + +#endif /* HAVE_KDF */ + return {}; + } + +bool SSL_Analyzer::TryDecryptApplicationData(int len, const u_char* data, bool is_orig, + uint8_t content_type, uint16_t raw_tls_version) + { + // Unsupported cipher suite. 
Currently supported: + // - TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384 == 0xC030 + auto cipher = handshake_interp->chosen_cipher(); + if ( cipher != 0xC030 ) + { + DBG_LOG(DBG_ANALYZER, "Unsupported cipher suite for decryption: %d\n", cipher); + return false; + } + + // Neither secret or key present: abort + if ( secret.size() == 0 && keys.size() == 0 ) + { + DBG_LOG( + DBG_ANALYZER, + "Could not decrypt packet due to missing keys/secret. Client_random: %s\n", + util::fmt_bytes(reinterpret_cast(handshake_interp->client_random().data()), + handshake_interp->client_random().length())); + // FIXME: change util function to return a printably std::string for DBG_LOG + // print_hex("->client_random:", handshake_interp->client_random().data(), + // handshake_interp->client_random().size()); + return false; + } + + // Secret present, but no keys derived yet: derive keys + if ( secret.size() != 0 && keys.size() == 0 ) + { +#ifdef OPENSSL_HAVE_KDF_H + DBG_LOG(DBG_ANALYZER, "Deriving TLS keys for connection"); + uint32_t ts = htonl((uint32_t)handshake_interp->gmt_unix_time()); + + auto c_rnd = handshake_interp->client_random(); + auto s_rnd = handshake_interp->server_random(); + + std::string crand; + crand.append(reinterpret_cast(&(ts)), 4); + crand.append(reinterpret_cast(c_rnd.data()), c_rnd.length()); + std::string srand(reinterpret_cast(s_rnd.data()), s_rnd.length()); + + // fixme - 72 should not be hardcoded + auto res = TLS12_PRF(secret, "key expansion", srand, crand, 72); + if ( ! res ) + { + DBG_LOG(DBG_ANALYZER, "TLS PRF failed. 
Aborting.\n"); + return false; + } + + // save derived keys + SetKeys(res.value()); +#else + DBG_LOG(DBG_ANALYZER, + "Cannot derive TLS keys as Zeek was compiled without "); + return false; +#endif + } + + // Keys present: decrypt TLS application data + if ( keys.size() == 72 ) + { + // FIXME: could also print keys or conn id here + DBG_LOG(DBG_ANALYZER, "Decrypting application data"); + + // NOTE: you must not call functions that invalidate keys.data() on keys during the + // remainder of this function. (Given that we do not manipulate the key material in this + // function that should not be hard) + + // client write_key + const u_char* c_wk = keys.data(); + // server write_key + const u_char* s_wk = keys.data() + 32; + // client IV + const u_char* c_iv = keys.data() + 64; + // server IV + const u_char* s_iv = keys.data() + 68; + + // FIXME: should we change types here? + u_char* encrypted = (u_char*)data; + size_t encrypted_len = len; + + if ( is_orig ) + c_seq++; + else + s_seq++; + + // AEAD nonce, length 12 + std::basic_string s_aead_nonce; + if ( is_orig ) + s_aead_nonce.assign(c_iv, 4); + else + s_aead_nonce.assign(s_iv, 4); + + // this should be the explicit counter + s_aead_nonce.append(encrypted, 8); + assert(s_aead_nonce.size() == 12); + + EVP_CIPHER_CTX* ctx = EVP_CIPHER_CTX_new(); + EVP_CIPHER_CTX_init(ctx); + EVP_CipherInit(ctx, EVP_aes_256_gcm(), NULL, NULL, 0); + + encrypted += 8; + // FIXME: is this because of nonce and aead tag? 
+ if ( encrypted_len <= (16 + 8) ) + { + DBG_LOG(DBG_ANALYZER, "Invalid encrypted length encountered during TLS decryption"); + EVP_CIPHER_CTX_free(ctx); + return false; + } + encrypted_len -= 8; + encrypted_len -= 16; + + // FIXME: aes_256_gcm should not be hardcoded here ;) + if ( is_orig ) + EVP_DecryptInit(ctx, EVP_aes_256_gcm(), c_wk, s_aead_nonce.data()); + else + EVP_DecryptInit(ctx, EVP_aes_256_gcm(), s_wk, s_aead_nonce.data()); + + EVP_CIPHER_CTX_ctrl(ctx, EVP_CTRL_GCM_SET_TAG, 16, encrypted + encrypted_len); + + // AEAD tag + std::basic_string s_aead_tag; + if ( is_orig ) + s_aead_tag = fmt_seq(c_seq); + else + s_aead_tag = fmt_seq(s_seq); + + s_aead_tag[8] = content_type; + s_aead_tag[9] = MSB(raw_tls_version); + s_aead_tag[10] = LSB(raw_tls_version); + s_aead_tag[11] = MSB(encrypted_len); + s_aead_tag[12] = LSB(encrypted_len); + assert(s_aead_tag.size() == 13); + + auto decrypted = std::vector( + encrypted_len + + 16); // see OpenSSL manpage - 16 is the block size for the supported cipher + int decrypted_len = 0; + + EVP_DecryptUpdate(ctx, NULL, &decrypted_len, s_aead_tag.data(), s_aead_tag.size()); + EVP_DecryptUpdate(ctx, decrypted.data(), &decrypted_len, (const u_char*)encrypted, + encrypted_len); + assert(decrypted_len <= decrypted.size()); + decrypted.resize(decrypted_len); + + int res = 0; + if ( ! (res = EVP_DecryptFinal(ctx, NULL, &res)) ) + { + DBG_LOG(DBG_ANALYZER, "Decryption failed with return code: %d. Invalid key?\n", res); + EVP_CIPHER_CTX_free(ctx); + return false; + } + + DBG_LOG(DBG_ANALYZER, "Successfully decrypted %d bytes.", decrypted_len); + EVP_CIPHER_CTX_free(ctx); + ForwardDecryptedData(decrypted, is_orig); + + return true; + } + + // This is only reached if key derivation fails or is unsupported + return false; + } + +void SSL_Analyzer::ForwardDecryptedData(const std::vector& data, bool is_orig) + { + if ( ! 
pia ) + { + pia = new analyzer::pia::PIA_TCP(Conn()); + if ( AddChildAnalyzer(pia) ) + { + pia->FirstPacket(true, nullptr); + pia->FirstPacket(false, nullptr); + } + else + reporter->Error("Could not initialize PIA"); + } + + ForwardStream(data.size(), data.data(), is_orig); + } + } // namespace zeek::analyzer::ssl diff --git a/src/analyzer/protocol/ssl/SSL.h b/src/analyzer/protocol/ssl/SSL.h index fa5ff5c1a5..5fdbc27dca 100644 --- a/src/analyzer/protocol/ssl/SSL.h +++ b/src/analyzer/protocol/ssl/SSL.h @@ -1,5 +1,6 @@ #pragma once +#include "zeek/analyzer/protocol/pia/PIA.h" #include "zeek/analyzer/protocol/ssl/events.bif.h" #include "zeek/analyzer/protocol/tcp/TCP.h" @@ -24,6 +25,9 @@ namespace zeek::analyzer::ssl class SSL_Analyzer final : public analyzer::tcp::TCP_ApplicationAnalyzer { + // let binpac forward encrypted TLS application data to us. + friend class binpac::SSL::SSL_Conn; + public: explicit SSL_Analyzer(Connection* conn); ~SSL_Analyzer() override; @@ -45,10 +49,123 @@ public: static analyzer::Analyzer* Instantiate(Connection* conn) { return new SSL_Analyzer(conn); } + /** + * Set the secret that should be used to derive keys for the + * connection. (For TLS 1.2 this is the pre-master secret) + * + * Please note that these functions currently are hardcoded to only work with a single TLS 1.2 + * ciphersuite (TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384). + * + * @param secret The secret to set + */ + void SetSecret(const StringVal& secret); + + /** + * Set the secret that should be used to derive keys for the + * connection. (For TLS 1.2 this is the pre-master secret) + * + * Please note that these functions currently are hardcoded to only work with a single TLS 1.2 + * cuphersuite (TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384). 
+ * + * @param len Length of the secret bytes + * + * @param data Pointer to the secret bytes + */ + void SetSecret(size_t len, const u_char* data); + + /** + * Set the decryption keys that should be used to decrypt + * TLS application data in the connection. + * + * Please note that these functions currently are hardcoded to only work with a single TLS 1.2 + * cuphersuite (TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384). + * + * @param keys The key buffer as derived via TLS PRF (for + * AES_GCM this should be 72 bytes in length) + */ + void SetKeys(const StringVal& keys); + + /** + * Set the decryption keys that should be used to decrypt + * TLS application data in the connection. + * + * Please note that these functions currently are hardcoded to only work with a single TLS 1.2 + * cuphersuite (TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384). + * + * @param keys The key buffer as derived via TLS PRF (for + * AES_GCM this should be 72 bytes in length) + */ + void SetKeys(const std::vector newkeys); + protected: + /** + * Try to decrypt TLS application data from a packet. Requires secret or keys to be set prior. + * + * Please note that these functions currently are hardcoded to only work with a single TLS 1.2 + * cuphersuite (TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384). + * + * @param len Length of the encrypted bytes to decrypt + * + * @param data Pointer to the encrypted bytes to decrypt + * + * @param is_orig Direction of the connection + * + * @param content_type Content type as given in the TLS packet + * + * @param raw_tls_version Raw TLS version as given in the TLS packets + * + * @return True if decryption succeeded and data was forwarded. + */ + bool TryDecryptApplicationData(int len, const u_char* data, bool is_orig, uint8_t content_type, + uint16_t raw_tls_version); + + /** + * TLS 1.2 pseudo random function (PRF) used to expand the pre-master secret and derive keys. + * The seed is obtained by concatinating rnd1 and rnd2. 
+ * + * Please note that these functions currently are hardcoded to only work with a single TLS 1.2 + * cuphersuite (TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384). + * + * @param secret Secret as defined in the TLS RFC + * + * @param label Label as defined in the TLS RFC + * + * @param First part of the seed + * + * @param rnd2 Second part of the seed + * + * @param rnd2_len Length of the second part of the seed + * + * @param requested_len Length indicating how many bytes should be derived + * + * @return The derived bytes, if the operation succeeds. + */ + std::optional> TLS12_PRF(const std::string& secret, + const std::string& label, const std::string& rnd1, + const std::string& rnd2, size_t requested_len); + + /** + * Forward decrypted TLS application data to child analyzers. + * + * @param data Data to forward + * + * @param is_orig Direction of the connection + */ + void ForwardDecryptedData(const std::vector& data, bool is_orig); + binpac::SSL::SSL_Conn* interp; binpac::TLSHandshake::Handshake_Conn* handshake_interp; bool had_gap; + + // client and server sequence number, used for TLS 1.2 decryption + int c_seq; + int s_seq; + // secret, for decyption + std::string secret; + // derived keys, for decryption + std::vector keys; + // PIA, for decrypted data + zeek::analyzer::pia::PIA_TCP* pia; }; } // namespace zeek::analyzer::ssl diff --git a/src/analyzer/protocol/ssl/functions.bif b/src/analyzer/protocol/ssl/functions.bif index 2d72a4a741..75e4d3c26a 100644 --- a/src/analyzer/protocol/ssl/functions.bif +++ b/src/analyzer/protocol/ssl/functions.bif @@ -9,10 +9,59 @@ ## finished succesfully). ## ## c: The SSL connection. -function set_ssl_established%(c: connection%): any +## +## Returns: T on success, F on failure. 
+function set_ssl_established%(c: connection%): bool %{ zeek::analyzer::Analyzer* sa = c->FindAnalyzer("SSL"); + if ( sa ) + { static_cast(sa)->StartEncryption(); - return nullptr; + return zeek::val_mgr->True(); + } + + return zeek::val_mgr->False(); + %} + +## Set the secret that should be used to derive keys for the connection. +## (For TLS 1.2 this is the pre-master secret). +## +## c: The affected connection +## +## secret: secret to set +## +## Returns: T on success, F on failure. +function set_secret%(c: connection, secret: string%): bool + %{ + analyzer::Analyzer* sa = c->FindAnalyzer("SSL"); + + if ( sa ) + { + static_cast(sa)->SetSecret(*secret); + return zeek::val_mgr->True(); + } + + return zeek::val_mgr->False(); + %} + +## Set the decryption keys that should be used to decrypt +## TLS application data in the connection. +## +## c: The affected connection +## +## keys: The key buffer as derived via TLS PRF. +## +## Returns: T on success, F on failure. +function set_keys%(c: connection, keys: string%): bool + %{ + analyzer::Analyzer* sa = c->FindAnalyzer("SSL"); + + if ( sa ) + { + static_cast(sa)->SetKeys(*keys); + return zeek::val_mgr->True(); + } + + return zeek::val_mgr->False(); %} diff --git a/src/analyzer/protocol/ssl/proc-client-hello.pac b/src/analyzer/protocol/ssl/proc-client-hello.pac deleted file mode 100644 index 7a0b3a9461..0000000000 --- a/src/analyzer/protocol/ssl/proc-client-hello.pac +++ /dev/null @@ -1,54 +0,0 @@ - function proc_client_hello( - version : uint16, ts : double, - client_random : bytestring, - session_id : uint8[], - cipher_suites16 : uint16[], - cipher_suites24 : uint24[], - compression_methods: uint8[]) : bool - %{ - if ( ! 
version_ok(version) ) - { - zeek_analyzer()->AnalyzerViolation(zeek::util::fmt("unsupported client SSL version 0x%04x", version)); - zeek_analyzer()->SetSkip(true); - } - else - zeek_analyzer()->AnalyzerConfirmation(); - - if ( ssl_client_hello ) - { - vector cipher_suites; - - if ( cipher_suites16 ) - std::copy(cipher_suites16->begin(), cipher_suites16->end(), std::back_inserter(cipher_suites)); - else - std::transform(cipher_suites24->begin(), cipher_suites24->end(), std::back_inserter(cipher_suites), to_int()); - - auto cipher_vec = zeek::make_intrusive(zeek::id::index_vec); - - for ( unsigned int i = 0; i < cipher_suites.size(); ++i ) - { - auto ciph = zeek::val_mgr->Count(cipher_suites[i]); - cipher_vec->Assign(i, ciph); - } - - auto comp_vec = zeek::make_intrusive(zeek::id::index_vec); - - if ( compression_methods ) - { - for ( unsigned int i = 0; i < compression_methods->size(); ++i ) - { - auto comp = zeek::val_mgr->Count((*compression_methods)[i]); - comp_vec->Assign(i, comp); - } - } - - zeek::BifEvent::enqueue_ssl_client_hello(zeek_analyzer(), zeek_analyzer()->Conn(), - version, record_version(), ts, - zeek::make_intrusive(client_random.length(), - (const char*) client_random.data()), - {zeek::AdoptRef{}, to_string_val(session_id)}, - std::move(cipher_vec), std::move(comp_vec)); - } - - return true; - %} diff --git a/src/analyzer/protocol/ssl/proc-server-hello.pac b/src/analyzer/protocol/ssl/proc-server-hello.pac deleted file mode 100644 index 9e44dfad36..0000000000 --- a/src/analyzer/protocol/ssl/proc-server-hello.pac +++ /dev/null @@ -1,40 +0,0 @@ - function proc_server_hello( - version : uint16, v2 : bool, - server_random : bytestring, - session_id : uint8[], - cipher_suites16 : uint16[], - cipher_suites24 : uint24[], - comp_method : uint8) : bool - %{ - if ( ! 
version_ok(version) ) - { - zeek_analyzer()->AnalyzerViolation(zeek::util::fmt("unsupported server SSL version 0x%04x", version)); - zeek_analyzer()->SetSkip(true); - } - - if ( ssl_server_hello ) - { - vector* ciphers = new vector(); - - if ( cipher_suites16 ) - std::copy(cipher_suites16->begin(), cipher_suites16->end(), std::back_inserter(*ciphers)); - else - std::transform(cipher_suites24->begin(), cipher_suites24->end(), std::back_inserter(*ciphers), to_int()); - - uint32 ts = 0; - if ( v2 == 0 && server_random.length() >= 4 ) - ts = ntohl(*((uint32*)server_random.data())); - - zeek::BifEvent::enqueue_ssl_server_hello(zeek_analyzer(), - zeek_analyzer()->Conn(), - version, record_version(), ts, - zeek::make_intrusive(server_random.length(), - (const char*) server_random.data()), - {zeek::AdoptRef{}, to_string_val(session_id)}, - ciphers->size()==0 ? 0 : ciphers->at(0), comp_method); - - delete ciphers; - } - - return true; - %} diff --git a/src/analyzer/protocol/ssl/ssl-analyzer.pac b/src/analyzer/protocol/ssl/ssl-analyzer.pac index c1a8876058..3f7543c39f 100644 --- a/src/analyzer/protocol/ssl/ssl-analyzer.pac +++ b/src/analyzer/protocol/ssl/ssl-analyzer.pac @@ -2,10 +2,104 @@ refine connection SSL_Conn += { - %include proc-client-hello.pac - %include proc-server-hello.pac %include proc-certificate.pac + function proc_client_hello( + version : uint16, ts : double, + client_random : bytestring, + session_id : uint8[], + cipher_suites16 : uint16[], + cipher_suites24 : uint24[], + compression_methods: uint8[]) : bool + %{ + if ( ! 
version_ok(version) ) + { + zeek_analyzer()->AnalyzerViolation(zeek::util::fmt("unsupported client SSL version 0x%04x", version)); + zeek_analyzer()->SetSkip(true); + } + else + zeek_analyzer()->AnalyzerConfirmation(); + + if ( ssl_client_hello ) + { + vector cipher_suites; + + if ( cipher_suites16 ) + std::copy(cipher_suites16->begin(), cipher_suites16->end(), std::back_inserter(cipher_suites)); + else + std::transform(cipher_suites24->begin(), cipher_suites24->end(), std::back_inserter(cipher_suites), to_int()); + + auto cipher_vec = zeek::make_intrusive(zeek::id::index_vec); + + for ( unsigned int i = 0; i < cipher_suites.size(); ++i ) + { + auto ciph = zeek::val_mgr->Count(cipher_suites[i]); + cipher_vec->Assign(i, ciph); + } + + auto comp_vec = zeek::make_intrusive(zeek::id::index_vec); + + if ( compression_methods ) + { + for ( unsigned int i = 0; i < compression_methods->size(); ++i ) + { + auto comp = zeek::val_mgr->Count((*compression_methods)[i]); + comp_vec->Assign(i, comp); + } + } + + zeek::BifEvent::enqueue_ssl_client_hello(zeek_analyzer(), zeek_analyzer()->Conn(), + version, record_version(), ts, + zeek::make_intrusive(client_random.length(), + (const char*) client_random.data()), + {zeek::AdoptRef{}, to_string_val(session_id)}, + std::move(cipher_vec), std::move(comp_vec)); + } + + return true; + %} + + function proc_server_hello( + version : uint16, v2 : bool, + server_random : bytestring, + session_id : uint8[], + cipher_suites16 : uint16[], + cipher_suites24 : uint24[], + comp_method : uint8) : bool + %{ + if ( ! 
version_ok(version) ) + { + zeek_analyzer()->AnalyzerViolation(zeek::util::fmt("unsupported server SSL version 0x%04x", version)); + zeek_analyzer()->SetSkip(true); + } + + if ( ssl_server_hello ) + { + vector* ciphers = new vector(); + + if ( cipher_suites16 ) + std::copy(cipher_suites16->begin(), cipher_suites16->end(), std::back_inserter(*ciphers)); + else + std::transform(cipher_suites24->begin(), cipher_suites24->end(), std::back_inserter(*ciphers), to_int()); + + uint32 ts = 0; + if ( v2 == 0 && server_random.length() >= 4 ) + ts = ntohl(*((uint32*)server_random.data())); + + zeek::BifEvent::enqueue_ssl_server_hello(zeek_analyzer(), + zeek_analyzer()->Conn(), + version, record_version(), ts, + zeek::make_intrusive(server_random.length(), + (const char*) server_random.data()), + {zeek::AdoptRef{}, to_string_val(session_id)}, + ciphers->size()==0 ? 0 : ciphers->at(0), comp_method); + + delete ciphers; + } + + return true; + %} + function proc_v2_certificate(is_orig: bool, cert : bytestring) : bool %{ vector* cert_list = new vector(1,cert); diff --git a/src/analyzer/protocol/ssl/ssl-dtls-analyzer.pac b/src/analyzer/protocol/ssl/ssl-dtls-analyzer.pac index 4c28970698..8ddecbe318 100644 --- a/src/analyzer/protocol/ssl/ssl-dtls-analyzer.pac +++ b/src/analyzer/protocol/ssl/ssl-dtls-analyzer.pac @@ -13,10 +13,12 @@ refine connection SSL_Conn += { %member{ int established_; + int decryption_failed_; %} %init{ established_ = false; + decryption_failed_ = false; %} %cleanup{ @@ -43,7 +45,7 @@ refine connection SSL_Conn += { return true; %} - function proc_ciphertext_record(rec : SSLRecord) : bool + function proc_ciphertext_record(rec : SSLRecord, cont: const_bytestring) : bool %{ if ( established_ == false && determine_tls13() == 1 ) { @@ -62,8 +64,17 @@ refine connection SSL_Conn += { } if ( ssl_encrypted_data ) + { zeek::BifEvent::enqueue_ssl_encrypted_data(zeek_analyzer(), zeek_analyzer()->Conn(), ${rec.is_orig}, ${rec.raw_tls_version}, ${rec.content_type}, 
${rec.length}); + } + + if ( rec->content_type() == APPLICATION_DATA && decryption_failed_ == false ) + { + // If decryption of one packet fails, do not try to decrypt future packets. + if ( ! zeek_analyzer()->TryDecryptApplicationData(cont.length(), cont.begin(), rec->is_orig(), rec->content_type(), rec->raw_tls_version()) ) + decryption_failed_ = true; + } return true; %} @@ -123,7 +134,7 @@ refine typeattr UnknownRecord += &let { }; refine typeattr CiphertextRecord += &let { - proc : bool = $context.connection.proc_ciphertext_record(rec); + proc : bool = $context.connection.proc_ciphertext_record(rec, cont); } refine typeattr PlaintextRecord += &let { diff --git a/src/analyzer/protocol/ssl/tls-handshake-analyzer.pac b/src/analyzer/protocol/ssl/tls-handshake-analyzer.pac index 33e0af5924..0f222ae682 100644 --- a/src/analyzer/protocol/ssl/tls-handshake-analyzer.pac +++ b/src/analyzer/protocol/ssl/tls-handshake-analyzer.pac @@ -25,10 +25,107 @@ refine connection Handshake_Conn += { - %include proc-client-hello.pac - %include proc-server-hello.pac %include proc-certificate.pac + function proc_client_hello( + version : uint16, ts : double, + client_random : bytestring, + session_id : uint8[], + cipher_suites16 : uint16[], + cipher_suites24 : uint24[], + compression_methods: uint8[]) : bool + %{ + if ( ! 
version_ok(version) ) + { + zeek_analyzer()->AnalyzerViolation(zeek::util::fmt("unsupported client SSL version 0x%04x", version)); + zeek_analyzer()->SetSkip(true); + } + else + zeek_analyzer()->AnalyzerConfirmation(); + + if ( ssl_client_hello ) + { + vector cipher_suites; + + if ( cipher_suites16 ) + std::copy(cipher_suites16->begin(), cipher_suites16->end(), std::back_inserter(cipher_suites)); + else + std::transform(cipher_suites24->begin(), cipher_suites24->end(), std::back_inserter(cipher_suites), to_int()); + + auto cipher_vec = zeek::make_intrusive(zeek::id::index_vec); + + for ( unsigned int i = 0; i < cipher_suites.size(); ++i ) + { + auto ciph = zeek::val_mgr->Count(cipher_suites[i]); + cipher_vec->Assign(i, ciph); + } + + auto comp_vec = zeek::make_intrusive(zeek::id::index_vec); + + if ( compression_methods ) + { + for ( unsigned int i = 0; i < compression_methods->size(); ++i ) + { + auto comp = zeek::val_mgr->Count((*compression_methods)[i]); + comp_vec->Assign(i, comp); + } + } + + set_client_random(client_random); + set_gmt_unix_time(ts); + zeek::BifEvent::enqueue_ssl_client_hello(zeek_analyzer(), zeek_analyzer()->Conn(), + version, record_version(), ts, + zeek::make_intrusive(client_random.length(), + (const char*) client_random.data()), + {zeek::AdoptRef{}, to_string_val(session_id)}, + std::move(cipher_vec), std::move(comp_vec)); + } + + return true; + %} + + function proc_server_hello( + version : uint16, v2 : bool, + server_random : bytestring, + session_id : uint8[], + cipher_suites16 : uint16[], + cipher_suites24 : uint24[], + comp_method : uint8) : bool + %{ + if ( ! 
version_ok(version) ) + { + zeek_analyzer()->AnalyzerViolation(zeek::util::fmt("unsupported server SSL version 0x%04x", version)); + zeek_analyzer()->SetSkip(true); + } + + if ( ssl_server_hello ) + { + vector* ciphers = new vector(); + + if ( cipher_suites16 ) + std::copy(cipher_suites16->begin(), cipher_suites16->end(), std::back_inserter(*ciphers)); + else + std::transform(cipher_suites24->begin(), cipher_suites24->end(), std::back_inserter(*ciphers), to_int()); + + uint32 ts = 0; + if ( v2 == 0 && server_random.length() >= 4 ) + ts = ntohl(*((uint32*)server_random.data())); + + set_server_random(server_random); + zeek::BifEvent::enqueue_ssl_server_hello(zeek_analyzer(), + zeek_analyzer()->Conn(), + version, record_version(), ts, + zeek::make_intrusive(server_random.length(), + (const char*) server_random.data()), + {zeek::AdoptRef{}, to_string_val(session_id)}, + ciphers->size()==0 ? 0 : ciphers->at(0), comp_method); + + delete ciphers; + } + + return true; + %} + function proc_session_ticket_handshake(rec: SessionTicketHandshake, is_orig: bool): bool %{ if ( ssl_session_ticket_handshake ) diff --git a/src/analyzer/protocol/ssl/tls-handshake-protocol.pac b/src/analyzer/protocol/ssl/tls-handshake-protocol.pac index e37a0c512a..357bd77909 100644 --- a/src/analyzer/protocol/ssl/tls-handshake-protocol.pac +++ b/src/analyzer/protocol/ssl/tls-handshake-protocol.pac @@ -943,6 +943,9 @@ refine connection Handshake_Conn += { uint32 chosen_cipher_; uint16 chosen_version_; uint16 record_version_; + bytestring client_random_; + bytestring server_random_; + uint32 gmt_unix_time_; %} %init{ @@ -950,6 +953,12 @@ refine connection Handshake_Conn += { chosen_version_ = UNKNOWN_VERSION; record_version_ = 0; + gmt_unix_time_ = 0; + %} + + %cleanup{ + client_random_.free(); + server_random_.free(); %} function chosen_cipher() : int %{ return chosen_cipher_; %} @@ -983,5 +992,31 @@ refine connection Handshake_Conn += { record_version_ = version; return true; %} + + function 
client_random() : bytestring %{ return client_random_; %} + + function set_client_random(client_random: bytestring) : bool + %{ + client_random_.free(); + client_random_.init(client_random.data(), client_random.length()); + return true; + %} + + function server_random() : bytestring %{ return server_random_; %} + + function set_server_random(server_random: bytestring) : bool + %{ + server_random_.free(); + server_random_.init(server_random.data(), server_random.length()); + return true; + %} + + function gmt_unix_time() : uint32 %{ return gmt_unix_time_; %} + + function set_gmt_unix_time(ts: uint32) : bool + %{ + gmt_unix_time_ = ts; + return true; + %} }; diff --git a/src/analyzer/protocol/syslog/Syslog.cc b/src/analyzer/protocol/syslog/Syslog.cc index 8b059ed58d..8dfd5ab52c 100644 --- a/src/analyzer/protocol/syslog/Syslog.cc +++ b/src/analyzer/protocol/syslog/Syslog.cc @@ -79,10 +79,7 @@ void Syslog_Analyzer::DeliverPacket(int len, const u_char* data, bool orig, uint // { // analyzer::tcp::TCP_ApplicationAnalyzer::DeliverStream(len, data, orig); // -// assert(TCP()); -// -// if ( TCP()->IsPartial() || TCP()->HadGap(orig) ) -// // punt-on-partial or stop-on-gap. +// if ( TCP() && TCP()->IsPartial() ) // return; // // interp->NewData(orig, data, data + len); diff --git a/src/analyzer/protocol/tcp/ContentLine.cc b/src/analyzer/protocol/tcp/ContentLine.cc index 50e1ad2b43..5d4b06c7d6 100644 --- a/src/analyzer/protocol/tcp/ContentLine.cc +++ b/src/analyzer/protocol/tcp/ContentLine.cc @@ -331,9 +331,7 @@ void ContentLine_Analyzer::CheckNUL() // had been an initial SYN, so we check for whether // the connection has at most two bytes so far. - auto* tcp = static_cast(Parent())->TCP(); - - if ( tcp ) + if ( auto* tcp = static_cast(Parent())->TCP() ) { TCP_Endpoint* endp = IsOrig() ? 
tcp->Orig() : tcp->Resp(); if ( endp->state == TCP_ENDPOINT_PARTIAL && endp->LastSeq() - endp->StartSeq() <= 2 ) @@ -349,7 +347,7 @@ void ContentLine_Analyzer::CheckNUL() void ContentLine_Analyzer::SkipBytesAfterThisLine(int64_t length) { - // This is a little complicated because Bro has to handle + // This is a little complicated because Zeek has to handle // both CR and CRLF as a line break. When a line is delivered, // it's possible that only a CR is seen, and we may not know // if an LF is following until we see the next packet. If an diff --git a/src/analyzer/protocol/tcp/TCP.cc b/src/analyzer/protocol/tcp/TCP.cc index 2ac794dff3..49f416fa69 100644 --- a/src/analyzer/protocol/tcp/TCP.cc +++ b/src/analyzer/protocol/tcp/TCP.cc @@ -34,16 +34,18 @@ void TCP_ApplicationAnalyzer::Init() SetTCP(static_cast(Parent())); } -void TCP_ApplicationAnalyzer::AnalyzerViolation(const char* reason, const char* data, int len) +void TCP_ApplicationAnalyzer::AnalyzerViolation(const char* reason, const char* data, int len, + zeek::Tag tag) { - auto* tcp = TCP(); + if ( auto* tcp = TCP() ) + { + if ( tcp->IsPartial() || tcp->HadGap(false) || tcp->HadGap(true) ) + // Filter out incomplete connections. Parsing them is + // too unreliable. + return; + } - if ( tcp && (tcp->IsPartial() || tcp->HadGap(false) || tcp->HadGap(true)) ) - // Filter out incomplete connections. Parsing them is - // too unreliable. - return; - - Analyzer::AnalyzerViolation(reason, data, len); + Analyzer::AnalyzerViolation(reason, data, len, tag); } void TCP_ApplicationAnalyzer::DeliverPacket(int len, const u_char* data, bool is_orig, uint64_t seq, diff --git a/src/analyzer/protocol/tcp/TCP.h b/src/analyzer/protocol/tcp/TCP.h index b979001aae..3b145cb789 100644 --- a/src/analyzer/protocol/tcp/TCP.h +++ b/src/analyzer/protocol/tcp/TCP.h @@ -68,7 +68,8 @@ public: // This suppresses violations if the TCP connection wasn't // fully established. 
- void AnalyzerViolation(const char* reason, const char* data = nullptr, int len = 0) override; + void AnalyzerViolation(const char* reason, const char* data = nullptr, int len = 0, + zeek::Tag tag = zeek::Tag()) override; // "name" and "val" both now belong to this object, which needs to // delete them when done with them. diff --git a/src/analyzer/protocol/xmpp/XMPP.cc b/src/analyzer/protocol/xmpp/XMPP.cc index f2a007dfed..5c74ea3b25 100644 --- a/src/analyzer/protocol/xmpp/XMPP.cc +++ b/src/analyzer/protocol/xmpp/XMPP.cc @@ -44,8 +44,7 @@ void XMPP_Analyzer::DeliverStream(int len, const u_char* data, bool orig) return; } - assert(TCP()); - if ( TCP()->IsPartial() ) + if ( TCP() && TCP()->IsPartial() ) return; if ( had_gap ) diff --git a/src/analyzer/protocol/zip/ZIP.cc b/src/analyzer/protocol/zip/ZIP.cc index 5226fbd4fb..4a30f52312 100644 --- a/src/analyzer/protocol/zip/ZIP.cc +++ b/src/analyzer/protocol/zip/ZIP.cc @@ -59,7 +59,7 @@ void ZIP_Analyzer::DeliverStream(int len, const u_char* data, bool orig) zip->next_in = (Bytef*)data; zip->avail_in = len; - Bytef* orig_next_in = zip->next_in; + auto orig_next_in = zip->next_in; size_t orig_avail_in = zip->avail_in; while ( true ) diff --git a/src/broker/Data.cc b/src/broker/Data.cc index 80651fc822..b0b367f8fb 100644 --- a/src/broker/Data.cc +++ b/src/broker/Data.cc @@ -226,7 +226,7 @@ struct val_converter { const auto& expected_index_types = tt->GetIndices()->GetTypes(); broker::vector composite_key; - auto indices = caf::get_if(&item); + auto indices = get_if(&item); if ( indices ) { @@ -281,7 +281,7 @@ struct val_converter { const auto& expected_index_types = tt->GetIndices()->GetTypes(); broker::vector composite_key; - auto indices = caf::get_if(&item.first); + auto indices = get_if(&item.first); if ( indices ) { @@ -446,7 +446,7 @@ struct val_converter if ( idx >= a.size() ) return nullptr; - if ( caf::get_if(&a[idx]) != nullptr ) + if ( get_if(&a[idx]) != nullptr ) { rval->Remove(i); ++idx; @@ -469,8 +469,8 
@@ struct val_converter if ( a.size() != 2 ) return nullptr; - auto exact_text = caf::get_if(&a[0]); - auto anywhere_text = caf::get_if(&a[1]); + auto exact_text = get_if(&a[0]); + auto anywhere_text = get_if(&a[1]); if ( ! exact_text || ! anywhere_text ) return nullptr; @@ -606,7 +606,7 @@ struct type_checker for ( const auto& item : a ) { const auto& expected_index_types = tt->GetIndices()->GetTypes(); - auto indices = caf::get_if(&item); + auto indices = get_if(&item); vector indices_to_check; if ( indices ) @@ -663,7 +663,7 @@ struct type_checker for ( auto& item : a ) { const auto& expected_index_types = tt->GetIndices()->GetTypes(); - auto indices = caf::get_if(&item.first); + auto indices = get_if(&item.first); vector indices_to_check; if ( indices ) @@ -765,7 +765,7 @@ struct type_checker if ( idx >= a.size() ) return false; - if ( caf::get_if(&a[idx]) != nullptr ) + if ( get_if(&a[idx]) != nullptr ) { ++idx; continue; @@ -784,8 +784,8 @@ struct type_checker if ( a.size() != 2 ) return false; - auto exact_text = caf::get_if(&a[0]); - auto anywhere_text = caf::get_if(&a[1]); + auto exact_text = get_if(&a[0]); + auto anywhere_text = get_if(&a[1]); if ( ! exact_text || ! 
anywhere_text ) return false; @@ -820,7 +820,7 @@ static bool data_type_check(const broker::data& d, Type* t) if ( t->Tag() == TYPE_ANY ) return true; - return caf::visit(type_checker{t}, d); + return visit(type_checker{t}, d); } ValPtr data_to_val(broker::data d, Type* type) @@ -828,7 +828,7 @@ ValPtr data_to_val(broker::data d, Type* type) if ( type->Tag() == TYPE_ANY ) return make_data_val(move(d)); - return caf::visit(val_converter{type}, std::move(d)); + return visit(val_converter{type}, d); } broker::expected val_to_data(const Val* v) @@ -961,7 +961,7 @@ broker::expected val_to_data(const Val* v) key = move(composite_key); if ( is_set ) - caf::get(rval).emplace(move(key)); + get(rval).emplace(move(key)); else { auto val = val_to_data(entry->GetVal().get()); @@ -969,7 +969,7 @@ broker::expected val_to_data(const Val* v) if ( ! val ) return broker::ec::invalid_data; - caf::get(rval).emplace(move(key), move(*val)); + get(rval).emplace(move(key), move(*val)); } } @@ -1173,14 +1173,14 @@ struct data_type_getter { // Note that Broker uses vectors to store record data, so there's // no actual way to tell if this data was originally associated - // with a Bro record. + // with a Zeek record. return BifType::Enum::Broker::DataType->GetEnumVal(BifEnum::Broker::VECTOR); } }; EnumValPtr get_data_type(RecordVal* v, zeek::detail::Frame* frame) { - return caf::visit(data_type_getter{}, opaque_field_to_data(v, frame)); + return visit(data_type_getter{}, opaque_field_to_data(v, frame)); } broker::data& opaque_field_to_data(RecordVal* v, zeek::detail::Frame* f) @@ -1240,11 +1240,11 @@ broker::expected SetIterator::DoSerialize() const bool SetIterator::DoUnserialize(const broker::data& data) { - auto v = caf::get_if(&data); + auto v = get_if(&data); if ( ! (v && v->size() == 2) ) return false; - auto x = caf::get_if(&(*v)[0]); + auto x = get_if(&(*v)[0]); // We set the iterator by finding the element it used to point to. 
// This is not perfect, as there's no guarantee that the restored @@ -1267,11 +1267,11 @@ broker::expected TableIterator::DoSerialize() const bool TableIterator::DoUnserialize(const broker::data& data) { - auto v = caf::get_if(&data); + auto v = get_if(&data); if ( ! (v && v->size() == 2) ) return false; - auto x = caf::get_if(&(*v)[0]); + auto x = get_if(&(*v)[0]); // We set the iterator by finding the element it used to point to. // This is not perfect, as there's no guarantee that the restored @@ -1295,12 +1295,12 @@ broker::expected VectorIterator::DoSerialize() const bool VectorIterator::DoUnserialize(const broker::data& data) { - auto v = caf::get_if(&data); + auto v = get_if(&data); if ( ! (v && v->size() == 2) ) return false; - auto x = caf::get_if(&(*v)[0]); - auto y = caf::get_if(&(*v)[1]); + auto x = get_if(&(*v)[0]); + auto y = get_if(&(*v)[1]); if ( ! (x && y) ) return false; @@ -1320,12 +1320,12 @@ broker::expected RecordIterator::DoSerialize() const bool RecordIterator::DoUnserialize(const broker::data& data) { - auto v = caf::get_if(&data); + auto v = get_if(&data); if ( ! (v && v->size() == 2) ) return false; - auto x = caf::get_if(&(*v)[0]); - auto y = caf::get_if(&(*v)[1]); + auto x = get_if(&(*v)[0]); + auto y = get_if(&(*v)[1]); if ( ! (x && y) ) return false; @@ -1352,25 +1352,24 @@ broker::data threading_field_to_data(const threading::Field* f) threading::Field* data_to_threading_field(broker::data d) { - if ( ! caf::holds_alternative(d) ) + if ( ! holds_alternative(d) ) return nullptr; - auto& v = caf::get(d); - auto name = caf::get_if(&v[0]); + auto& v = get(d); + auto name = get_if(&v[0]); auto secondary = v[1]; - auto type = caf::get_if(&v[2]); - auto subtype = caf::get_if(&v[3]); - auto optional = caf::get_if(&v[4]); + auto type = get_if(&v[2]); + auto subtype = get_if(&v[3]); + auto optional = get_if(&v[4]); if ( ! (name && type && subtype && optional) ) return nullptr; - if ( secondary != broker::nil && ! 
caf::holds_alternative(secondary) ) + if ( secondary != broker::nil && ! holds_alternative(secondary) ) return nullptr; return new threading::Field( - name->c_str(), - secondary != broker::nil ? caf::get(secondary).c_str() : nullptr, + name->c_str(), secondary != broker::nil ? get(secondary).c_str() : nullptr, static_cast(*type), static_cast(*subtype), *optional); } diff --git a/src/broker/Data.h b/src/broker/Data.h index b948f3744c..4c0ce7197b 100644 --- a/src/broker/Data.h +++ b/src/broker/Data.h @@ -5,6 +5,8 @@ #include "zeek/OpaqueVal.h" #include "zeek/Reporter.h" +#include "broker/data.hh" + namespace zeek { @@ -33,8 +35,8 @@ extern OpaqueTypePtr opaque_of_record_iterator; TransportProto to_zeek_port_proto(broker::port::protocol tp); /** - * Create a Broker::Data value from a Bro value. - * @param v the Bro value to convert to a Broker data value. + * Create a Broker::Data value from a Zeek value. + * @param v the Zeek value to convert to a Broker data value. * @return a Broker::Data value, where the optional field is set if the conversion * was possible, else it is unset. */ @@ -56,17 +58,17 @@ RecordValPtr make_data_val(broker::data d); EnumValPtr get_data_type(RecordVal* v, zeek::detail::Frame* frame); /** - * Convert a Bro value to a Broker data value. - * @param v a Bro value. - * @return a Broker data value if the Bro value could be converted to one. + * Convert a Zeek value to a Broker data value. + * @param v a Zeek value. + * @return a Broker data value if the Zeek value could be converted to one. */ broker::expected val_to_data(const Val* v); /** - * Convert a Broker data value to a Bro value. + * Convert a Broker data value to a Zeek value. * @param d a Broker data value. * @param type the expected type of the value to return. - * @return a pointer to a new Bro value or a nullptr if the conversion was not + * @return a pointer to a new Zeek value or a nullptr if the conversion was not * possible. 
*/ ValPtr data_to_val(broker::data d, Type* type); @@ -87,7 +89,7 @@ broker::data threading_field_to_data(const threading::Field* f); threading::Field* data_to_threading_field(broker::data d); /** - * A Bro value which wraps a Broker data value. + * A Zeek value which wraps a Broker data value. */ class DataVal : public OpaqueVal { @@ -99,7 +101,7 @@ public: ValPtr castTo(zeek::Type* t); bool canCastTo(zeek::Type* t) const; - // Returns the Bro type that scripts use to represent a Broker data + // Returns the Zeek type that scripts use to represent a Broker data // instance. This may be wrapping the opaque value inside another // type. static const TypePtr& ScriptDataType(); @@ -160,7 +162,7 @@ struct type_name_getter }; /** - * Retrieve Broker data value associated with a Broker::Data Bro value. + * Retrieve Broker data value associated with a Broker::Data Zeek value. * @param v a Broker::Data value. * @param f used to get location information on error. * @return a reference to the wrapped Broker data value. A runtime interpreter @@ -172,7 +174,7 @@ broker::data& opaque_field_to_data(zeek::RecordVal* v, zeek::detail::Frame* f); * Retrieve variant data from a Broker data value. * @tparam T a type that the variant may contain. * @param d a Broker data value to get variant data out of. - * @param tag a Bro tag which corresponds to T (just used for error reporting). + * @param tag a Zeek tag which corresponds to T (just used for error reporting). * @param f used to get location information on error. * @return a refrence to the requested type in the variant Broker data. * A runtime interpret exception is thrown if trying to access a type which @@ -181,10 +183,10 @@ broker::data& opaque_field_to_data(zeek::RecordVal* v, zeek::detail::Frame* f); template T& require_data_type(broker::data& d, zeek::TypeTag tag, zeek::detail::Frame* f) { - auto ptr = caf::get_if(&d); + auto ptr = broker::get_if(&d); if ( ! 
ptr ) zeek::reporter->RuntimeError(f->GetCallLocation(), "data is of type '%s' not of type '%s'", - caf::visit(type_name_getter{tag}, d), zeek::type_name(tag)); + visit(type_name_getter{tag}, d), zeek::type_name(tag)); return *ptr; } diff --git a/src/broker/Manager.cc b/src/broker/Manager.cc index d4a8445c37..f3c6a64ee3 100644 --- a/src/broker/Manager.cc +++ b/src/broker/Manager.cc @@ -1,6 +1,7 @@ #include "zeek/broker/Manager.h" #include +#include #include #include #include @@ -27,6 +28,58 @@ using namespace std; +namespace + { + +void print_escaped(std::string& buf, std::string_view str) + { + buf.push_back('"'); + for ( auto c : str ) + { + switch ( c ) + { + default: + buf.push_back(c); + break; + case '\\': + buf.push_back('\\'); + buf.push_back('\\'); + break; + case '\b': + buf.push_back('\\'); + buf.push_back('b'); + break; + case '\f': + buf.push_back('\\'); + buf.push_back('f'); + break; + case '\n': + buf.push_back('\\'); + buf.push_back('n'); + break; + case '\r': + buf.push_back('\\'); + buf.push_back('r'); + break; + case '\t': + buf.push_back('\\'); + buf.push_back('t'); + break; + case '\v': + buf.push_back('\\'); + buf.push_back('v'); + break; + case '"': + buf.push_back('\\'); + buf.push_back('"'); + break; + } + } + buf.push_back('"'); + } + + } // namespace + namespace zeek::Broker { @@ -90,10 +143,10 @@ namespace struct opt_mapping { broker::configuration* cfg; - std::string_view broker_name; + std::string broker_name; const char* zeek_name; - template auto broker_read() { return caf::get_as(*cfg, broker_name); } + template auto broker_read() { return broker::get_as(*cfg, broker_name); } template auto broker_write(T&& val) { cfg->set(broker_name, std::forward(val)); } @@ -107,23 +160,10 @@ struct opt_mapping } // namespace -class BrokerConfig : public broker::configuration - { -public: - BrokerConfig(broker::broker_options options) : broker::configuration(options) - { - openssl_cafile = 
get_option("Broker::ssl_cafile")->AsString()->CheckString(); - openssl_capath = get_option("Broker::ssl_capath")->AsString()->CheckString(); - openssl_certificate = get_option("Broker::ssl_certificate")->AsString()->CheckString(); - openssl_key = get_option("Broker::ssl_keyfile")->AsString()->CheckString(); - openssl_passphrase = get_option("Broker::ssl_passphrase")->AsString()->CheckString(); - } - }; - class BrokerState { public: - BrokerState(BrokerConfig config, size_t congestion_queue_size) + BrokerState(broker::configuration config, size_t congestion_queue_size) : endpoint(std::move(config)), subscriber(endpoint.make_subscriber({broker::topic::statuses(), broker::topic::errors()}, congestion_queue_size)) @@ -235,10 +275,17 @@ void Manager::InitPostScript() broker::broker_options options; options.disable_ssl = get_option("Broker::disable_ssl")->AsBool(); - options.forward = get_option("Broker::forward_messages")->AsBool(); + options.skip_ssl_init = true; + options.disable_forwarding = ! 
get_option("Broker::forward_messages")->AsBool(); options.use_real_time = use_real_time; - BrokerConfig config{std::move(options)}; + broker::configuration config{std::move(options)}; + + config.openssl_cafile(get_option("Broker::ssl_cafile")->AsString()->CheckString()); + config.openssl_capath(get_option("Broker::ssl_capath")->AsString()->CheckString()); + config.openssl_certificate(get_option("Broker::ssl_certificate")->AsString()->CheckString()); + config.openssl_key(get_option("Broker::ssl_keyfile")->AsString()->CheckString()); + config.openssl_passphrase(get_option("Broker::ssl_passphrase")->AsString()->CheckString()); auto scheduler_policy = get_option("Broker::scheduler_policy")->AsString()->CheckString(); @@ -257,11 +304,11 @@ void Manager::InitPostScript() config.set("caf.scheduler.max-threads", get_option("Broker::max_threads")->AsCount()); config.set("caf.work-stealing.moderate-sleep-duration", - caf::timespan(static_cast( + broker::timespan(static_cast( get_option("Broker::moderate_sleep")->AsInterval() * 1e9))); config.set("caf.work-stealing.relaxed-sleep-duration", - caf::timespan( + broker::timespan( static_cast(get_option("Broker::relaxed_sleep")->AsInterval() * 1e9))); config.set("caf.work-stealing.aggressive-poll-attempts", @@ -363,6 +410,8 @@ void Manager::InitPostScript() bstate->subscriber.add_topic(broker::topic::store_events(), true); + telemetry_mgr->InitPostBrokerSetup(bstate->endpoint); + InitializeBrokerStoreForwarding(); } @@ -426,10 +475,6 @@ void Manager::Terminate() CloseStore(x); FlushLogBuffers(); - - for ( auto& p : bstate->endpoint.peers() ) - if ( p.peer.network ) - bstate->endpoint.unpeer(p.peer.network->address, p.peer.network->port); } bool Manager::Active() @@ -506,7 +551,8 @@ void Manager::Peer(const string& addr, uint16_t port, double retry) if ( bstate->endpoint.is_shutdown() ) return; - DBG_LOG(DBG_BROKER, "Starting to peer with %s:%" PRIu16, addr.c_str(), port); + DBG_LOG(DBG_BROKER, "Starting to peer with %s:%" PRIu16 " 
(retry: %fs)", addr.c_str(), port, + retry); auto e = getenv("ZEEK_DEFAULT_CONNECT_RETRY"); @@ -1030,7 +1076,7 @@ void Manager::DispatchMessage(const broker::topic& topic, broker::data msg) if ( ! batch.valid() ) { reporter->Warning("received invalid broker Batch: %s", - broker::to_string(batch).data()); + broker::to_string(batch.as_data()).data()); return; } @@ -1343,17 +1389,35 @@ void Manager::ProcessEvent(const broker::topic& topic, broker::zeek::Event ev) { auto got_type = args[i].get_type_name(); const auto& expected_type = arg_types[i]; - auto val = detail::data_to_val(std::move(args[i]), expected_type.get()); + auto val = detail::data_to_val(args[i], expected_type.get()); if ( val ) vl.emplace_back(std::move(val)); else { auto expected_name = type_name(expected_type->Tag()); + std::string msg_addl = util::fmt("got %s, expected %s", got_type, expected_name); - reporter->Warning("failed to convert remote event '%s' arg #%zu," - " got %s, expected %s", - name.data(), i, got_type, expected_name); + if ( strcmp(expected_name, "record") == 0 && strcmp("vector", got_type) == 0 ) + { + // This means the vector elements didn't align with the record + // fields. Produce an error message that shows what we + // received. + std::string elements; + for ( const auto& e : broker::get(args[i]) ) + { + if ( ! elements.empty() ) + elements += ", "; + + elements += e.get_type_name(); + } + + msg_addl = util::fmt("got mismatching field types [%s] for record type '%s'", + elements.c_str(), expected_type->GetName().c_str()); + } + + reporter->Warning("failed to convert remote event '%s' arg #%zu, %s", name.data(), i, + msg_addl.c_str()); // If we got a vector and expected a function this is // possibly because of a mismatch between @@ -1376,7 +1440,8 @@ bool Manager::ProcessLogCreate(broker::zeek::LogCreate lc) DBG_LOG(DBG_BROKER, "Received log-create: %s", RenderMessage(lc.as_data()).c_str()); if ( ! 
lc.valid() ) { - reporter->Warning("received invalid broker LogCreate: %s", broker::to_string(lc).data()); + reporter->Warning("received invalid broker LogCreate: %s", + broker::to_string(lc.as_data()).data()); return false; } @@ -1402,7 +1467,7 @@ bool Manager::ProcessLogCreate(broker::zeek::LogCreate lc) } // Get log fields. - auto fields_data = caf::get_if(&lc.fields_data()); + auto fields_data = get_if(&lc.fields_data()); if ( ! fields_data ) { @@ -1442,7 +1507,8 @@ bool Manager::ProcessLogWrite(broker::zeek::LogWrite lw) if ( ! lw.valid() ) { - reporter->Warning("received invalid broker LogWrite: %s", broker::to_string(lw).data()); + reporter->Warning("received invalid broker LogWrite: %s", + broker::to_string(lw.as_data()).data()); return false; } @@ -1467,7 +1533,7 @@ bool Manager::ProcessLogWrite(broker::zeek::LogWrite lw) return false; } - auto path = caf::get_if(&lw.path()); + auto path = get_if(&lw.path()); if ( ! path ) { @@ -1476,7 +1542,7 @@ bool Manager::ProcessLogWrite(broker::zeek::LogWrite lw) return false; } - auto serial_data = caf::get_if(&lw.serial_data()); + auto serial_data = get_if(&lw.serial_data()); if ( ! serial_data ) { @@ -1531,7 +1597,7 @@ bool Manager::ProcessIdentifierUpdate(broker::zeek::IdentifierUpdate iu) if ( ! 
iu.valid() ) { reporter->Warning("received invalid broker IdentifierUpdate: %s", - broker::to_string(iu).data()); + broker::to_string(iu.as_data()).data()); return false; } @@ -1589,6 +1655,14 @@ void Manager::ProcessStatus(broker::status_view stat) event = ::Broker::peer_lost; break; + case broker::sc::endpoint_discovered: + event = ::Broker::endpoint_discovered; + break; + + case broker::sc::endpoint_unreachable: + event = ::Broker::endpoint_unreachable; + break; + default: reporter->Warning("Unhandled Broker status: %s", to_string(stat).data()); break; @@ -1655,12 +1729,12 @@ void Manager::ProcessError(broker::error_view err) if ( auto ctx = err.context() ) { msg += '('; - msg += to_string(ctx->node); + msg += broker::to_string(ctx->node); msg += ", "; - msg += caf::deep_to_string(ctx->network); + msg += broker::to_string(ctx->network); msg += ", "; if ( auto what = err.message() ) - msg += caf::deep_to_string(*what); + print_escaped(msg, *what); else msg += R"_("")_"; msg += ')'; @@ -1761,7 +1835,10 @@ detail::StoreHandleVal* Manager::MakeMaster(const string& name, broker::backend Ref(handle); data_stores.emplace(name, handle); - iosource_mgr->RegisterFd(handle->proxy.mailbox().descriptor(), this); + if ( ! iosource_mgr->RegisterFd(handle->proxy.mailbox().descriptor(), this) ) + reporter->FatalError( + "Failed to register broker master mailbox descriptor with iosource_mgr"); + PrepareForwarding(name); if ( ! bstate->endpoint.use_real_time() ) @@ -1783,7 +1860,7 @@ void Manager::BrokerStoreToZeekTable(const std::string& name, const detail::Stor if ( ! 
keys ) return; - auto set = caf::get_if(&(keys->get_data())); + auto set = get_if(&(keys->get_data())); auto table = handle->forward_to; const auto& its = table->GetType()->AsTableType()->GetIndexTypes(); bool is_set = table->GetType()->IsSet(); @@ -1866,7 +1943,9 @@ detail::StoreHandleVal* Manager::MakeClone(const string& name, double resync_int Ref(handle); data_stores.emplace(name, handle); - iosource_mgr->RegisterFd(handle->proxy.mailbox().descriptor(), this); + if ( ! iosource_mgr->RegisterFd(handle->proxy.mailbox().descriptor(), this) ) + reporter->FatalError( + "Failed to register broker clone mailbox descriptor with iosource_mgr"); PrepareForwarding(name); return handle; } @@ -1982,53 +2061,4 @@ void Manager::SetMetricsExportPrefixes(std::vector filter) bstate->endpoint.metrics_exporter().set_prefixes(std::move(filter)); } -std::unique_ptr Manager::NewTelemetryManager() - { - // The telemetry Manager actually only has a dependency on the actor system, - // not to the Broker Manager. By having the telemetry Manager hold on to a - // shared_ptr to our Broker state, we make sure the Broker endpoint, which - // owns the CAF actor system, lives for as long as necessary. This also - // makes sure that the Broker Manager may even get destroyed before the - // telemetry Manager. 
- struct TM final : public telemetry::Manager - { - using MetricRegistryPtr = std::unique_ptr; - - static auto getPimpl(BrokerState& st) - { - auto registry = std::addressof(st.endpoint.system().metrics()); - return reinterpret_cast(registry); - } - - static auto getPimpl(MetricRegistryPtr& ptr) - { - return reinterpret_cast(ptr.get()); - } - - explicit TM(Broker::Manager* parent, MetricRegistryPtr ptr) - : telemetry::Manager(getPimpl(ptr)), parent(parent), tmp(std::move(ptr)) - { - assert(tmp != nullptr); - assert(parent != nullptr); - } - - void InitPostScript() override - { - assert(parent->bstate != nullptr); - ptr = parent->bstate; - auto registry = std::addressof(ptr->endpoint.system().metrics()); - registry->merge(*tmp); - tmp.reset(); - pimpl = reinterpret_cast(registry); - } - - Broker::Manager* parent; - MetricRegistryPtr tmp; - std::shared_ptr ptr; - }; - - auto tmp = std::make_unique(); - return std::make_unique(this, std::move(tmp)); - } - } // namespace zeek::Broker diff --git a/src/broker/Manager.h b/src/broker/Manager.h index 867180e7e9..8e03474c41 100644 --- a/src/broker/Manager.h +++ b/src/broker/Manager.h @@ -8,10 +8,6 @@ #include #include #include -#include -#include -#include -#include #include #include #include @@ -77,7 +73,7 @@ struct Stats }; /** - * Manages various forms of communication between peer Bro processes + * Manages various forms of communication between peer Zeek processes * or other external applications via use of the Broker messaging library. */ class Manager : public iosource::IOSource @@ -96,7 +92,7 @@ public: ~Manager() override; /** - * Initialization of the manager. This is called late during Bro's + * Initialization of the manager. This is called late during Zeek's * initialization after any scripts are processed. */ void InitPostScript(); @@ -223,7 +219,7 @@ public: * @param topic a topic string associated with the event message. 
* Peers advertise interest by registering a subscription to some prefix * of this topic name. - * @param event a Bro event value. + * @param event a Zeek event value. * @return true if automatic event sending is now enabled. */ bool AutoPublishEvent(std::string topic, Val* event); @@ -396,11 +392,6 @@ public: */ void SetMetricsExportPrefixes(std::vector filter); - /** - * Allocates a new manager for telemetry data. - */ - std::unique_ptr NewTelemetryManager(); - private: void DispatchMessage(const broker::topic& topic, broker::data msg); // Process events used for Broker store backed zeek tables diff --git a/src/broker/Store.h b/src/broker/Store.h index b81eec89cc..10ebad95e5 100644 --- a/src/broker/Store.h +++ b/src/broker/Store.h @@ -52,9 +52,9 @@ inline RecordValPtr query_result(RecordValPtr data) * @param e: expire interval as double; 0 if no expiry * @return expire interval in Broker format */ -static broker::optional convert_expiry(double e) +static std::optional convert_expiry(double e) { - broker::optional ts; + std::optional ts; if ( e ) { @@ -122,7 +122,7 @@ public: broker::store store; broker::store::proxy proxy; - broker::publisher_id store_pid; + broker::entity_id store_pid; // Zeek table that events are forwarded to. TableValPtr forward_to; bool have_store = false; diff --git a/src/broker/comm.bif b/src/broker/comm.bif index 2b6d4be802..e4552d84f5 100644 --- a/src/broker/comm.bif +++ b/src/broker/comm.bif @@ -19,6 +19,12 @@ event Broker::peer_removed%(endpoint: EndpointInfo, msg: string%); ## Generated when an existing peering has been lost. event Broker::peer_lost%(endpoint: EndpointInfo, msg: string%); +## Generated when a new Broker endpoint appeared. +event Broker::endpoint_discovered%(endpoint: EndpointInfo, msg: string%); + +## Generated when the last path to a Broker endpoint has been lost. +event Broker::endpoint_unreachable%(endpoint: EndpointInfo, msg: string%); + ## Generated when an error occurs in the Broker sub-system. 
event Broker::error%(code: ErrorCode, msg: string%); diff --git a/src/broker/data.bif b/src/broker/data.bif index f671702bf2..85b34aa991 100644 --- a/src/broker/data.bif +++ b/src/broker/data.bif @@ -471,7 +471,7 @@ function Broker::__record_lookup%(r: Broker::Data, idx: count%): Broker::Data auto& v = zeek::Broker::detail::require_data_type(r->AsRecordVal(), zeek::TYPE_RECORD, frame); - if ( idx >= v.size() || caf::get_if(&v[idx]) ) + if ( idx >= v.size() || broker::get_if(&v[idx]) ) return zeek::make_intrusive(zeek::BifType::Record::Broker::Data); return zeek::Broker::detail::make_data_val(v[idx]); @@ -510,7 +510,7 @@ function Broker::__record_iterator_value%(it: opaque of Broker::RecordIterator%) return rval; } - if ( caf::get_if(&(*ri->it)) ) + if ( broker::get_if(&(*ri->it)) ) return rval; // field isn't set rval->Assign(0, zeek::make_intrusive(*ri->it)); diff --git a/src/broker/store.bif b/src/broker/store.bif index 87f614f373..d29a44191d 100644 --- a/src/broker/store.bif +++ b/src/broker/store.bif @@ -100,6 +100,11 @@ function Broker::__close%(h: opaque of Broker::Store%): bool return val_mgr->False(); } + if ( ! handle->have_store ) + { + return val_mgr->False(); + } + return zeek::val_mgr->Bool(broker_mgr->CloseStore(handle->store.name())); %} @@ -325,6 +330,12 @@ function Broker::__keys%(h: opaque of Broker::Store%): Broker::QueryResult return zeek::Broker::detail::query_result(); } + if ( ! handle->have_store ) + { + zeek::emit_builtin_error("cannot retrieve keys from a closed store", h); + return zeek::Broker::detail::query_result(); + } + auto trigger = frame->GetTrigger(); if ( ! trigger ) diff --git a/src/file_analysis/Manager.h b/src/file_analysis/Manager.h index 5f48fddec7..30a78fef7c 100644 --- a/src/file_analysis/Manager.h +++ b/src/file_analysis/Manager.h @@ -49,13 +49,13 @@ public: /** * First-stage initializion of the manager. This is called early on - * during Bro's initialization, before any scripts are processed. 
+ * during Zeek's initialization, before any scripts are processed. */ void InitPreScript(); /** * Second-stage initialization of the manager. This is called late - * during Bro's initialization after any scripts are processed. + * during Zeek's initialization after any scripts are processed. */ void InitPostScript(); @@ -384,7 +384,7 @@ protected: * Evaluate timeout policy for a file and remove the File object mapped to * \a file_id if needed. * @param file_id the file identifier/hash. - * @param is_termination whether the Manager (and probably Bro) is in a + * @param is_termination whether the Manager (and probably Zeek) is in a * terminating state. If true, then the timeout cannot be postponed. */ void Timeout(const std::string& file_id, bool is_terminating = run_state::terminating); diff --git a/src/file_analysis/analyzer/x509/X509.cc b/src/file_analysis/analyzer/x509/X509.cc index e5841cca33..29e5d951a8 100644 --- a/src/file_analysis/analyzer/x509/X509.cc +++ b/src/file_analysis/analyzer/x509/X509.cc @@ -623,7 +623,7 @@ broker::expected X509Val::DoSerialize() const bool X509Val::DoUnserialize(const broker::data& data) { - auto s = caf::get_if(&data); + auto s = broker::get_if(&data); if ( ! s ) return false; diff --git a/src/file_analysis/analyzer/x509/X509.h b/src/file_analysis/analyzer/x509/X509.h index 033f86ab83..d4ad9b02d4 100644 --- a/src/file_analysis/analyzer/x509/X509.h +++ b/src/file_analysis/analyzer/x509/X509.h @@ -150,8 +150,8 @@ private: /** * This class wraps an OpenSSL X509 data structure. * - * We need these to be able to pass OpenSSL pointers around in Bro - * script-land. Otherwise, we cannot verify certificates from Bro + * We need these to be able to pass OpenSSL pointers around in Zeek + * script-land. 
Otherwise, we cannot verify certificates from Zeek * scriptland */ class X509Val : public OpaqueVal diff --git a/src/fuzzers/CMakeLists.txt b/src/fuzzers/CMakeLists.txt index ac8b6ca345..0b23958b3a 100644 --- a/src/fuzzers/CMakeLists.txt +++ b/src/fuzzers/CMakeLists.txt @@ -78,5 +78,6 @@ target_link_libraries(zeek_fuzzer_shared ${zeek_fuzzer_shared_deps} ${CMAKE_THREAD_LIBS_INIT} ${CMAKE_DL_LIBS}) +add_fuzz_target(dns) add_fuzz_target(pop3) add_fuzz_target(packet) diff --git a/src/fuzzers/dns-corpus.zip b/src/fuzzers/dns-corpus.zip new file mode 100644 index 0000000000..a67cdaff32 Binary files /dev/null and b/src/fuzzers/dns-corpus.zip differ diff --git a/src/fuzzers/dns-fuzzer.cc b/src/fuzzers/dns-fuzzer.cc new file mode 100644 index 0000000000..c87781ea14 --- /dev/null +++ b/src/fuzzers/dns-fuzzer.cc @@ -0,0 +1,63 @@ +#include + +#include "zeek/Conn.h" +#include "zeek/RunState.h" +#include "zeek/analyzer/Analyzer.h" +#include "zeek/analyzer/Manager.h" +#include "zeek/analyzer/protocol/pia/PIA.h" +#include "zeek/analyzer/protocol/tcp/TCP.h" +#include "zeek/fuzzers/FuzzBuffer.h" +#include "zeek/fuzzers/fuzzer-setup.h" +#include "zeek/packet_analysis/protocol/tcp/TCPSessionAdapter.h" +#include "zeek/session/Manager.h" + +static constexpr auto ZEEK_FUZZ_ANALYZER = "dns"; + +static zeek::Connection* add_connection() + { + static constexpr double network_time_start = 1439471031; + zeek::run_state::detail::update_network_time(network_time_start); + + zeek::Packet p; + zeek::ConnTuple conn_id; + conn_id.src_addr = zeek::IPAddr("1.2.3.4"); + conn_id.dst_addr = zeek::IPAddr("5.6.7.8"); + conn_id.src_port = htons(23132); + conn_id.dst_port = htons(80); + conn_id.is_one_way = false; + conn_id.proto = TRANSPORT_TCP; + zeek::detail::ConnKey key(conn_id); + zeek::Connection* conn = new zeek::Connection(key, network_time_start, &conn_id, 1, &p); + conn->SetTransport(TRANSPORT_TCP); + zeek::session_mgr->Insert(conn); + return conn; + } + +static zeek::analyzer::Analyzer* 
add_analyzer(zeek::Connection* conn) + { + auto* tcp = new zeek::packet_analysis::TCP::TCPSessionAdapter(conn); + auto* pia = new zeek::analyzer::pia::PIA_TCP(conn); + auto a = zeek::analyzer_mgr->InstantiateAnalyzer(ZEEK_FUZZ_ANALYZER, conn); + tcp->AddChildAnalyzer(a); + tcp->AddChildAnalyzer(pia->AsAnalyzer()); + conn->SetSessionAdapter(tcp, pia); + return a; + } + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) + { + auto conn = add_connection(); + auto a = add_analyzer(conn); + + try + { + a->DeliverPacket(size, data, true, -1, nullptr, size); + } + catch ( const binpac::Exception& e ) + { + } + + zeek::event_mgr.Drain(); + zeek::detail::fuzzer_cleanup_one_input(); + return 0; + } diff --git a/src/input/Manager.cc b/src/input/Manager.cc index 7dbef26ebb..119834b7d6 100644 --- a/src/input/Manager.cc +++ b/src/input/Manager.cc @@ -30,7 +30,7 @@ namespace zeek::input /** * InputHashes are used as Dictionaries to store the value and index hashes * for all lines currently stored in a table. Index hash is stored as - * HashKey*, because it is thrown into other Bro functions that need the + * HashKey*, because it is thrown into other Zeek functions that need the * complex structure of it. 
For everything we do (with values), we just take * the hash_t value and compare it directly with "==" */ @@ -1473,8 +1473,9 @@ void Manager::SendEndOfData(const Stream* i) #ifdef DEBUG DBG_LOG(DBG_INPUT, "SendEndOfData for stream %s", i->name.c_str()); #endif - SendEvent(end_of_data, 2, new StringVal(i->name.c_str()), - new StringVal(i->reader->Info().source)); + auto name = make_intrusive(i->name.c_str()); + auto source = make_intrusive(i->reader->Info().source); + SendEvent(end_of_data, 2, name->Ref(), source->Ref()); if ( i->stream_type == ANALYSIS_STREAM ) file_mgr->EndOfFile(static_cast(i)->file_id); @@ -1889,7 +1890,7 @@ void Manager::SendEvent(EventHandlerPtr ev, list events) const event_mgr.Enqueue(ev, std::move(vl), util::detail::SOURCE_LOCAL); } -// Convert a bro list value to a bro record value. +// Convert a Zeek list value to a Zeek record value. // I / we could think about moving this functionality to val.cc RecordVal* Manager::ListValToRecordVal(ListVal* list, RecordType* request_type, int* position) const { @@ -2067,7 +2068,7 @@ int Manager::GetValueLength(const Value* val) const } // Given a threading::value, copy the raw data bytes into *data and return how many bytes were -// copied. Used for hashing the values for lookup in the bro table +// copied. Used for hashing the values for lookup in the Zeek table int Manager::CopyValue(char* data, const int startpos, const Value* val) const { assert(val->present); // presence has to be checked elsewhere @@ -2243,7 +2244,7 @@ zeek::detail::HashKey* Manager::HashValues(const int num_elements, const Value* return key; } -// convert threading value to Bro value +// convert threading value to Zeek value // have_error is a reference to a boolean which is set to true as soon as an error occurs. // When have_error is set to true at the beginning of the function, it is assumed that // an error already occurred in the past and processing is aborted. 
@@ -2440,7 +2441,7 @@ Manager::Stream* Manager::FindStream(ReaderFrontend* reader) const return nullptr; } -// Function is called on Bro shutdown. +// Function is called on Zeek shutdown. // Signal all frontends that they will cease operation. void Manager::Terminate() { @@ -2560,8 +2561,8 @@ void Manager::ErrorHandler(const Stream* i, ErrorType et, bool reporter_send, co __builtin_unreachable(); } - auto* message = new StringVal(buf); - SendEvent(i->error_event, 3, i->description->Ref(), message, ev.release()); + auto message = make_intrusive(buf); + SendEvent(i->error_event, 3, i->description->Ref(), message->Ref(), ev.release()); } if ( reporter_send ) diff --git a/src/input/Manager.h b/src/input/Manager.h index 2928b4f23d..4d875ebe12 100644 --- a/src/input/Manager.h +++ b/src/input/Manager.h @@ -99,12 +99,12 @@ public: bool RemoveStream(const std::string& id); /** - * Signals the manager to shutdown at Bro's termination. + * Signals the manager to shutdown at Zeek's termination. */ void Terminate(); /** - * Checks if a Bro type can be used for data reading. Note that + * Checks if a Zeek type can be used for data reading. Note that * this function only applies to input streams; the logging framework * has an equivalent function; however we support logging of a wider * variety of types (e.g. functions). @@ -229,11 +229,11 @@ private: // startpos. int CopyValue(char* data, const int startpos, const threading::Value* val) const; - // Convert Threading::Value to an internal Bro Type (works with Records). + // Convert Threading::Value to an internal Zeek Type (works with Records). Val* ValueToVal(const Stream* i, const threading::Value* val, Type* request_type, bool& have_error) const; - // Convert Threading::Value to an internal Bro list type. + // Convert Threading::Value to an internal Zeek list type. 
Val* ValueToIndexVal(const Stream* i, int num_fields, const RecordType* type, const threading::Value* const* vals, bool& have_error) const; @@ -244,7 +244,7 @@ private: Val* RecordValToIndexVal(RecordVal* r) const; - // Converts a Bro ListVal to a RecordVal given the record type. + // Converts a Zeek ListVal to a RecordVal given the record type. RecordVal* ListValToRecordVal(ListVal* list, RecordType* request_type, int* position) const; // Internally signal errors, warnings, etc. diff --git a/src/input/ReaderBackend.h b/src/input/ReaderBackend.h index 60bc8690da..319a114dcd 100644 --- a/src/input/ReaderBackend.h +++ b/src/input/ReaderBackend.h @@ -300,7 +300,7 @@ protected: void Put(threading::Value** val); /** - * Method allowing a reader to delete a specific value from a Bro + * Method allowing a reader to delete a specific value from a Zeek * table. * * If the receiving stream is an event stream, only a removed event @@ -312,7 +312,7 @@ protected: void Delete(threading::Value** val); /** - * Method allowing a reader to clear a Bro table. + * Method allowing a reader to clear a Zeek table. * * If the receiving stream is an event stream, this is ignored. * diff --git a/src/input/readers/config/Config.cc b/src/input/readers/config/Config.cc index fb5201fcf0..f2a2a83590 100644 --- a/src/input/readers/config/Config.cc +++ b/src/input/readers/config/Config.cc @@ -239,7 +239,7 @@ bool Config::DoUpdate() // we only send the event if the underlying value has changed. Let's check that. // (Yes, this means we keep all configuration options in memory twice - once here in - // the reader and once in memory in Bro; that is difficult to change. + // the reader and once in memory in Zeek; that is difficult to change. 
auto search = option_values.find(key); if ( search != option_values.end() && search->second == value ) { diff --git a/src/iosource/BPF_Program.cc b/src/iosource/BPF_Program.cc index 13f15e14d8..a712a8745b 100644 --- a/src/iosource/BPF_Program.cc +++ b/src/iosource/BPF_Program.cc @@ -63,7 +63,7 @@ namespace zeek::iosource::detail { // Simple heuristic to identify filters that always match, so that we can -// skip the filtering in that case. "ip or not ip" is Bro's default filter. +// skip the filtering in that case. "ip or not ip" is Zeek's default filter. static bool filter_matches_anything(const char* filter) { return (! filter) || strlen(filter) == 0 || strcmp(filter, "ip or not ip") == 0; diff --git a/src/iosource/Component.h b/src/iosource/Component.h index e5f4bb18ca..3f6edc3023 100644 --- a/src/iosource/Component.h +++ b/src/iosource/Component.h @@ -115,7 +115,7 @@ public: /** * Generates a human-readable description of the component. This goes - * into the output of \c "bro -NN". + * into the output of \c "zeek -NN". */ void DoDescribe(ODesc* d) const override; @@ -164,7 +164,7 @@ public: /** * Generates a human-readable description of the component. This goes - * into the output of \c "bro -NN". + * into the output of \c "zeek -NN". */ void DoDescribe(ODesc* d) const override; diff --git a/src/iosource/IOSource.h b/src/iosource/IOSource.h index a6931120f7..16fc76a6c5 100644 --- a/src/iosource/IOSource.h +++ b/src/iosource/IOSource.h @@ -6,16 +6,26 @@ namespace zeek::iosource { /** - * Interface class for components providing/consuming data inside Bro's main + * Interface class for components providing/consuming data inside Zeek's main * loop. */ class IOSource { public: + enum ProcessFlags + { + READ = 0x01, + WRITE = 0x02 + }; + /** * Constructor. + * + * @param process_fd A flag for indicating whether the child class implements + * the ProcessFd() method. This is used by the run loop for dispatching to the + * appropriate process method. 
*/ - IOSource() { closed = false; } + IOSource(bool process_fd = false) : implements_process_fd(process_fd) { } /** * Destructor. @@ -66,6 +76,19 @@ public: */ virtual void Process() = 0; + /** + * Optional process method that allows an IOSource to only process + * the file descriptor that is found ready and not every possible + * descriptor. If this method is implemented, true must be passed + * to the IOSource constructor via the child class. + * + * @param fd The file descriptor to process. + * @param flags Flags indicating what type of event is being + * processed. + */ + virtual void ProcessFd(int fd, int flags) { } + bool ImplementsProcessFd() const { return implements_process_fd; } + /** * Returns a descriptive tag representing the source for debugging. * @@ -84,7 +107,8 @@ protected: void SetClosed(bool is_closed) { closed = is_closed; } private: - bool closed; + bool closed = false; + bool implements_process_fd = false; }; } // namespace zeek::iosource diff --git a/src/iosource/Manager.cc b/src/iosource/Manager.cc index 8f669512fa..6c9876c398 100644 --- a/src/iosource/Manager.cc +++ b/src/iosource/Manager.cc @@ -103,7 +103,7 @@ void Manager::Wakeup(const std::string& where) wakeup->Ping(where); } -void Manager::FindReadySources(std::vector* ready) +void Manager::FindReadySources(ReadySources* ready) { ready->clear(); @@ -155,7 +155,7 @@ void Manager::FindReadySources(std::vector* ready) if ( timeout == 0 && ! time_to_poll ) { added = true; - ready->push_back(timeout_src); + ready->push_back({timeout_src, -1, 0}); } } @@ -167,13 +167,13 @@ void Manager::FindReadySources(std::vector* ready) // Avoid calling Poll() if we can help it since on very // high-traffic networks, we spend too much time in // Poll() and end up dropping packets. - ready->push_back(pkt_src); + ready->push_back({pkt_src, -1, 0}); } else { if ( ! run_state::pseudo_realtime && ! 
time_to_poll ) // A pcap file is always ready to process unless it's suspended - ready->push_back(pkt_src); + ready->push_back({pkt_src, -1, 0}); } } } @@ -189,7 +189,7 @@ void Manager::FindReadySources(std::vector* ready) Poll(ready, timeout, timeout_src); } -void Manager::Poll(std::vector* ready, double timeout, IOSource* timeout_src) +void Manager::Poll(ReadySources* ready, double timeout, IOSource* timeout_src) { struct timespec kqueue_timeout; ConvertTimeout(timeout, kqueue_timeout); @@ -205,7 +205,7 @@ void Manager::Poll(std::vector* ready, double timeout, IOSource* time else if ( ret == 0 ) { if ( timeout_src ) - ready->push_back(timeout_src); + ready->push_back({timeout_src, -1, 0}); } else { @@ -217,7 +217,15 @@ void Manager::Poll(std::vector* ready, double timeout, IOSource* time { std::map::const_iterator it = fd_map.find(events[i].ident); if ( it != fd_map.end() ) - ready->push_back(it->second); + ready->push_back({it->second, static_cast(events[i].ident), + IOSource::ProcessFlags::READ}); + } + else if ( events[i].filter == EVFILT_WRITE ) + { + std::map::const_iterator it = write_fd_map.find(events[i].ident); + if ( it != write_fd_map.end() ) + ready->push_back({it->second, static_cast(events[i].ident), + IOSource::ProcessFlags::WRITE}); } } } @@ -240,41 +248,97 @@ void Manager::ConvertTimeout(double timeout, struct timespec& spec) } } -bool Manager::RegisterFd(int fd, IOSource* src) +bool Manager::RegisterFd(int fd, IOSource* src, int flags) { - struct kevent event; - EV_SET(&event, fd, EVFILT_READ, EV_ADD, 0, 0, NULL); - int ret = kevent(event_queue, &event, 1, NULL, 0, NULL); - if ( ret != -1 ) - { - events.push_back({}); - DBG_LOG(DBG_MAINLOOP, "Registered fd %d from %s", fd, src->Tag()); - fd_map[fd] = src; + std::vector new_events; - Wakeup("RegisterFd"); - return true; - } - else + if ( (flags & IOSource::READ) != 0 ) { - reporter->Error("Failed to register fd %d from %s: %s", fd, src->Tag(), strerror(errno)); - return false; + if ( 
fd_map.count(fd) == 0 ) + { + new_events.push_back({}); + EV_SET(&(new_events.back()), fd, EVFILT_READ, EV_ADD, 0, 0, NULL); + } } + if ( (flags & IOSource::WRITE) != 0 ) + { + if ( write_fd_map.count(fd) == 0 ) + { + new_events.push_back({}); + EV_SET(&(new_events.back()), fd, EVFILT_WRITE, EV_ADD, 0, 0, NULL); + } + } + + if ( ! new_events.empty() ) + { + int ret = kevent(event_queue, new_events.data(), new_events.size(), NULL, 0, NULL); + if ( ret != -1 ) + { + DBG_LOG(DBG_MAINLOOP, "Registered fd %d from %s", fd, src->Tag()); + for ( const auto& a : new_events ) + events.push_back({}); + + if ( (flags & IOSource::READ) != 0 ) + fd_map[fd] = src; + if ( (flags & IOSource::WRITE) != 0 ) + write_fd_map[fd] = src; + + Wakeup("RegisterFd"); + return true; + } + else + { + reporter->Error("Failed to register fd %d from %s: %s (flags %d)", fd, src->Tag(), + strerror(errno), flags); + return false; + } + } + + return true; } -bool Manager::UnregisterFd(int fd, IOSource* src) +bool Manager::UnregisterFd(int fd, IOSource* src, int flags) { - if ( fd_map.find(fd) != fd_map.end() ) + std::vector new_events; + + if ( (flags & IOSource::READ) != 0 ) { - struct kevent event; - EV_SET(&event, fd, EVFILT_READ, EV_DELETE, 0, 0, NULL); - int ret = kevent(event_queue, &event, 1, NULL, 0, NULL); + if ( fd_map.count(fd) != 0 ) + { + new_events.push_back({}); + EV_SET(&(new_events.back()), fd, EVFILT_READ, EV_DELETE, 0, 0, NULL); + } + } + if ( (flags & IOSource::WRITE) != 0 ) + { + if ( write_fd_map.count(fd) != 0 ) + { + new_events.push_back({}); + EV_SET(&(new_events.back()), fd, EVFILT_WRITE, EV_DELETE, 0, 0, NULL); + } + } + + if ( ! 
new_events.empty() ) + { + int ret = kevent(event_queue, new_events.data(), new_events.size(), NULL, 0, NULL); if ( ret != -1 ) + { DBG_LOG(DBG_MAINLOOP, "Unregistered fd %d from %s", fd, src->Tag()); + for ( const auto& a : new_events ) + events.pop_back(); - fd_map.erase(fd); + if ( (flags & IOSource::READ) != 0 ) + fd_map.erase(fd); + if ( (flags & IOSource::WRITE) != 0 ) + write_fd_map.erase(fd); - Wakeup("UnregisterFd"); - return true; + Wakeup("UnregisterFd"); + return true; + } + + // We don't care about failure here. If it failed to unregister, it's likely because + // the file descriptor was already closed, and kqueue already automatically removed + // it. } else { @@ -282,6 +346,8 @@ bool Manager::UnregisterFd(int fd, IOSource* src) src->Tag()); return false; } + + return true; } void Manager::Register(IOSource* src, bool dont_count, bool manage_lifetime) diff --git a/src/iosource/Manager.h b/src/iosource/Manager.h index cb8af3fe4c..1e2bbbface 100644 --- a/src/iosource/Manager.h +++ b/src/iosource/Manager.h @@ -29,6 +29,15 @@ class PktDumper; class Manager { public: + struct ReadySource + { + IOSource* src = nullptr; + int fd = -1; + int flags = 0; + }; + + using ReadySources = std::vector; + /** * Constructor. */ @@ -85,7 +94,7 @@ public: /** * Opens a new packet source. * - * @param path The interface or file name, as one would give to Bro \c -i. + * @param path The interface or file name, as one would give to zeek \c -i. * * @param is_live True if \a path represents a live interface, false * for a file. @@ -101,7 +110,7 @@ public: * * @param append True to append if \a path already exists. * - * @return The new packet dumper, or null if an error occured. + * @return The new packet dumper, or null if an error occurred. */ PktDumper* OpenPktDumper(const std::string& path, bool append); @@ -110,22 +119,25 @@ public: * * @param ready A vector used to return the set of sources that are ready. 
*/ - void FindReadySources(std::vector* ready); + void FindReadySources(ReadySources* ready); /** * Registers a file descriptor and associated IOSource with the manager - * to be checked during FindReadySources. + * to be checked during FindReadySources. This will register the file + * descriptor to check for read events. * * @param fd A file descriptor pointing at some resource that should be * checked for readiness. * @param src The IOSource that owns the file descriptor. + * @param flags A combination of values from IOSource::ProcessFlags for + * which modes we should register for this file descriptor. */ - bool RegisterFd(int fd, IOSource* src); + bool RegisterFd(int fd, IOSource* src, int flags = IOSource::READ); /** * Unregisters a file descriptor from the FindReadySources checks. */ - bool UnregisterFd(int fd, IOSource* src); + bool UnregisterFd(int fd, IOSource* src, int flags = IOSource::READ); /** * Forces the poll in FindReadySources to wake up immediately. This method @@ -147,7 +159,7 @@ private: * @param timeout_src The source associated with the current timeout value. * This is typically a timer manager object. */ - void Poll(std::vector* ready, double timeout, IOSource* timeout_src); + void Poll(ReadySources* ready, double timeout, IOSource* timeout_src); /** * Converts a double timeout value into a timespec struct used for calls @@ -208,6 +220,7 @@ private: int event_queue = -1; std::map fd_map; + std::map write_fd_map; // This is only used for the output of the call to kqueue in FindReadySources(). // The actual events are stored as part of the queue. 
diff --git a/src/iosource/PktSrc.cc b/src/iosource/PktSrc.cc index 663fec53ad..a265bd6f74 100644 --- a/src/iosource/PktSrc.cc +++ b/src/iosource/PktSrc.cc @@ -110,7 +110,7 @@ void PktSrc::Closed() void PktSrc::Error(const std::string& msg) { - // We don't report this immediately, Bro will ask us for the error + // We don't report this immediately, Zeek will ask us for the error // once it notices we aren't open. errbuf = msg; DBG_LOG(DBG_PKTIO, "Error with source %s: %s", IsOpen() ? props.path.c_str() : "", @@ -273,25 +273,33 @@ bool PktSrc::GetCurrentPacket(const Packet** pkt) double PktSrc::GetNextTimeout() { + bool pkt_available = have_packet; + + if ( props.selectable_fd == -1 || run_state::pseudo_realtime ) + pkt_available = ExtractNextPacketInternal(); + + if ( run_state::is_processing_suspended() ) + return -1; + // If there's no file descriptor for the source, which is the case for some interfaces like // myricom, we can't rely on the polling mechanism to wait for data to be available. As gross // as it is, just spin with a short timeout here so that it will continually poll the // interface. The old IOSource code had a 20 microsecond timeout between calls to select() // so just use that. if ( props.selectable_fd == -1 ) - return 0.00002; - + { + if ( ! pkt_available && ! run_state::pseudo_realtime ) + return 0.00002; + } // If we're live we want poll to do what it has to with the file descriptor. If we're not live // but we're not in pseudo-realtime mode, let the loop just spin as fast as it can. If we're // in pseudo-realtime mode, find the next time that a packet is ready and have poll block until // then. - if ( IsLive() || run_state::is_processing_suspended() ) + else if ( IsLive() ) return -1; - else if ( ! run_state::pseudo_realtime ) - return 0; - if ( ! have_packet ) - ExtractNextPacketInternal(); + if ( ! run_state::pseudo_realtime ) + return 0; // This duplicates the calculation used in run_state::check_pseudo_time(). 
double pseudo_time = current_packet.time - run_state::detail::first_timestamp; diff --git a/src/logging/Manager.cc b/src/logging/Manager.cc index d35a92f2cf..67e674838c 100644 --- a/src/logging/Manager.cc +++ b/src/logging/Manager.cc @@ -31,27 +31,27 @@ namespace zeek::logging struct Manager::Filter { - Val* fval; + Val* fval = nullptr; string name; - EnumVal* id; - Func* policy; - Func* path_func; + EnumVal* id = nullptr; + Func* policy = nullptr; + Func* path_func = nullptr; string path; - Val* path_val; - EnumVal* writer; - TableVal* config; - TableVal* field_name_map; + Val* path_val = nullptr; + EnumVal* writer = nullptr; + TableVal* config = nullptr; + TableVal* field_name_map = nullptr; string scope_sep; string ext_prefix; - Func* ext_func; - int num_ext_fields; - bool local; - bool remote; - double interval; - Func* postprocessor; + Func* ext_func = nullptr; + int num_ext_fields = 0; + bool local = false; + bool remote = false; + double interval = 0.0; + Func* postprocessor = nullptr; - int num_fields; - threading::Field** fields; + int num_fields = 0; + threading::Field** fields = nullptr; // Vector indexed by field number. 
Each element is a list of record // indices defining a path leading to the value across potential @@ -63,26 +63,26 @@ struct Manager::Filter struct Manager::WriterInfo { - EnumVal* type; - double open_time; - detail::Timer* rotation_timer; - double interval; - Func* postprocessor; - WriterFrontend* writer; - WriterBackend::WriterInfo* info; - bool from_remote; - bool hook_initialized; + EnumVal* type = nullptr; + double open_time = 0.0; + detail::Timer* rotation_timer = nullptr; + double interval = 0.0; + Func* postprocessor = nullptr; + WriterFrontend* writer = nullptr; + WriterBackend::WriterInfo* info = nullptr; + bool from_remote = false; + bool hook_initialized = false; string instantiating_filter; }; struct Manager::Stream { - EnumVal* id; - bool enabled; + EnumVal* id = nullptr; + bool enabled = false; string name; - RecordType* columns; + RecordType* columns = nullptr; EventHandlerPtr event; - Func* policy; + Func* policy = nullptr; list filters; using WriterPathPair = pair; diff --git a/src/logging/Manager.h b/src/logging/Manager.h index cd194fc797..fbdfc97e35 100644 --- a/src/logging/Manager.h +++ b/src/logging/Manager.h @@ -226,7 +226,7 @@ public: bool Flush(EnumVal* id); /** - * Signals the manager to shutdown at Bro's termination. + * Signals the manager to shutdown at Zeek's termination. */ void Terminate(); diff --git a/src/logging/WriterBackend.cc b/src/logging/WriterBackend.cc index 33c8d5e23c..e0aae0db8f 100644 --- a/src/logging/WriterBackend.cc +++ b/src/logging/WriterBackend.cc @@ -100,16 +100,16 @@ broker::data WriterBackend::WriterInfo::ToBroker() const bool WriterBackend::WriterInfo::FromBroker(broker::data d) { - if ( ! caf::holds_alternative(d) ) + if ( ! 
broker::is(d) ) return false; - auto v = caf::get(d); - auto bpath = caf::get_if(&v[0]); - auto brotation_base = caf::get_if(&v[1]); - auto brotation_interval = caf::get_if(&v[2]); - auto bnetwork_time = caf::get_if(&v[3]); - auto bconfig = caf::get_if(&v[4]); - auto bppf = caf::get_if(&v[5]); + auto v = broker::get(d); + auto bpath = broker::get_if(&v[0]); + auto brotation_base = broker::get_if(&v[1]); + auto brotation_interval = broker::get_if(&v[2]); + auto bnetwork_time = broker::get_if(&v[3]); + auto bconfig = broker::get_if(&v[4]); + auto bppf = broker::get_if(&v[5]); if ( ! (bpath && brotation_base && brotation_interval && bnetwork_time && bconfig && bppf) ) return false; @@ -122,8 +122,8 @@ bool WriterBackend::WriterInfo::FromBroker(broker::data d) for ( auto i : *bconfig ) { - auto k = caf::get_if(&i.first); - auto v = caf::get_if(&i.second); + auto k = broker::get_if(&i.first); + auto v = broker::get_if(&i.second); if ( ! (k && v) ) return false; diff --git a/src/logging/WriterBackend.h b/src/logging/WriterBackend.h index 30203d26da..c9278a6257 100644 --- a/src/logging/WriterBackend.h +++ b/src/logging/WriterBackend.h @@ -243,7 +243,7 @@ public: * @param close: The timestamp when the origina file was closed. * * @param terminating: True if the original rotation request occured - * due to the main Bro process shutting down. + * due to the main Zeek process shutting down. */ bool FinishedRotation(const char* new_name, const char* old_name, double open, double close, bool terminating); @@ -363,7 +363,7 @@ protected: * @param close The network time when the *current* file was closed. * * @param terminating Indicates whether the rotation request occurs - * due the main Bro prcoess terminating (and not because we've + * due the main Zeek prcoess terminating (and not because we've * reached a regularly scheduled time for rotation). 
*/ virtual bool DoRotate(const char* rotated_path, double open, double close, diff --git a/src/logging/writers/ascii/Ascii.cc b/src/logging/writers/ascii/Ascii.cc index 0b189c75b5..269eef5277 100644 --- a/src/logging/writers/ascii/Ascii.cc +++ b/src/logging/writers/ascii/Ascii.cc @@ -391,7 +391,7 @@ bool Ascii::InitFormatter() if ( enable_utf_8 ) desc.EnableUTF8(); - // Use the default "Bro logs" format. + // Use the default "Zeek logs" format. desc.EnableEscaping(); desc.AddEscapeSequence(separator); threading::formatter::Ascii::SeparatorInfo sep_info(separator, set_separator, unset_field, diff --git a/src/module_util.h b/src/module_util.h index 31e6901b97..111790649b 100644 --- a/src/module_util.h +++ b/src/module_util.h @@ -1,5 +1,5 @@ // -// These functions are used by both Bro and bifcl. +// These functions are used by both Zeek and bifcl. // #pragma once diff --git a/src/packet_analysis/Analyzer.cc b/src/packet_analysis/Analyzer.cc index f55e948fed..aa5c500b3d 100644 --- a/src/packet_analysis/Analyzer.cc +++ b/src/packet_analysis/Analyzer.cc @@ -182,7 +182,7 @@ void Analyzer::AnalyzerConfirmation(session::Session* session, zeek::Tag arg_tag } void Analyzer::AnalyzerViolation(const char* reason, session::Session* session, const char* data, - int len) + int len, zeek::Tag arg_tag) { if ( ! analyzer_violation ) return; @@ -201,7 +201,7 @@ void Analyzer::AnalyzerViolation(const char* reason, session::Session* session, else r = make_intrusive(reason); - const auto& tval = tag.AsVal(); + const auto& tval = arg_tag ? 
arg_tag.AsVal() : tag.AsVal(); event_mgr.Enqueue(analyzer_violation, session->GetVal(), tval, val_mgr->Count(0), std::move(r)); } diff --git a/src/packet_analysis/Analyzer.h b/src/packet_analysis/Analyzer.h index d778128b7c..63a1a14953 100644 --- a/src/packet_analysis/Analyzer.h +++ b/src/packet_analysis/Analyzer.h @@ -141,7 +141,7 @@ public: virtual void AnalyzerConfirmation(session::Session* session, zeek::Tag tag = zeek::Tag()); /** - * Signals Bro's protocol detection that the analyzer has found a + * Signals Zeek's protocol detection that the analyzer has found a * severe protocol violation that could indicate that it's not * parsing the expected protocol. This turns into \c * analyzer_violation events at the script-layer (one such event is @@ -156,7 +156,8 @@ public: * @param len If \a data is given, the length of it. */ virtual void AnalyzerViolation(const char* reason, session::Session* session, - const char* data = nullptr, int len = 0); + const char* data = nullptr, int len = 0, + zeek::Tag tag = zeek::Tag()); /** * Returns true if ProtocolConfirmation() has been called at least diff --git a/src/packet_analysis/protocol/tcp/TCPSessionAdapter.cc b/src/packet_analysis/protocol/tcp/TCPSessionAdapter.cc index 3e04adb844..d0f4e6cb21 100644 --- a/src/packet_analysis/protocol/tcp/TCPSessionAdapter.cc +++ b/src/packet_analysis/protocol/tcp/TCPSessionAdapter.cc @@ -566,7 +566,7 @@ void TCPSessionAdapter::Process(bool is_orig, const struct tcphdr* tp, int len, if ( seq_underflow && ! flags.RST() ) // Can't tell if if this is a retransmit/out-of-order or something - // before the sequence Bro initialized the endpoint at or the TCP is + // before the sequence Zeek initialized the endpoint at or the TCP is // just broken and sending garbage sequences. In either case, some // standard analysis doesn't apply (e.g. reassembly). 
Weird("TCP_seq_underflow_or_misorder"); diff --git a/src/parse.y b/src/parse.y index b52cf95a93..240d6017dd 100644 --- a/src/parse.y +++ b/src/parse.y @@ -5,7 +5,7 @@ // Switching parser table type fixes ambiguity problems. %define lr.type ielr -%expect 140 +%expect 196 %token TOK_ADD TOK_ADD_TO TOK_ADDR TOK_ANY %token TOK_ATENDIF TOK_ATELSE TOK_ATIF TOK_ATIFDEF TOK_ATIFNDEF @@ -18,7 +18,7 @@ %token TOK_PORT TOK_PRINT TOK_RECORD TOK_REDEF %token TOK_REMOVE_FROM TOK_RETURN TOK_SCHEDULE TOK_SET %token TOK_STRING TOK_SUBNET TOK_SWITCH TOK_TABLE -%token TOK_TIME TOK_TIMEOUT TOK_TIMER TOK_TYPE TOK_UNION TOK_VECTOR TOK_WHEN +%token TOK_TIME TOK_TIMEOUT TOK_TYPE TOK_VECTOR TOK_WHEN %token TOK_WHILE TOK_AS TOK_IS %token TOK_ATTR_ADD_FUNC TOK_ATTR_DEFAULT TOK_ATTR_OPTIONAL TOK_ATTR_REDEF @@ -58,7 +58,7 @@ %type local_id_list case_type_list %type init_class %type TOK_CONSTANT -%type expr opt_expr init opt_init anonymous_function lambda_body index_slice opt_deprecated when_condition +%type expr opt_expr rhs opt_init anonymous_function lambda_body index_slice opt_deprecated when_condition %type event %type stmt stmt_list func_body for_head %type type opt_type enum_body @@ -67,7 +67,7 @@ %type type_decl formal_args_decl %type type_decl_list formal_args_decl_list %type formal_args -%type expr_list opt_expr_list +%type expr_list opt_expr_list rhs_expr_list %type case %type case_list %type attr @@ -488,7 +488,7 @@ expr: $$ = new AddExpr({AdoptRef{}, $1}, {AdoptRef{}, $3}); } - | expr TOK_ADD_TO expr + | expr TOK_ADD_TO rhs { set_location(@1, @3); $$ = new AddToExpr({AdoptRef{}, $1}, {AdoptRef{}, $3}); @@ -500,7 +500,7 @@ expr: $$ = new SubExpr({AdoptRef{}, $1}, {AdoptRef{}, $3}); } - | expr TOK_REMOVE_FROM expr + | expr TOK_REMOVE_FROM rhs { set_location(@1, @3); $$ = new RemoveFromExpr({AdoptRef{}, $1}, {AdoptRef{}, $3}); @@ -596,7 +596,7 @@ expr: $$ = new CondExpr({AdoptRef{}, $1}, {AdoptRef{}, $3}, {AdoptRef{}, $5}); } - | expr '=' expr + | expr '=' rhs { set_location(@1, @3); 
@@ -608,7 +608,7 @@ expr: $$ = get_assign_expr({AdoptRef{}, $1}, {AdoptRef{}, $3}, in_init).release(); } - | TOK_LOCAL local_id '=' expr + | TOK_LOCAL local_id '=' rhs { set_location(@2, @4); if ( ! locals_at_this_scope.empty() ) @@ -794,7 +794,6 @@ expr: | anonymous_function - | TOK_SCHEDULE expr '{' event '}' { set_location(@1, @5); @@ -906,6 +905,19 @@ expr: } ; +rhs: '{' { ++in_init; } rhs_expr_list '}' + { + --in_init; + $$ = $3; + } + | expr + ; + +rhs_expr_list: expr_list opt_comma + | + { $$ = new ListExpr(); } + ; + expr_list: expr_list ',' expr { @@ -1023,11 +1035,6 @@ type: $$ = base_type(TYPE_PATTERN)->Ref(); } - | TOK_TIMER { - set_location(@1); - $$ = base_type(TYPE_TIMER)->Ref(); - } - | TOK_PORT { set_location(@1); $$ = base_type(TYPE_PORT)->Ref(); @@ -1070,13 +1077,6 @@ type: $$ = new RecordType($4); } - | TOK_UNION '{' type_list '}' - { - set_location(@1, @4); - reporter->Error("union type not implemented"); - $$ = 0; - } - | TOK_ENUM '{' { set_location(@1); parse_new_enum(); } enum_body '}' { set_location(@1, @5); @@ -1520,20 +1520,12 @@ init_class: ; opt_init: - { ++in_init; } init { --in_init; } + { ++in_init; } rhs { --in_init; } { $$ = $2; } | { $$ = 0; } ; -init: - '{' opt_expr_list '}' - { $$ = $2; } - | '{' expr_list ',' '}' - { $$ = $2; } - | expr - ; - index_slice: expr '[' opt_expr ':' opt_expr ']' { @@ -2076,12 +2068,14 @@ opt_no_test: { $$ = true; } | { $$ = false; } + ; opt_no_test_block: TOK_NO_TEST { $$ = true; script_coverage_mgr.IncIgnoreDepth(); } | { $$ = false; } + ; opt_deprecated: TOK_ATTR_DEPRECATED @@ -2102,6 +2096,11 @@ opt_deprecated: } | { $$ = nullptr; } + ; + +opt_comma: ',' + | + ; %% diff --git a/src/plugin/Component.h b/src/plugin/Component.h index c62ad88cb5..4472bd91c2 100644 --- a/src/plugin/Component.h +++ b/src/plugin/Component.h @@ -101,7 +101,7 @@ public: /** * Returns a textual representation of the component. This goes into - * the output of "bro -NN". + * the output of "zeek -NN". 
* * By default, this just outputs the type and the name. Derived * versions can override DoDescribe() to add type specific details. diff --git a/src/plugin/Manager.cc b/src/plugin/Manager.cc index 7270191dbf..64cfcbf151 100644 --- a/src/plugin/Manager.cc +++ b/src/plugin/Manager.cc @@ -423,16 +423,26 @@ void Manager::ExtendZeekPathForPlugins() if ( p->DynamicPlugin() || p->Name().empty() ) continue; - string canon = std::regex_replace(p->Name(), std::regex("::"), "_"); - string dir = "builtin-plugins/" + canon; + try + { + string canon = std::regex_replace(p->Name(), std::regex("::"), "_"); + string dir = "builtin-plugins/" + canon; - // Use find_file to find the directory in the path. - string script_dir = util::find_file(dir, util::zeek_path()); - if ( script_dir.empty() || ! util::is_dir(script_dir) ) - continue; + // Use find_file to find the directory in the path. + string script_dir = util::find_file(dir, util::zeek_path()); + if ( script_dir.empty() || ! util::is_dir(script_dir) ) + continue; - DBG_LOG(DBG_PLUGINS, " Adding %s to ZEEKPATH", script_dir.c_str()); - path_additions.push_back(script_dir); + DBG_LOG(DBG_PLUGINS, " Adding %s to ZEEKPATH", script_dir.c_str()); + path_additions.push_back(script_dir); + } + catch ( const std::regex_error& e ) + { + // This really shouldn't ever happen, but we do need to catch the exception. + // Report a fatal error because something is wrong if this occurs. + reporter->FatalError("Failed to replace colons in plugin name %s: %s", + p->Name().c_str(), e.what()); + } } for ( const auto& plugin_path : path_additions ) diff --git a/src/plugin/Manager.h b/src/plugin/Manager.h index f2324c2b44..61a103dd98 100644 --- a/src/plugin/Manager.h +++ b/src/plugin/Manager.h @@ -108,7 +108,7 @@ public: /** * First-stage initializion of the manager. 
This is called early on - * during Bro's initialization, before any scripts are processed, and + * during Zeek's initialization, before any scripts are processed, and * forwards to the corresponding Plugin methods. */ void InitPreScript(); @@ -121,7 +121,7 @@ public: /** * Third-stage initialization of the manager. This is called late during - * Bro's initialization after any scripts are processed, and forwards to + * Zeek's initialization after any scripts are processed, and forwards to * the corresponding Plugin methods. */ void InitPostScript(); @@ -206,8 +206,8 @@ public: /** * Registers interest in an event by a plugin, even if there's no handler * for it. Normally a plugin receives events through HookQueueEvent() - * only if Bro actually has code to execute for it. By calling this - * method, the plugin tells Bro to raise the event even if there's no + * only if Zeek actually has code to execute for it. By calling this + * method, the plugin tells Zeek to raise the event even if there's no * correspondong handler; it will then go into HookQueueEvent() just as * any other. * @@ -218,7 +218,7 @@ public: void RequestEvent(EventHandlerPtr handler, Plugin* plugin); /** - * Register interest in the destruction of a Obj instance. When Bro's + * Register interest in the destruction of a Obj instance. When Zeek's * reference counting triggers the objects destructor to run, the \a * HookBroObjDtor will be called. * @@ -233,9 +233,9 @@ public: /** * Hook that gives plugins a chance to take over loading an input * file. This method must be called between InitPreScript() and - * InitPostScript() for each input file Bro is about to load, either + * InitPostScript() for each input file Zeek is about to load, either * given on the command line or via @load script directives. The hook can - * take over the file, in which case Bro must not further process it + * take over the file, in which case Zeek must not further process it * otherwise. 
* * @return 1 if a plugin took over the file and loaded it successfully; 0 @@ -248,10 +248,10 @@ public: /** * Hook that gives plugins a chance to take over loading an input file, * including replacing the file's content. This method must be called - * between InitPreScript() and InitPostScript() for each input file Bro is + * between InitPreScript() and InitPostScript() for each input file Zeek is * about to load, either given on the command line or via @load script - * directives. The hook can take over the file, in which case Bro must not - * further process it otherwise; or provide its content, in which case Bro + * directives. The hook can take over the file, in which case Zeek must not + * further process it otherwise; or provide its content, in which case Zeek * must use that and ignore the original file. * * @return tuple where the first element is 1 if a plugin took over the @@ -397,7 +397,7 @@ public: * * @param conn The associated connection * - * @param addl Additional Bro values; typically will be passed to the event + * @param addl Additional Zeek values; typically will be passed to the event * by the reporter framework. * * @param location True if event expects location information diff --git a/src/plugin/Plugin.cc b/src/plugin/Plugin.cc index c7ef7e4f8f..4e0c1f924c 100644 --- a/src/plugin/Plugin.cc +++ b/src/plugin/Plugin.cc @@ -31,6 +31,7 @@ const char* hook_name(HookType h) "SetupAnalyzerTree", "LogInit", "LogWrite", + "Reporter", "UnprocessedPacket", // MetaHooks "MetaHookPre", diff --git a/src/plugin/Plugin.h b/src/plugin/Plugin.h index 8f2e75e64b..b4d483e178 100644 --- a/src/plugin/Plugin.h +++ b/src/plugin/Plugin.h @@ -283,7 +283,7 @@ public: } /** - * Constructor with a Bro value argument. + * Constructor with a Zeek value argument. */ explicit HookArgument(const Val* a) { @@ -292,7 +292,7 @@ public: } /** - * Constructor with a list of Bro values argument. + * Constructor with a list of Zeek values argument. 
*/ explicit HookArgument(const ValPList* a) { @@ -453,7 +453,7 @@ public: } /** - * Returns the value for a Bro value argument. The argument's type must + * Returns the value for a Zeek value argument. The argument's type must * match accordingly. */ const Val* AsVal() const @@ -463,7 +463,7 @@ public: } /** - * Returns the value for a Bro wrapped value argument. The argument's type must + * Returns the value for a Zeek wrapped value argument. The argument's type must * match accordingly. */ const std::pair AsFuncResult() const @@ -473,7 +473,7 @@ public: } /** - * Returns the value for a Bro frame argument. The argument's type must + * Returns the value for a Zeek frame argument. The argument's type must * match accordingly. */ const zeek::detail::Frame* AsFrame() const @@ -503,7 +503,7 @@ public: } /** - * Returns the value for a list of Bro values argument. The argument's type must + * Returns the value for a list of Zeek values argument. The argument's type must * match accordingly. */ const ValPList* AsValList() const @@ -584,7 +584,7 @@ using HookArgumentList = std::list; /** * Base class for all plugins. * - * Plugins encapsulate functionality that extends one or more of Bro's major + * Plugins encapsulate functionality that extends one or more of Zeek's major * subsystems, such as analysis of a specific protocol, or logging output in * a particular format. A plugin acts a logical container that can provide a * set of functionality. Specifically, it may: @@ -599,7 +599,7 @@ using HookArgumentList = std::list; * they'll be defined in *.bif files, but a plugin can also create them * internally. * - * - Provide hooks (aka callbacks) into Bro's core processing to inject + * - Provide hooks (aka callbacks) into Zeek's core processing to inject * and/or alter functionality. * * A plugin needs to explicitly register all the functionality it provides. @@ -699,7 +699,7 @@ public: * * Note that this method is rarely the right one to use. 
As it's for * informational purposes only, the plugin still needs to register - * the BiF items themselves with the corresponding Bro parts. Doing + * the BiF items themselves with the corresponding Zeek parts. Doing * so can be tricky, and it's recommned to instead define BiF items * in separate *.bif files that the plugin then pulls in. If defined * there, one does *not* need to call this method. @@ -711,8 +711,8 @@ public: void AddBifItem(const std::string& name, BifItem::Type type); /** - * Adds a file to the list of files that Bro loads at startup. This - * will normally be a Bro script, but it passes through the plugin + * Adds a file to the list of files that Zeek loads at startup. This + * will normally be a Zeek script, but it passes through the plugin * system as well to load files with other extensions as supported by * any of the current plugins. In other words, calling this method is * similar to giving a file on the command line. Note that the file @@ -732,7 +732,7 @@ protected: friend class Manager; /** - * First-stage initialization of the plugin called early during Bro's + * First-stage initialization of the plugin called early during Zeek's * startup, before scripts are parsed. This can be overridden by * derived classes; they must however call the parent's * implementation. @@ -740,7 +740,7 @@ protected: virtual void InitPreScript(); /** - * Second-stage initialization of the plugin called late during Bro's + * Second-stage initialization of the plugin called late during Zeek's * startup, after scripts are parsed. This can be overridden by * derived classes; they must however call the parent's * implementation. @@ -768,12 +768,12 @@ protected: /** * Enables a hook. The corresponding virtual method will now be - * called as Bro's processing proceeds. Note that enabling hooks can - * have performance impact as many trigger frequently inside Bro's + * called as Zeek's processing proceeds. 
Note that enabling hooks can + * have performance impact as many trigger frequently inside Zeek's * main processing path. * * Note that while hooks may be enabled/disabled dynamically at any - * time, the output of Bro's \c -NN option will only reflect their + * time, the output of Zeek's \c -NN option will only reflect their * state at startup time. Usually one should call this method for a * plugin's hooks in either the plugin's constructor or in * InitPreScript(). @@ -788,7 +788,7 @@ protected: void EnableHook(HookType hook, int priority = 0); /** - * Disables a hook. Bro will no longer call the corresponding virtual + * Disables a hook. Zeek will no longer call the corresponding virtual * method. * * @param hook The hook to disable. @@ -803,9 +803,9 @@ protected: /** * Registers interest in an event, even if there's no handler for it. - * Normally a plugin receives events through HookQueueEvent() only if Bro + * Normally a plugin receives events through HookQueueEvent() only if Zeek * actually has code to execute for it. By calling this method, the - * plugin tells Bro to raise the event even if there's no correspondong + * plugin tells Zeek to raise the event even if there's no correspondong * handler; it will then go into HookQueueEvent() just as any other. * * @param handler The event handler being interested in. @@ -814,7 +814,7 @@ protected: /** * Registers interest in the destruction of a Obj instance. When - * Bro's reference counting triggers the objects destructor to run, + * Zeek's reference counting triggers the objects destructor to run, * \a HookBroObjDtor will be called. * * Note that his can get expensive if triggered for many objects. @@ -828,10 +828,10 @@ protected: /** * Hook into loading input files. This method will be called between * InitPreScript() and InitPostScript(), but with no further order or - * timing guaranteed. It will be called once for each input file Bro + * timing guaranteed. 
It will be called once for each input file Zeek * is about to load, either given on the command line or via @load * script directives. The hook can take over the file, in which case - * Bro will not further process it otherwise. + * Zeek will not further process it otherwise. * * @param type The type of load encountered: script load, signatures load, * or plugin load. @@ -839,13 +839,13 @@ protected: * @param file The filename that was passed to @load. Only includes * an extension if it was given in @load. * - * @param resolved The file or directory name Bro resolved from + * @param resolved The file or directory name Zeek resolved from * the given path and is going to load. Empty string - * if Bro was not able to resolve a path. + * if Zeek was not able to resolve a path. * * @return 1 if the plugin took over the file and loaded it * successfully; 0 if the plugin took over the file but had trouble - * loading it (Bro will abort in this case, and the plugin should + * loading it (Zeek will abort in this case, and the plugin should * have printed an error message); and -1 if the plugin wasn't * interested in the file at all. */ @@ -856,10 +856,10 @@ protected: * Hook into loading input files, with extended capabilities. This method * will be called between InitPreScript() and InitPostScript(), but with no * further order or timing guaranteed. It will be called once for each - * input file Bro is about to load, either given on the command line or via + * input file Zeek is about to load, either given on the command line or via * @load script directives. The hook can take over the file, in which case - * Bro will not further process it otherwise. It can, alternatively, also - * provide the file content as a string, which Bro will then process just + * Zeek will not further process it otherwise. It can, alternatively, also + * provide the file content as a string, which Zeek will then process just * as if it had read it from a file. 
* * @param type The type of load encountered: script load, signatures load, @@ -868,21 +868,21 @@ protected: * @param file The filename that was passed to @load. Only includes * an extension if it was given in @load. * - * @param resolved The file or directory name Bro resolved from + * @param resolved The file or directory name Zeek resolved from * the given path and is going to load. Empty string - * if Bro was not able to resolve a path. + * if Zeek was not able to resolve a path. * * @return tuple of an integer and an optional string, where: the integer * must be 1 if the plugin takes over loading the file (see below); 0 if * the plugin wanted to take over the file but had trouble loading it * (processing will abort in this case, and the plugin should have printed - * an error message); and -1 if the plugin wants Bro to proceeed processing + * an error message); and -1 if the plugin wants Zeek to proceeed processing * the file normally. If the plugins takes over by returning 1, there are * two cases: if the second tuple element remains unset, the plugin handled - * the loading completely internally; Bro will not do anything further with + * the loading completely internally; Zeek will not do anything further with * it. Alternatively, the plugin may optionally return the acutal content * to use for the file as a string through the tuple's second element. If - * so, Bro will ignore the file on disk and use that provided content + * so, Zeek will ignore the file on disk and use that provided content * instead (including when there's actually no physical file in place on * disk at all, and loading would have hence failed otherwise). */ @@ -959,7 +959,7 @@ protected: /** * Hook for destruction of objects registered with - * RequestBroObjDtor(). When Bro's reference counting triggers the + * RequestBroObjDtor(). When Zeek's reference counting triggers the * objects destructor to run, this method will be run. 
It may also * run for other objects that this plugin has not registered for. * @@ -1041,7 +1041,7 @@ protected: * * @param conn The associated connection * - * @param addl Additional Bro values; typically will be passed to the event + * @param addl Additional Zeek values; typically will be passed to the event * by the reporter framework. * * @param location True if event expects location information diff --git a/src/probabilistic/BitVector.cc b/src/probabilistic/BitVector.cc index 2f2175a594..d724a54e7f 100644 --- a/src/probabilistic/BitVector.cc +++ b/src/probabilistic/BitVector.cc @@ -5,6 +5,7 @@ #include #include #include +#include #include #include "zeek/digest.h" @@ -512,12 +513,12 @@ broker::expected BitVector::Serialize() const std::unique_ptr BitVector::Unserialize(const broker::data& data) { - auto v = caf::get_if(&data); + auto v = broker::get_if(&data); if ( ! (v && v->size() >= 2) ) return nullptr; - auto num_bits = caf::get_if(&(*v)[0]); - auto size = caf::get_if(&(*v)[1]); + auto num_bits = broker::get_if(&(*v)[0]); + auto size = broker::get_if(&(*v)[1]); if ( ! (num_bits && size) ) return nullptr; @@ -530,7 +531,7 @@ std::unique_ptr BitVector::Unserialize(const broker::data& data) for ( size_t i = 0; i < *size; ++i ) { - auto x = caf::get_if(&(*v)[2 + i]); + auto x = broker::get_if(&(*v)[2 + i]); if ( ! x ) return nullptr; diff --git a/src/probabilistic/BloomFilter.cc b/src/probabilistic/BloomFilter.cc index 6211ef49fb..56dc10603c 100644 --- a/src/probabilistic/BloomFilter.cc +++ b/src/probabilistic/BloomFilter.cc @@ -46,12 +46,12 @@ broker::expected BloomFilter::Serialize() const std::unique_ptr BloomFilter::Unserialize(const broker::data& data) { - auto v = caf::get_if(&data); + auto v = broker::get_if(&data); if ( ! (v && v->size() == 3) ) return nullptr; - auto type = caf::get_if(&(*v)[0]); + auto type = broker::get_if(&(*v)[0]); if ( ! 
type ) return nullptr; diff --git a/src/probabilistic/CardinalityCounter.cc b/src/probabilistic/CardinalityCounter.cc index 0e9365b4d3..b203fa6642 100644 --- a/src/probabilistic/CardinalityCounter.cc +++ b/src/probabilistic/CardinalityCounter.cc @@ -212,13 +212,13 @@ broker::expected CardinalityCounter::Serialize() const std::unique_ptr CardinalityCounter::Unserialize(const broker::data& data) { - auto v = caf::get_if(&data); + auto v = broker::get_if(&data); if ( ! (v && v->size() >= 3) ) return nullptr; - auto m = caf::get_if(&(*v)[0]); - auto V = caf::get_if(&(*v)[1]); - auto alpha_m = caf::get_if(&(*v)[2]); + auto m = broker::get_if(&(*v)[0]); + auto V = broker::get_if(&(*v)[1]); + auto alpha_m = broker::get_if(&(*v)[2]); if ( ! (m && V && alpha_m) ) return nullptr; @@ -233,7 +233,7 @@ std::unique_ptr CardinalityCounter::Unserialize(const broker for ( size_t i = 0; i < *m; ++i ) { - auto x = caf::get_if(&(*v)[3 + i]); + auto x = broker::get_if(&(*v)[3 + i]); if ( ! x ) return nullptr; diff --git a/src/probabilistic/CounterVector.cc b/src/probabilistic/CounterVector.cc index c4912b425e..a9a9a287a1 100644 --- a/src/probabilistic/CounterVector.cc +++ b/src/probabilistic/CounterVector.cc @@ -187,11 +187,11 @@ broker::expected CounterVector::Serialize() const std::unique_ptr CounterVector::Unserialize(const broker::data& data) { - auto v = caf::get_if(&data); + auto v = broker::get_if(&data); if ( ! (v && v->size() >= 2) ) return nullptr; - auto width = caf::get_if(&(*v)[0]); + auto width = broker::get_if(&(*v)[0]); auto bits = BitVector::Unserialize((*v)[1]); if ( ! 
(width && bits) ) diff --git a/src/probabilistic/CounterVector.h b/src/probabilistic/CounterVector.h index c6f61b4f11..c70a509edd 100644 --- a/src/probabilistic/CounterVector.h +++ b/src/probabilistic/CounterVector.h @@ -158,13 +158,13 @@ public: protected: friend CounterVector operator|(const CounterVector& x, const CounterVector& y); - CounterVector() { } + CounterVector() = default; private: CounterVector& operator=(const CounterVector&); // Disable. - BitVector* bits; - size_t width; + BitVector* bits = nullptr; + size_t width = 0; }; } // namespace zeek::probabilistic::detail diff --git a/src/probabilistic/Hasher.cc b/src/probabilistic/Hasher.cc index 9935f31896..5b20222d10 100644 --- a/src/probabilistic/Hasher.cc +++ b/src/probabilistic/Hasher.cc @@ -60,15 +60,15 @@ broker::expected Hasher::Serialize() const std::unique_ptr Hasher::Unserialize(const broker::data& data) { - auto v = caf::get_if(&data); + auto v = broker::get_if(&data); if ( ! (v && v->size() == 4) ) return nullptr; - auto type = caf::get_if(&(*v)[0]); - auto k = caf::get_if(&(*v)[1]); - auto h1 = caf::get_if(&(*v)[2]); - auto h2 = caf::get_if(&(*v)[3]); + auto type = broker::get_if(&(*v)[0]); + auto k = broker::get_if(&(*v)[1]); + auto h1 = broker::get_if(&(*v)[2]); + auto h2 = broker::get_if(&(*v)[3]); if ( ! (type && k && h1 && h2) ) return nullptr; diff --git a/src/probabilistic/Hasher.h b/src/probabilistic/Hasher.h index 70a3ca6630..c98b630d95 100644 --- a/src/probabilistic/Hasher.h +++ b/src/probabilistic/Hasher.h @@ -49,7 +49,7 @@ public: * @param data A pointer to contiguous data that should be crunched into a * seed. If 0, the function tries to find a global_hash_seed script variable * to derive a seed from. If this variable does not exist, the function uses - * the initial seed generated at Bro startup. + * the initial seed generated at Zeek startup. * * @param size The number of bytes of *data*. 
* @@ -148,7 +148,7 @@ public: /** * Constructs an hash function seeded with a given seed and an - * optional extra seed to replace the initial Bro seed. + * optional extra seed to replace the initial Zeek seed. * * @param arg_seed The seed to use for this instance. */ diff --git a/src/probabilistic/Topk.cc b/src/probabilistic/Topk.cc index 481499630e..bc2232335d 100644 --- a/src/probabilistic/Topk.cc +++ b/src/probabilistic/Topk.cc @@ -447,14 +447,14 @@ broker::expected TopkVal::DoSerialize() const bool TopkVal::DoUnserialize(const broker::data& data) { - auto v = caf::get_if(&data); + auto v = broker::get_if(&data); if ( ! (v && v->size() >= 4) ) return false; - auto size_ = caf::get_if(&(*v)[0]); - auto numElements_ = caf::get_if(&(*v)[1]); - auto pruned_ = caf::get_if(&(*v)[2]); + auto size_ = broker::get_if(&(*v)[0]); + auto numElements_ = broker::get_if(&(*v)[1]); + auto pruned_ = broker::get_if(&(*v)[2]); if ( ! (size_ && numElements_ && pruned_) ) return false; @@ -463,7 +463,7 @@ bool TopkVal::DoUnserialize(const broker::data& data) numElements = *numElements_; pruned = *pruned_; - auto no_type = caf::get_if(&(*v)[3]); + auto no_type = broker::get_if(&(*v)[3]); if ( ! no_type ) { auto t = UnserializeType((*v)[3]); @@ -479,8 +479,8 @@ bool TopkVal::DoUnserialize(const broker::data& data) while ( i < numElements ) { - auto elements_count = caf::get_if(&(*v)[idx++]); - auto count = caf::get_if(&(*v)[idx++]); + auto elements_count = broker::get_if(&(*v)[idx++]); + auto count = broker::get_if(&(*v)[idx++]); if ( ! (elements_count && count) ) return false; @@ -491,7 +491,7 @@ bool TopkVal::DoUnserialize(const broker::data& data) for ( uint64_t j = 0; j < *elements_count; j++ ) { - auto epsilon = caf::get_if(&(*v)[idx++]); + auto epsilon = broker::get_if(&(*v)[idx++]); auto val = Broker::detail::data_to_val((*v)[idx++], type.get()); if ( ! 
(epsilon && val) ) diff --git a/src/probabilistic/Topk.h b/src/probabilistic/Topk.h index d449879f70..10238bef98 100644 --- a/src/probabilistic/Topk.h +++ b/src/probabilistic/Topk.h @@ -58,7 +58,7 @@ public: /** * Call this when a new value is encountered. Note that on the first - * call, the Bro type of the value types that are counted is set. All + * call, the Zeek type of the value types that are counted is set. All * following calls to encountered have to specify the same type. * * @param value The encountered element @@ -81,7 +81,7 @@ public: * certain val. Returns 0 if the val is unknown (and logs the error * to reporter). * - * @param value Bro value to get counts for + * @param value Zeek value to get counts for * * @returns internal count for val, 0 if unknown */ @@ -91,7 +91,7 @@ public: * Get the current epsilon tracked in the top-k data structure for a * certain val. * - * @param value Bro value to get epsilons for + * @param value Zeek value to get epsilons for * * @returns the epsilon. Returns 0 if the val is unknown (and logs * the error to reporter) @@ -171,12 +171,12 @@ private: void Typify(TypePtr t); TypePtr type; - zeek::detail::CompositeHash* hash; + zeek::detail::CompositeHash* hash = nullptr; std::list buckets; - PDict* elementDict; - uint64_t size; // how many elements are we tracking? - uint64_t numElements; // how many elements do we have at the moment - bool pruned; // was this data structure pruned? + PDict* elementDict = nullptr; + uint64_t size = 0; // how many elements are we tracking? + uint64_t numElements = 0; // how many elements do we have at the moment + bool pruned = false; // was this data structure pruned? 
}; } // namespace zeek::probabilistic::detail diff --git a/src/re-scan.l b/src/re-scan.l index 9e0397e231..d7d3c1eb05 100644 --- a/src/re-scan.l +++ b/src/re-scan.l @@ -1,4 +1,4 @@ -/* scan.l - scanner for Bro regular expressions */ +/* scan.l - scanner for Zeek regular expressions */ /* * See the file "COPYING" in the main distribution directory for copyright. diff --git a/src/rule-scan.l b/src/rule-scan.l index 3d9bc6da84..128292043d 100644 --- a/src/rule-scan.l +++ b/src/rule-scan.l @@ -218,7 +218,7 @@ finger { rules_lval.val = zeek::detail::Rule::FINGER; return TOK_PATTERN_TYPE; } %% -// We're about to parse a Bro policy-layer symbol. +// We're about to parse a Zeek policy-layer symbol. void begin_PS() { BEGIN(PS); diff --git a/src/scan.l b/src/scan.l index 4012d60645..52d2896692 100644 --- a/src/scan.l +++ b/src/scan.l @@ -297,9 +297,7 @@ switch return TOK_SWITCH; table return TOK_TABLE; time return TOK_TIME; timeout return TOK_TIMEOUT; -timer return TOK_TIMER; type return TOK_TYPE; -union return TOK_UNION; vector return TOK_VECTOR; when return TOK_WHEN; @@ -376,7 +374,7 @@ when return TOK_WHEN; @load-sigs{WS}{FILE} { const char* file = zeek::util::skip_whitespace(yytext + 10); std::string path = find_relative_file(file, ".sig"); - sig_files.emplace_back(file, path); + sig_files.emplace_back(file, path, GetCurrentLocation()); } @load-plugin{WS}{ID} { @@ -588,6 +586,11 @@ YYLTYPE zeek::detail::GetCurrentLocation() return currloc; } +void zeek::detail::SetCurrentLocation(YYLTYPE currloc) { + ::filename = currloc.filename; + line_number = currloc.first_line; +} + static int load_files(const char* orig_file) { std::string file_path = find_relative_script_file(orig_file); diff --git a/src/script_opt/CPP/Compile.h b/src/script_opt/CPP/Compile.h index 6aad9eb1e9..447427204e 100644 --- a/src/script_opt/CPP/Compile.h +++ b/src/script_opt/CPP/Compile.h @@ -792,6 +792,7 @@ private: std::string GenIndexExpr(const Expr* e, GenType gt); std::string GenAssignExpr(const Expr* 
e, GenType gt, bool top_level); std::string GenAddToExpr(const Expr* e, GenType gt, bool top_level); + std::string GenRemoveFromExpr(const Expr* e, GenType gt, bool top_level); std::string GenSizeExpr(const Expr* e, GenType gt); std::string GenScheduleExpr(const Expr* e); std::string GenLambdaExpr(const Expr* e); diff --git a/src/script_opt/CPP/Exprs.cc b/src/script_opt/CPP/Exprs.cc index a9c41a8377..fed8ddcbe9 100644 --- a/src/script_opt/CPP/Exprs.cc +++ b/src/script_opt/CPP/Exprs.cc @@ -77,8 +77,6 @@ string CPPCompile::GenExpr(const Expr* e, GenType gt, bool top_level) return GenBinary(e, gt, "+", "add"); case EXPR_SUB: return GenBinary(e, gt, "-", "sub"); - case EXPR_REMOVE_FROM: - return GenBinary(e, gt, "-="); case EXPR_TIMES: return GenBinary(e, gt, "*", "mul"); case EXPR_DIVIDE: @@ -127,6 +125,8 @@ string CPPCompile::GenExpr(const Expr* e, GenType gt, bool top_level) return GenAssignExpr(e, gt, top_level); case EXPR_ADD_TO: return GenAddToExpr(e, gt, top_level); + case EXPR_REMOVE_FROM: + return GenRemoveFromExpr(e, gt, top_level); case EXPR_REF: return GenExpr(e->GetOp1(), gt); case EXPR_SIZE: @@ -476,16 +476,34 @@ string CPPCompile::GenAssignExpr(const Expr* e, GenType gt, bool top_level) string CPPCompile::GenAddToExpr(const Expr* e, GenType gt, bool top_level) { const auto& t = e->GetType(); + auto lhs = e->GetOp1(); + auto rhs = e->GetOp2(); + + std::string add_to_func; if ( t->Tag() == TYPE_VECTOR ) { - auto gen = string("vector_append__CPP(") + GenExpr(e->GetOp1(), GEN_VAL_PTR) + ", " + - GenExpr(e->GetOp2(), GEN_VAL_PTR) + ")"; + if ( same_type(lhs->GetType(), rhs->GetType()) ) + add_to_func = "vector_vec_append__CPP"; + else + add_to_func = "vector_append__CPP"; + } + + else if ( t->Tag() == TYPE_PATTERN ) + add_to_func = "re_append__CPP"; + + else if ( t->Tag() == TYPE_TABLE ) + add_to_func = "table_append__CPP"; + + if ( ! 
add_to_func.empty() ) + { + auto gen = add_to_func + "(" + GenExpr(lhs, GEN_VAL_PTR) + ", " + + GenExpr(rhs, GEN_VAL_PTR) + ")"; return GenericValPtrToGT(gen, t, gt); } - // Second GetOp1 is because for non-vectors, LHS will be a RefExpr. - auto lhs = e->GetOp1()->GetOp1(); + // Second GetOp1 is because if we get this far, LHS will be a RefExpr. + lhs = lhs->GetOp1(); if ( t->Tag() == TYPE_STRING ) { @@ -499,7 +517,7 @@ string CPPCompile::GenAddToExpr(const Expr* e, GenType gt, bool top_level) { // LHS is a compound, or a global (and thus doesn't // equate to a C++ variable); expand x += y to x = x + y - auto rhs = make_intrusive(lhs, e->GetOp2()); + rhs = make_intrusive(lhs, rhs); auto assign = make_intrusive(lhs, rhs, false, nullptr, nullptr, false); // Make sure any newly created types are known to @@ -513,6 +531,40 @@ string CPPCompile::GenAddToExpr(const Expr* e, GenType gt, bool top_level) return GenBinary(e, gt, "+="); } +string CPPCompile::GenRemoveFromExpr(const Expr* e, GenType gt, bool top_level) + { + const auto& t = e->GetType(); + auto lhs = e->GetOp1(); + auto rhs = e->GetOp2(); + + if ( t->Tag() == TYPE_TABLE && same_type(lhs->GetType(), rhs->GetType()) ) + { + auto gen = std::string("table_remove_from__CPP(") + GenExpr(lhs, GEN_VAL_PTR) + ", " + + GenExpr(rhs, GEN_VAL_PTR) + ")"; + return GenericValPtrToGT(gen, t, gt); + } + + // Second GetOp1 is because if we get this far, LHS will be a RefExpr. + lhs = lhs->GetOp1(); + + if ( lhs->Tag() != EXPR_NAME || lhs->AsNameExpr()->Id()->IsGlobal() ) + { + // LHS is a compound, or a global (and thus doesn't + // equate to a C++ variable); expand x -= y to x = x - y + rhs = make_intrusive(lhs, rhs); + auto assign = make_intrusive(lhs, rhs, false, nullptr, nullptr, false); + + // Make sure any newly created types are known to + // the profiler. 
+ (void)pfs.HashType(rhs->GetType()); + (void)pfs.HashType(assign->GetType()); + + return GenExpr(assign, gt, top_level); + } + + return GenBinary(e, gt, "-="); + } + string CPPCompile::GenSizeExpr(const Expr* e, GenType gt) { const auto& t = e->GetType(); diff --git a/src/script_opt/CPP/RuntimeInits.cc b/src/script_opt/CPP/RuntimeInits.cc index 5286540997..ef30da2242 100644 --- a/src/script_opt/CPP/RuntimeInits.cc +++ b/src/script_opt/CPP/RuntimeInits.cc @@ -279,7 +279,6 @@ void CPP_TypeInits::Generate(InitsManager* im, vector& ivec, int offset case TYPE_PORT: case TYPE_STRING: case TYPE_TIME: - case TYPE_TIMER: case TYPE_VOID: case TYPE_SUBNET: case TYPE_FILE: diff --git a/src/script_opt/CPP/RuntimeOps.h b/src/script_opt/CPP/RuntimeOps.h index c0ea9231f9..02df049eb9 100644 --- a/src/script_opt/CPP/RuntimeOps.h +++ b/src/script_opt/CPP/RuntimeOps.h @@ -113,6 +113,21 @@ inline TableValPtr table_coerce__CPP(const ValPtr& v, const TypePtr& t) return make_intrusive(cast_intrusive(t), tv->GetAttrs()); } +// For tables, executes t1 += t2. +inline TableValPtr table_append__CPP(const TableValPtr& t1, const TableValPtr& t2) + { + t2->AddTo(t1.get(), false); + return t1; + } + +// For tables, executes t1 -= t2. +inline TableValPtr table_remove_from__CPP(const TableValPtr& t1, const TableValPtr& t2) + { + if ( t2->Size() > 0 ) + t2->RemoveFrom(t1.get()); + return t1; + } + // The same, for an empty record. inline VectorValPtr vector_coerce__CPP(const ValPtr& v, const TypePtr& t) { @@ -147,6 +162,13 @@ extern RecordValPtr record_constructor_map__CPP(std::vector vals, std::v // Constructs a vector of the given type, populated with the given values. extern VectorValPtr vector_constructor__CPP(std::vector vals, VectorTypePtr t); +// For patterns, executes p1 += p2. 
+inline PatternValPtr re_append__CPP(const PatternValPtr& p1, const PatternValPtr& p2) + { + p2->AddTo(p1.get(), false); + return p1; + } + // Schedules an event to occur at the given absolute time, parameterized // with the given set of values. A separate function to facilitate avoiding // the scheduling if Zeek is terminating. diff --git a/src/script_opt/CPP/RuntimeVec.h b/src/script_opt/CPP/RuntimeVec.h index 2dfe1374c3..41be84c55f 100644 --- a/src/script_opt/CPP/RuntimeVec.h +++ b/src/script_opt/CPP/RuntimeVec.h @@ -15,10 +15,17 @@ namespace zeek::detail // Appends v2 to the vector v1. A separate function because of the // need to support assignment cascades. -inline ValPtr vector_append__CPP(VectorValPtr v1, ValPtr v2) +inline ValPtr vector_append__CPP(VectorValPtr v1, const ValPtr& v2) { v1->Assign(v1->Size(), v2); - return v2; + return v1; + } + +// Appends vector v2 to the vector v1. +inline ValPtr vector_vec_append__CPP(VectorValPtr v1, const VectorValPtr& v2) + { + v2->AddTo(v1.get(), false); + return v1; } // Unary vector operations. 
diff --git a/src/script_opt/CPP/Types.cc b/src/script_opt/CPP/Types.cc index 9a482d072e..79d667e988 100644 --- a/src/script_opt/CPP/Types.cc +++ b/src/script_opt/CPP/Types.cc @@ -141,8 +141,6 @@ const char* CPPCompile::TypeTagName(TypeTag tag) return "TYPE_TABLE"; case TYPE_TIME: return "TYPE_TIME"; - case TYPE_TIMER: - return "TYPE_TIMER"; case TYPE_TYPE: return "TYPE_TYPE"; case TYPE_VECTOR: @@ -302,7 +300,6 @@ shared_ptr CPPCompile::RegisterType(const TypePtr& tp) case TYPE_PORT: case TYPE_STRING: case TYPE_TIME: - case TYPE_TIMER: case TYPE_VOID: case TYPE_SUBNET: case TYPE_FILE: diff --git a/src/script_opt/Expr.cc b/src/script_opt/Expr.cc index b4db48a6ec..8cc45b654f 100644 --- a/src/script_opt/Expr.cc +++ b/src/script_opt/Expr.cc @@ -881,40 +881,77 @@ ExprPtr AddToExpr::Duplicate() return SetSucc(new AddToExpr(op1_d, op2_d)); } +bool AddToExpr::IsReduced(Reducer* c) const + { + auto t = op1->GetType(); + auto tag = t->Tag(); + + if ( tag == TYPE_PATTERN ) + return op1->HasReducedOps(c) && op2->IsReduced(c); + + if ( tag == TYPE_TABLE ) + return op1->IsReduced(c) && op2->IsReduced(c); + + if ( tag == TYPE_VECTOR && same_type(t, op2->GetType()) ) + return op1->IsReduced(c) && op2->IsReduced(c); + + return NonReduced(this); + } + ExprPtr AddToExpr::Reduce(Reducer* c, StmtPtr& red_stmt) { - if ( IsVector(op1->GetType()->Tag()) ) + auto tag = op1->GetType()->Tag(); + + switch ( tag ) { - StmtPtr red_stmt1; - StmtPtr red_stmt2; + case TYPE_PATTERN: + case TYPE_TABLE: + case TYPE_VECTOR: + { + StmtPtr red_stmt1; + StmtPtr red_stmt2; - if ( op1->Tag() == EXPR_FIELD ) - red_stmt1 = op1->ReduceToSingletons(c); - else - op1 = op1->Reduce(c, red_stmt1); + if ( tag == TYPE_PATTERN && op1->Tag() == EXPR_FIELD ) + red_stmt1 = op1->ReduceToSingletons(c); + else + op1 = op1->Reduce(c, red_stmt1); - op2 = op2->Reduce(c, red_stmt2); + auto& t = op1->GetType(); + op2 = op2->Reduce(c, red_stmt2); - auto append = make_intrusive(op1->Duplicate(), op2); - 
append->SetOriginal(ThisPtr()); + red_stmt = MergeStmts(red_stmt1, red_stmt2); - auto append_stmt = make_intrusive(append); + if ( tag == TYPE_VECTOR && ! same_type(t, op2->GetType()) ) + { + auto append = make_intrusive(op1->Duplicate(), op2); + append->SetOriginal(ThisPtr()); - red_stmt = MergeStmts(red_stmt1, red_stmt2, append_stmt); + auto append_stmt = make_intrusive(append); - return op1; + red_stmt = MergeStmts(red_stmt, append_stmt); + + return op1; + } + + return ThisPtr(); + } + + default: + { + auto rhs = op1->AsRefExprPtr()->GetOp1(); + auto do_incr = make_intrusive(rhs->Duplicate(), op2); + auto assign = make_intrusive(op1, do_incr, false, nullptr, nullptr, false); + + return assign->ReduceToSingleton(c, red_stmt); + } } + } - else - { - // We could do an ASSERT that op1 is an EXPR_REF, but - // the following is basically equivalent. - auto rhs = op1->AsRefExprPtr()->GetOp1(); - auto do_incr = make_intrusive(rhs->Duplicate(), op2); - auto assign = make_intrusive(op1, do_incr, false, nullptr, nullptr, false); - - return assign->ReduceToSingleton(c, red_stmt); - } +ExprPtr AddToExpr::ReduceToSingleton(Reducer* c, StmtPtr& red_stmt) + { + auto at_stmt = make_intrusive(Duplicate()); + red_stmt = at_stmt->Reduce(c); + return op1; } ExprPtr SubExpr::Duplicate() @@ -972,15 +1009,43 @@ ExprPtr RemoveFromExpr::Duplicate() return SetSucc(new RemoveFromExpr(op1_d, op2_d)); } +bool RemoveFromExpr::IsReduced(Reducer* c) const + { + if ( op1->GetType()->Tag() == TYPE_TABLE ) + return op1->IsReduced(c) && op2->IsReduced(c); + + return NonReduced(this); + } + ExprPtr RemoveFromExpr::Reduce(Reducer* c, StmtPtr& red_stmt) { - auto rhs = op1->AsRefExprPtr()->GetOp1(); - auto do_decr = make_intrusive(rhs->Duplicate(), op2); + if ( op1->GetType()->Tag() == TYPE_TABLE ) + { + StmtPtr red_stmt1; + StmtPtr red_stmt2; + + op1 = op1->Reduce(c, red_stmt1); + op2 = op2->Reduce(c, red_stmt2); + + red_stmt = MergeStmts(red_stmt1, red_stmt2); + + return ThisPtr(); + } + + auto lhs = 
op1->AsRefExprPtr()->GetOp1(); + auto do_decr = make_intrusive(lhs->Duplicate(), op2); auto assign = make_intrusive(op1, do_decr, false, nullptr, nullptr, false); return assign->Reduce(c, red_stmt); } +ExprPtr RemoveFromExpr::ReduceToSingleton(Reducer* c, StmtPtr& red_stmt) + { + auto rf_stmt = make_intrusive(Duplicate()); + red_stmt = rf_stmt->Reduce(c); + return op1; + } + ExprPtr TimesExpr::Duplicate() { auto op1_d = op1->Duplicate(); @@ -1858,6 +1923,12 @@ ExprPtr RecordConstructorExpr::Duplicate() return SetSucc(new RecordConstructorExpr(op_l)); } +ExprPtr RecordConstructorExpr::Inline(Inliner* inl) + { + op = op->Inline(inl)->AsListExprPtr(); + return ThisPtr(); + } + bool RecordConstructorExpr::HasReducedOps(Reducer* c) const { auto& exprs = op->AsListExpr()->Exprs(); @@ -2670,6 +2741,13 @@ ExprPtr AppendToExpr::Reduce(Reducer* c, StmtPtr& red_stmt) return ThisPtr(); } +ExprPtr AppendToExpr::ReduceToSingleton(Reducer* c, StmtPtr& red_stmt) + { + auto at_stmt = make_intrusive(Duplicate()); + red_stmt = at_stmt->Reduce(c); + return op1->AsRefExprPtr()->GetOp1(); + } + IndexAssignExpr::IndexAssignExpr(ExprPtr arg_op1, ExprPtr arg_op2, ExprPtr arg_op3) : BinaryExpr(EXPR_INDEX_ASSIGN, std::move(arg_op1), std::move(arg_op2)) { diff --git a/src/script_opt/Inline.cc b/src/script_opt/Inline.cc index 15e1c81391..f9b3e12fd5 100644 --- a/src/script_opt/Inline.cc +++ b/src/script_opt/Inline.cc @@ -125,23 +125,8 @@ void Inliner::Analyze() } for ( auto& f : funcs ) - { - const auto& func_ptr = f.FuncPtr(); - const auto& func = func_ptr.get(); - const auto& body = f.Body(); - - // Processing optimization: only spend time trying to inline f - // if we haven't marked it as inlineable. 
This trades off a - // bunch of compilation load (inlining every single function, - // even though almost none will be called directly) for a - // modest gain of having compiled code for those rare - // circumstances in which a Zeek function can be called - // not ultimately stemming from an event (such as global - // scripting, or expiration functions). - - if ( should_analyze(func_ptr, body) && inline_ables.count(func) == 0 ) + if ( should_analyze(f.FuncPtr(), f.Body()) ) InlineFunction(&f); - } } void Inliner::InlineFunction(FuncInfo* f) diff --git a/src/script_opt/ProfileFunc.cc b/src/script_opt/ProfileFunc.cc index 3315fa1fc4..1b8c31b019 100644 --- a/src/script_opt/ProfileFunc.cc +++ b/src/script_opt/ProfileFunc.cc @@ -576,8 +576,6 @@ void ProfileFuncs::TraverseValue(const ValPtr& v) case TYPE_STRING: case TYPE_SUBNET: case TYPE_TIME: - case TYPE_TIMER: - case TYPE_UNION: case TYPE_VOID: break; @@ -777,8 +775,6 @@ p_hash_type ProfileFuncs::HashType(const Type* t) case TYPE_STRING: case TYPE_SUBNET: case TYPE_TIME: - case TYPE_TIMER: - case TYPE_UNION: case TYPE_VOID: h = merge_p_hashes(h, p_hash(t)); break; diff --git a/src/script_opt/ScriptOpt.cc b/src/script_opt/ScriptOpt.cc index 05ef655016..347b0d3348 100644 --- a/src/script_opt/ScriptOpt.cc +++ b/src/script_opt/ScriptOpt.cc @@ -286,9 +286,6 @@ static void init_options() if ( analysis_options.use_CPP && generating_CPP ) reporter->FatalError("generating C++ incompatible with using C++"); - if ( analysis_options.use_CPP && ! CPP_init_hook ) - reporter->FatalError("no C++ functions available to use"); - auto usage = getenv("ZEEK_USAGE_ISSUES"); if ( usage ) @@ -336,10 +333,7 @@ static void init_options() static void report_CPP() { if ( ! 
CPP_init_hook ) - { - printf("no C++ script bodies available\n"); - exit(0); - } + reporter->FatalError("no C++ script bodies available"); printf("C++ script bodies available that match loaded scripts:\n"); @@ -373,6 +367,9 @@ static void report_CPP() static void use_CPP() { + if ( ! CPP_init_hook ) + reporter->FatalError("no C++ functions available to use"); + for ( auto& f : funcs ) { auto hash = f.Profile()->HashVal(); @@ -511,6 +508,8 @@ static void analyze_scripts_for_ZAM(std::unique_ptr& pfs) if ( ! did_one ) reporter->FatalError("no matching functions/files for -O ZAM"); + + finalize_functions(funcs); } void analyze_scripts() @@ -553,7 +552,14 @@ void analyze_scripts() auto pfs = std::make_unique(funcs, is_CPP_compilable, false); if ( CPP_init_hook ) + { (*CPP_init_hook)(); + if ( compiled_scripts.empty() ) + // The initialization failed to produce any + // script bodies. Make this easily available + // to subsequent checks. + CPP_init_hook = nullptr; + } if ( analysis_options.report_CPP ) { @@ -578,4 +584,23 @@ void analyze_scripts() analyze_scripts_for_ZAM(pfs); } +void profile_script_execution() + { + if ( analysis_options.profile_ZAM ) + { + report_ZOP_profile(); + + for ( auto& f : funcs ) + { + if ( f.Body()->Tag() == STMT_ZAM ) + cast_intrusive(f.Body())->ProfileExecution(); + } + } + } + +void finish_script_execution() + { + profile_script_execution(); + } + } // namespace zeek::detail diff --git a/src/script_opt/ScriptOpt.h b/src/script_opt/ScriptOpt.h index dba9a41187..95d7889d7e 100644 --- a/src/script_opt/ScriptOpt.h +++ b/src/script_opt/ScriptOpt.h @@ -185,6 +185,9 @@ extern bool should_analyze(const ScriptFuncPtr& f, const StmtPtr& body); // Analyze all of the parsed scripts collectively for optimization. extern void analyze_scripts(); +// Called when Zeek is terminating. +extern void finish_script_execution(); + // Used for C++-compiled scripts to signal their presence, by setting this // to a non-empty value. 
extern void (*CPP_init_hook)(); diff --git a/src/script_opt/Stmt.cc b/src/script_opt/Stmt.cc index 27a55c5658..8251e05c71 100644 --- a/src/script_opt/Stmt.cc +++ b/src/script_opt/Stmt.cc @@ -161,7 +161,8 @@ StmtPtr ExprStmt::DoReduce(Reducer* c) return TransformMe(make_intrusive(), c); if ( (t == EXPR_ASSIGN || t == EXPR_CALL || t == EXPR_INDEX_ASSIGN || - t == EXPR_FIELD_LHS_ASSIGN || t == EXPR_APPEND_TO) && + t == EXPR_FIELD_LHS_ASSIGN || t == EXPR_APPEND_TO || t == EXPR_ADD_TO || + t == EXPR_REMOVE_FROM) && e->IsReduced(c) ) return ThisPtr(); @@ -728,7 +729,7 @@ bool StmtList::IsReduced(Reducer* c) const StmtPtr StmtList::DoReduce(Reducer* c) { - StmtPList* f_stmts = new StmtPList; + StmtPList* f_stmts = new StmtPList{}; bool did_change = false; int n = Stmts().length(); @@ -749,7 +750,10 @@ StmtPtr StmtList::DoReduce(Reducer* c) } if ( f_stmts->length() == 0 ) + { + delete f_stmts; return TransformMe(make_intrusive(), c); + } if ( f_stmts->length() == 1 ) return (*f_stmts)[0]->Reduce(c); diff --git a/src/script_opt/UseDefs.cc b/src/script_opt/UseDefs.cc index de204cc558..21ebb59548 100644 --- a/src/script_opt/UseDefs.cc +++ b/src/script_opt/UseDefs.cc @@ -466,6 +466,15 @@ UDs UseDefs::ExprUDs(const Expr* e) AddInExprUDs(uds, e->GetOp1()->AsRefExprPtr()->GetOp1().get()); break; + case EXPR_ADD_TO: + case EXPR_REMOVE_FROM: + { + AddInExprUDs(uds, e->GetOp1().get()); + auto rhs_UDs = ExprUDs(e->GetOp2().get()); + uds = UD_Union(uds, rhs_UDs); + break; + } + case EXPR_RECORD_CONSTRUCTOR: { auto r = static_cast(e); @@ -527,6 +536,10 @@ void UseDefs::AddInExprUDs(UDs uds, const Expr* e) { switch ( e->Tag() ) { + case EXPR_REF: + AddInExprUDs(uds, e->GetOp1().get()); + break; + case EXPR_NAME: AddID(uds, e->AsNameExpr()->Id()); break; @@ -548,8 +561,9 @@ void UseDefs::AddInExprUDs(UDs uds, const Expr* e) AddInExprUDs(uds, e->GetOp1()->AsRefExprPtr()->GetOp1().get()); break; - case EXPR_ASSIGN: - // These occur inside of table constructors. 
+ case EXPR_ASSIGN: // can occur inside a table constructor + case EXPR_ADD_TO: + case EXPR_REMOVE_FROM: AddInExprUDs(uds, e->GetOp1().get()); AddInExprUDs(uds, e->GetOp2().get()); break; diff --git a/src/script_opt/ZAM/AM-Opt.cc b/src/script_opt/ZAM/AM-Opt.cc index fa8b81ac57..3593da3dc5 100644 --- a/src/script_opt/ZAM/AM-Opt.cc +++ b/src/script_opt/ZAM/AM-Opt.cc @@ -18,7 +18,7 @@ namespace zeek::detail // can't do this when compiling individual functions since for event handlers // and hooks it needs to be computed across all of their bodies. // -// Note, this is now not really needed, because we no longer use any +// Note, this is now not actually needed, because we no longer use any // interpreter frame entries other than those for the function's arguments. // We keep the code in case that changes, for example when deciding to // compile functions that include "return when" conditions. @@ -26,40 +26,39 @@ std::unordered_map remapped_intrp_frame_sizes; void finalize_functions(const std::vector& funcs) { - // Given we've now compiled all of the function bodies, we - // can reset the interpreter frame sizes of each function - // to be the maximum needed to accommodate all of its - // remapped bodies. + // Given we've now compiled all of the function bodies, we can reset + // the interpreter frame sizes to what's actually used. This can be + // a huge win for massively inlined event handlers, which otherwise + // can have frames sized for 100s of variables, none of which (other + // than the arguments) need TLC such as via calls to Frame::Reset(). // Find any functions with bodies that weren't compiled and - // make sure we don't reduce their frame size. For any loaded - // from ZAM save files, use the associated maximum interpreter - // frame size as a minimum. + // make sure we don't reduce their frame size. 
+ std::unordered_set leave_alone; + + for ( auto& f : funcs ) + if ( f.Body()->Tag() != STMT_ZAM ) + // This function has a body that wasn't compiled, + // don't mess with its size. + leave_alone.insert(f.Func()); + for ( auto& f : funcs ) { auto func = f.Func(); - // If we have non-compiled versions of the function's body, - // preserve the size they need. - int size = func->FrameSize(); - - if ( f.Body()->Tag() != STMT_ZAM && remapped_intrp_frame_sizes.count(func) > 0 && - size > remapped_intrp_frame_sizes[func] ) - remapped_intrp_frame_sizes[func] = size; - } - - for ( auto& f : funcs ) - { - auto func = f.Func(); + if ( leave_alone.count(func) > 0 ) + continue; if ( remapped_intrp_frame_sizes.count(func) == 0 ) // No entry for this function, keep current frame size. continue; - // Note, functions with multiple bodies appear in "funcs" - // multiple times, but the following doesn't hurt to do - // more than once. - func->SetFrameSize(remapped_intrp_frame_sizes[func]); + auto& ft = func->GetType(); + auto& params = ft->Params(); + func->SetFrameSize(params->NumFields()); + + // Don't bother processing any future instances. + leave_alone.insert(func); } } @@ -670,7 +669,15 @@ void ZAMCompiler::ReMapVar(const ID* id, int slot, bro_uint_t inst) // powerful allocation method like graph coloring. However, far and // away the bulk of our variables are short-lived temporaries, // for which greedy should work fine. - bool is_managed = ZVal::IsManagedType(id->GetType()); + // + // Note, we also need to make sure that denizens sharing a slot + // are all consistently either managed, or non-managed, types. + // One subtlety in this regard is that identifiers that are types + // should always be deemed "managed", even if the type they refer + // to is not managed, because what matters for uses of those + // identifiers is interpreting them as "any" values having an + // internal type of TYPE_TYPE. 
+ bool is_managed = ZVal::IsManagedType(id->GetType()) || id->IsType(); int apt_slot = -1; for ( unsigned int i = 0; i < shared_frame_denizens.size(); ++i ) diff --git a/src/script_opt/ZAM/Compile.h b/src/script_opt/ZAM/Compile.h index ec6cad51bb..e1d06e128c 100644 --- a/src/script_opt/ZAM/Compile.h +++ b/src/script_opt/ZAM/Compile.h @@ -178,6 +178,8 @@ private: const ZAMStmt CompileIncrExpr(const IncrExpr* e); const ZAMStmt CompileAppendToExpr(const AppendToExpr* e); + const ZAMStmt CompileAddToExpr(const AddToExpr* e); + const ZAMStmt CompileRemoveFromExpr(const RemoveFromExpr* e); const ZAMStmt CompileAssignExpr(const AssignExpr* e); const ZAMStmt CompileAssignToIndex(const NameExpr* lhs, const IndexExpr* rhs); const ZAMStmt CompileFieldLHSAssignExpr(const FieldLHSAssignExpr* e); @@ -233,9 +235,6 @@ private: const ZAMStmt AssignVecElems(const Expr* e); const ZAMStmt AssignTableElem(const Expr* e); - const ZAMStmt AppendToField(const NameExpr* n1, const NameExpr* n2, const ConstExpr* c, - int offset); - const ZAMStmt ConstructTable(const NameExpr* n, const Expr* e); const ZAMStmt ConstructSet(const NameExpr* n, const Expr* e); const ZAMStmt ConstructRecord(const NameExpr* n, const Expr* e); diff --git a/src/script_opt/ZAM/Driver.cc b/src/script_opt/ZAM/Driver.cc index 4d6fc91f76..b44e094af2 100644 --- a/src/script_opt/ZAM/Driver.cc +++ b/src/script_opt/ZAM/Driver.cc @@ -261,7 +261,7 @@ void ZAMCompiler::ComputeLoopLevels() // We're extending an existing loop. Find // its current end. 
auto depth = t->loop_depth; - while ( j < i && insts1[j]->loop_depth == depth ) + while ( j < i && insts1[j]->loop_depth >= depth ) ++j; ASSERT(insts1[j]->loop_depth == depth - 1); diff --git a/src/script_opt/ZAM/Expr.cc b/src/script_opt/ZAM/Expr.cc index 3682e9f1ca..bacca511b8 100644 --- a/src/script_opt/ZAM/Expr.cc +++ b/src/script_opt/ZAM/Expr.cc @@ -20,6 +20,12 @@ const ZAMStmt ZAMCompiler::CompileExpr(const Expr* e) case EXPR_APPEND_TO: return CompileAppendToExpr(static_cast(e)); + case EXPR_ADD_TO: + return CompileAddToExpr(static_cast(e)); + + case EXPR_REMOVE_FROM: + return CompileRemoveFromExpr(static_cast(e)); + case EXPR_ASSIGN: return CompileAssignExpr(static_cast(e)); @@ -76,44 +82,72 @@ const ZAMStmt ZAMCompiler::CompileIncrExpr(const IncrExpr* e) const ZAMStmt ZAMCompiler::CompileAppendToExpr(const AppendToExpr* e) { - auto op1 = e->GetOp1(); + auto n1 = e->GetOp1()->AsNameExpr(); auto op2 = e->GetOp2(); + auto n2 = op2->Tag() == EXPR_NAME ? op2->AsNameExpr() : nullptr; + auto cc = op2->Tag() != EXPR_NAME ? op2->AsConstExpr() : nullptr; + if ( n1->GetType()->Yield()->Tag() == TYPE_ANY ) + return n2 ? AppendToAnyVecVV(n1, n2) : AppendToAnyVecVC(n1, cc); + + return n2 ? AppendToVV(n1, n2) : AppendToVC(n1, cc); + } + +const ZAMStmt ZAMCompiler::CompileAddToExpr(const AddToExpr* e) + { + auto op1 = e->GetOp1(); + auto t1 = op1->GetType()->Tag(); + + auto op2 = e->GetOp2(); auto n2 = op2->Tag() == EXPR_NAME ? op2->AsNameExpr() : nullptr; auto cc = op2->Tag() != EXPR_NAME ? op2->AsConstExpr() : nullptr; if ( op1->Tag() == EXPR_FIELD ) { + assert(t1 == TYPE_PATTERN); auto f = op1->AsFieldExpr()->Field(); auto n1 = op1->GetOp1()->AsNameExpr(); - return AppendToField(n1, n2, cc, f); + + ZInstI z; + + if ( n2 ) + { + z = ZInstI(OP_ADDPATTERNTOFIELD_VVi, FrameSlot(n1), FrameSlot(n2), f); + z.op_type = OP_VVV_I3; + } + else + { + z = ZInstI(OP_ADDPATTERNTOFIELD_VCi, FrameSlot(n1), f, cc); + z.op_type = OP_VVC_I2; + } + + z.SetType(n2 ? 
n2->GetType() : cc->GetType()); + + return AddInst(z); } auto n1 = op1->AsNameExpr(); - return n2 ? AppendToVV(n1, n2) : AppendToVC(n1, cc); + if ( t1 == TYPE_PATTERN ) + return n2 ? ExtendPatternVV(n1, n2) : ExtendPatternVC(n1, cc); + + if ( t1 == TYPE_VECTOR ) + return n2 ? AddVecToVecVV(n1, n2) : AddVecToVecVC(n1, cc); + + assert(t1 == TYPE_TABLE); + + return n2 ? AddTableToTableVV(n1, n2) : AddTableToTableVC(n1, cc); } -const ZAMStmt ZAMCompiler::AppendToField(const NameExpr* n1, const NameExpr* n2, const ConstExpr* c, - int offset) +const ZAMStmt ZAMCompiler::CompileRemoveFromExpr(const RemoveFromExpr* e) { - ZInstI z; + auto n1 = e->GetOp1()->AsNameExpr(); + auto op2 = e->GetOp2(); - if ( n2 ) - { - z = ZInstI(OP_APPENDTOFIELD_VVi, FrameSlot(n1), FrameSlot(n2), offset); - z.op_type = OP_VVV_I3; - } - else - { - ASSERT(c); - z = ZInstI(OP_APPENDTOFIELD_VCi, FrameSlot(n1), offset, c); - z.op_type = OP_VVC_I2; - } + auto n2 = op2->Tag() == EXPR_NAME ? op2->AsNameExpr() : nullptr; + auto cc = op2->Tag() != EXPR_NAME ? op2->AsConstExpr() : nullptr; - z.SetType(n2 ? n2->GetType() : c->GetType()); - - return AddInst(z); + return n2 ? RemoveTableFromTableVV(n1, n2) : RemoveTableFromTableVC(n1, cc); } const ZAMStmt ZAMCompiler::CompileAssignExpr(const AssignExpr* e) diff --git a/src/script_opt/ZAM/Gen-ZAM.cc b/src/script_opt/ZAM/Gen-ZAM.cc deleted file mode 100644 index 92948b4d84..0000000000 --- a/src/script_opt/ZAM/Gen-ZAM.cc +++ /dev/null @@ -1,2223 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#include "zeek/script_opt/ZAM/Gen-ZAM.h" - -#include -#include - -using namespace std; - -// Helper functions to convert dashes to underscores or vice versa. -static char dash_to_under(char c) - { - return c == '-' ? '_' : c; - } - -static char under_to_dash(char c) - { - return c == '_' ? 
'-' : c; - } - -// Structure for binding together Zeek script types, internal names Gen-ZAM -// uses to track them, mnemonics for referring to them in instruction names, -// the corresponding Val accessor, and whether the type requires memory -// management. -struct TypeInfo - { - string tag; - ZAM_ExprType et; - string suffix; - string accessor; // doesn't include "As" prefix or "()" suffix - bool is_managed; - }; - -static vector ZAM_type_info = { - {"TYPE_ADDR", ZAM_EXPR_TYPE_ADDR, "A", "Addr", true}, - {"TYPE_ANY", ZAM_EXPR_TYPE_ANY, "a", "Any", true}, - {"TYPE_COUNT", ZAM_EXPR_TYPE_UINT, "U", "Count", false}, - {"TYPE_DOUBLE", ZAM_EXPR_TYPE_DOUBLE, "D", "Double", false}, - {"TYPE_FILE", ZAM_EXPR_TYPE_FILE, "f", "File", true}, - {"TYPE_FUNC", ZAM_EXPR_TYPE_FUNC, "F", "Func", true}, - {"TYPE_INT", ZAM_EXPR_TYPE_INT, "I", "Int", false}, - {"TYPE_LIST", ZAM_EXPR_TYPE_LIST, "L", "List", true}, - {"TYPE_OPAQUE", ZAM_EXPR_TYPE_OPAQUE, "O", "Opaque", true}, - {"TYPE_PATTERN", ZAM_EXPR_TYPE_PATTERN, "P", "Pattern", true}, - {"TYPE_RECORD", ZAM_EXPR_TYPE_RECORD, "R", "Record", true}, - {"TYPE_STRING", ZAM_EXPR_TYPE_STRING, "S", "String", true}, - {"TYPE_SUBNET", ZAM_EXPR_TYPE_SUBNET, "N", "SubNet", true}, - {"TYPE_TABLE", ZAM_EXPR_TYPE_TABLE, "T", "Table", true}, - {"TYPE_TYPE", ZAM_EXPR_TYPE_TYPE, "t", "Type", true}, - {"TYPE_VECTOR", ZAM_EXPR_TYPE_VECTOR, "V", "Vector", true}, -}; - -// Given a ZAM_ExprType, returns the corresponding TypeInfo. -const TypeInfo& find_type_info(ZAM_ExprType et) - { - assert(et != ZAM_EXPR_TYPE_NONE); - - auto pred = [et](const TypeInfo& ti) -> bool - { - return ti.et == et; - }; - auto ti = std::find_if(ZAM_type_info.begin(), ZAM_type_info.end(), pred); - - assert(ti != ZAM_type_info.end()); - return *ti; - } - -// Given a ZAM_ExprType, return its ZVal accessor. Takes into account -// some naming inconsistencies between ZVal's and Val's. 
-string find_type_accessor(ZAM_ExprType et) - { - switch ( et ) - { - case ZAM_EXPR_TYPE_NONE: - return ""; - - case ZAM_EXPR_TYPE_UINT: - return "uint_val"; - - case ZAM_EXPR_TYPE_PATTERN: - return "re_val"; - - default: - { - string acc = find_type_info(et).accessor; - transform(acc.begin(), acc.end(), acc.begin(), ::tolower); - return acc + "_val"; - } - } - } - -// Maps ZAM operand types to pairs of (1) the C++ name used to declare -// the operand in a method declaration, and (2) the variable name to -// use for the operand. -unordered_map> ArgsManager::ot_to_args = { - {ZAM_OT_AUX, {"OpaqueVals*", "v"}}, - {ZAM_OT_CONSTANT, {"const ConstExpr*", "c"}}, - {ZAM_OT_EVENT_HANDLER, {"EventHandler*", "h"}}, - {ZAM_OT_INT, {"int", "i"}}, - {ZAM_OT_LIST, {"const ListExpr*", "l"}}, - {ZAM_OT_RECORD_FIELD, {"const NameExpr*", "n"}}, - {ZAM_OT_VAR, {"const NameExpr*", "n"}}, - - // The following gets special treatment. - {ZAM_OT_ASSIGN_FIELD, {"const NameExpr*", "n"}}, -}; - -ArgsManager::ArgsManager(const vector& ot, ZAM_InstClass zc) - { - int n = 0; - bool add_field = false; - - for ( const auto& ot_i : ot ) - { - if ( ot_i == ZAM_OT_NONE ) - { // it had better be the only operand type - assert(ot.size() == 1); - break; - } - - if ( n++ == 0 && zc == ZIC_COND ) - // Skip the conditional's nominal assignment slot. - continue; - - // Start off the argument info using the usual case - // of (1) same method parameter name as GenInst argument, - // and (2) not requiring a record field. - auto& arg_i = ot_to_args[ot_i]; - Arg arg = {arg_i.second, arg_i.first, arg_i.second, false}; - - if ( ot_i == ZAM_OT_ASSIGN_FIELD ) - { - arg.is_field = true; - - if ( n == 1 ) - { // special-case the parameter - arg.decl_name = "flhs"; - arg.decl_type = "const FieldLHSAssignExpr*"; - } - } - - args.emplace_back(move(arg)); - } - - Differentiate(); - } - -void ArgsManager::Differentiate() - { - // First, figure out which parameter names are used how often. 
- map name_count; // how often the name apepars - map usage_count; // how often the name's been used so far - for ( auto& arg : args ) - { - auto& name = arg.param_name; - if ( name_count.count(name) == 0 ) - { - name_count[name] = 1; - usage_count[name] = 0; - } - else - ++name_count[name]; - } - - // Now for each name - whether appearing as an argument or in - // a declaration - if it's used more than once, then differentiate - // it. Note, some names only appear multiple times as arguments - // when invoking methods, but not in the declarations of the methods - // themselves. - for ( auto& arg : args ) - { - auto& decl = arg.decl_name; - auto& name = arg.param_name; - bool decl_and_arg_same = decl == name; - - if ( name_count[name] == 1 ) - continue; // it's unique - - auto n = to_string(++usage_count[name]); - name += n; - if ( decl_and_arg_same ) - decl += n; - } - - // Finally, build the full versions of the declaration and parameters. - - // Tracks how many record fields we're dealing with. - int num_fields = 0; - - for ( auto& arg : args ) - { - if ( ! full_decl.empty() ) - full_decl += ", "; - - full_decl += arg.decl_type + " " + arg.decl_name; - - if ( ! full_params.empty() ) - full_params += ", "; - - full_params += arg.param_name; - params.push_back(arg.param_name); - - if ( arg.is_field ) - ++num_fields; - } - - assert(num_fields <= 2); - - // Add in additional arguments/parameters for record fields. - if ( num_fields == 1 ) - full_params += ", field"; - else if ( num_fields == 2 ) - { - full_decl += ", int field2"; - full_params += ", field1, field2"; - } - } - -ZAM_OpTemplate::ZAM_OpTemplate(ZAMGen* _g, string _base_name) : g(_g), base_name(move(_base_name)) - { - // Make the base name viable in a C++ name. 
- transform(base_name.begin(), base_name.end(), base_name.begin(), dash_to_under); - - cname = base_name; - transform(cname.begin(), cname.end(), cname.begin(), ::toupper); - } - -void ZAM_OpTemplate::Build() - { - op_loc = g->CurrLoc(); - - string line; - while ( g->ScanLine(line) ) - { - if ( line.size() <= 1 ) - break; - - auto words = g->SplitIntoWords(line); - if ( words.empty() ) - break; - - Parse(words[0], line, words); - } - } - -void ZAM_OpTemplate::Instantiate() - { - InstantiateOp(OperandTypes(), IncludesVectorOp()); - } - -void ZAM_OpTemplate::UnaryInstantiate() - { - // First operand is always the frame slot to which this operation - // assigns the result of the applying unary operator. - vector ots = {ZAM_OT_VAR}; - ots.resize(2); - - // Now build versions for a constant operand (maybe not actually - // needed due to constant folding, but sometimes that gets deferred - // to run-time) ... - if ( ! NoConst() ) - { - ots[1] = ZAM_OT_CONSTANT; - InstantiateOp(ots, IncludesVectorOp()); - } - - // ... and for a variable (frame-slot) operand. 
- ots[1] = ZAM_OT_VAR; - InstantiateOp(ots, IncludesVectorOp()); - } - -void ZAM_OpTemplate::Parse(const string& attr, const string& line, const Words& words) - { - int num_args = -1; // -1 = don't enforce - int nwords = words.size(); - - if ( attr == "type" ) - { - if ( nwords <= 1 ) - g->Gripe("missing argument", line); - - num_args = 1; - - const char* types = words[1].c_str(); - while ( *types ) - { - ZAM_OperandType ot = ZAM_OT_NONE; - switch ( *types ) - { - case 'C': - ot = ZAM_OT_CONSTANT; - break; - case 'F': - ot = ZAM_OT_ASSIGN_FIELD; - break; - case 'H': - ot = ZAM_OT_EVENT_HANDLER; - break; - case 'L': - ot = ZAM_OT_LIST; - break; - case 'O': - ot = ZAM_OT_AUX; - break; - case 'R': - ot = ZAM_OT_RECORD_FIELD; - break; - case 'V': - ot = ZAM_OT_VAR; - break; - case 'i': - ot = ZAM_OT_INT; - break; - - case 'X': - ot = ZAM_OT_NONE; - break; - - default: - g->Gripe("bad operand type", words[1]); - break; - } - - AddOpType(ot); - - ++types; - } - } - - else if ( attr == "op1-read" ) - { - num_args = 0; - SetOp1Flavor("OP1_READ"); - } - - else if ( attr == "op1-read-write" ) - { - num_args = 0; - SetOp1Flavor("OP1_READ_WRITE"); - } - - else if ( attr == "op1-internal" ) - { - num_args = 0; - SetOp1Flavor("OP1_INTERNAL"); - } - - else if ( attr == "set-type" ) - { - num_args = 1; - if ( nwords > 1 ) - SetTypeParam(ExtractTypeParam(words[1])); - } - - else if ( attr == "set-type2" ) - { - num_args = 1; - if ( nwords > 1 ) - SetType2Param(ExtractTypeParam(words[1])); - } - - else if ( attr == "custom-method" ) - SetCustomMethod(g->SkipWords(line, 1)); - - else if ( attr == "method-post" ) - SetPostMethod(g->SkipWords(line, 1)); - - else if ( attr == "side-effects" ) - { - if ( nwords == 3 ) - SetAssignmentLess(words[1], words[2]); - else - // otherwise shouldn't be any arguments - num_args = 0; - - SetHasSideEffects(); - } - - else if ( attr == "no-eval" ) - { - num_args = 0; - SetNoEval(); - } - - else if ( attr == "vector" ) - { - num_args = 0; - 
SetIncludesVectorOp(); - } - - else if ( attr == "assign-val" ) - { - num_args = 1; - if ( words.size() > 1 ) - SetAssignVal(words[1]); - } - - else if ( attr == "eval" ) - { - AddEval(g->SkipWords(line, 1)); - - auto addl = GatherEval(); - if ( ! addl.empty() ) - AddEval(addl); - } - - else if ( attr == "macro" ) - g->ReadMacro(line); - - else - g->Gripe("unknown template attribute", attr); - - if ( num_args >= 0 && num_args != nwords - 1 ) - g->Gripe("extraneous or missing arguments", line); - } - -string ZAM_OpTemplate::GatherEval() - { - string res; - string l; - while ( g->ScanLine(l) ) - { - if ( l.size() <= 1 || ! isspace(l.c_str()[0]) ) - { - g->PutBack(l); - return res; - } - - res += l; - } - - return res; - } - -int ZAM_OpTemplate::ExtractTypeParam(const string& arg) - { - if ( arg == "$$" ) - return 1; - - if ( arg[0] != '$' ) - g->Gripe("bad set-type parameter, should be $n", arg); - - int param = atoi(&arg[1]); - - if ( param <= 0 || param > 2 ) - g->Gripe("bad set-type parameter, should be $1 or $2", arg); - - // Convert operand to underlying instruction element, i.e., add - // one to account for the $$ assignment slot. - return param + 1; - } - -// Maps an operand type to a character mnemonic used to distinguish -// it from others. 
-unordered_map ZAM_OpTemplate::ot_to_char = { - {ZAM_OT_AUX, 'O'}, {ZAM_OT_CONSTANT, 'C'}, {ZAM_OT_EVENT_HANDLER, 'H'}, - {ZAM_OT_ASSIGN_FIELD, 'F'}, {ZAM_OT_INT, 'i'}, {ZAM_OT_LIST, 'L'}, - {ZAM_OT_NONE, 'X'}, {ZAM_OT_RECORD_FIELD, 'R'}, {ZAM_OT_VAR, 'V'}, -}; - -void ZAM_OpTemplate::InstantiateOp(const vector& ot, bool do_vec) - { - auto method = MethodName(ot); - - InstantiateOp(method, ot, ZIC_REGULAR); - - if ( IncludesFieldOp() ) - InstantiateOp(method, ot, ZIC_FIELD); - - if ( do_vec ) - InstantiateOp(method, ot, ZIC_VEC); - - if ( IsConditionalOp() ) - InstantiateOp(method, ot, ZIC_COND); - } - -void ZAM_OpTemplate::InstantiateOp(const string& method, const vector& ot, - ZAM_InstClass zc) - { - string suffix = ""; - - if ( zc == ZIC_FIELD ) - suffix = "_field"; - else if ( zc == ZIC_VEC ) - suffix = "_vec"; - else if ( zc == ZIC_COND ) - suffix = "_cond"; - - if ( ! IsInternalOp() ) - InstantiateMethod(method, suffix, ot, zc); - - if ( IsAssignOp() ) - InstantiateAssignOp(ot, suffix); - else - { - InstantiateEval(ot, suffix, zc); - - if ( HasAssignmentLess() ) - { - auto op_string = "_" + OpSuffix(ot); - auto op = g->GenOpCode(this, op_string); - GenAssignmentlessVersion(op); - } - } - } - -void ZAM_OpTemplate::GenAssignmentlessVersion(string op) - { - EmitTo(AssignFlavor); - Emit("assignmentless_op[" + op + "] = " + AssignmentLessOp() + ";"); - Emit("assignmentless_op_type[" + op + "] = " + AssignmentLessOpType() + ";"); - } - -void ZAM_OpTemplate::InstantiateMethod(const string& m, const string& suffix, - const vector& ot_orig, ZAM_InstClass zc) - { - if ( IsInternalOp() ) - return; - - auto ot = ot_orig; - if ( zc == ZIC_FIELD ) - // Need to make room for the field offset. 
- ot.emplace_back(ZAM_OT_INT); - - auto decls = MethodDeclare(ot, zc); - - EmitTo(MethodDecl); - Emit("const ZAMStmt " + m + suffix + "(" + decls + ");"); - - EmitTo(MethodDef); - Emit("const ZAMStmt ZAMCompiler::" + m + suffix + "(" + decls + ")"); - BeginBlock(); - - InstantiateMethodCore(ot, suffix, zc); - - if ( HasPostMethod() ) - Emit(GetPostMethod()); - - if ( ! HasCustomMethod() ) - Emit("return AddInst(z);"); - - EndBlock(); - NL(); - } - -void ZAM_OpTemplate::InstantiateMethodCore(const vector& ot, string suffix, - ZAM_InstClass zc) - { - if ( HasCustomMethod() ) - { - Emit(GetCustomMethod()); - return; - } - - assert(! ot.empty()); - - string full_suffix = "_" + OpSuffix(ot) + suffix; - - Emit("ZInstI z;"); - - if ( ot[0] == ZAM_OT_AUX ) - { - auto op = g->GenOpCode(this, full_suffix, zc); - Emit("z = ZInstI(" + op + ");"); - return; - } - - if ( ot[0] == ZAM_OT_NONE ) - { - auto op = g->GenOpCode(this, full_suffix, zc); - Emit("z = GenInst(" + op + ");"); - return; - } - - if ( ot.size() > 1 && ot[1] == ZAM_OT_AUX ) - { - auto op = g->GenOpCode(this, full_suffix, zc); - Emit("z = ZInstI(" + op + ", Frame1Slot(n, " + op + "));"); - return; - } - - ArgsManager args(ot, zc); - BuildInstruction(ot, args.Params(), full_suffix, zc); - - auto tp = GetTypeParam(); - if ( tp > 0 ) - Emit("z.SetType(" + args.NthParam(tp - 1) + "->GetType());"); - - auto tp2 = GetType2Param(); - if ( tp2 > 0 ) - Emit("z.t2 = " + args.NthParam(tp2 - 1) + "->GetType();"); - } - -void ZAM_OpTemplate::BuildInstruction(const vector& ot, const string& params, - const string& suffix, ZAM_InstClass zc) - { - auto op = g->GenOpCode(this, suffix, zc); - Emit("z = GenInst(" + op + ", " + params + ");"); - } - -void ZAM_OpTemplate::InstantiateEval(const vector& ot, const string& suffix, - ZAM_InstClass zc) - { - auto eval = GetEval(); - - if ( ot.size() > 1 ) - { // Check for use of "$1" to indicate the operand - string op1; - if ( ot[1] == ZAM_OT_CONSTANT ) - op1 = "z.c"; - else if ( ot[1] 
== ZAM_OT_VAR ) - op1 = "frame[z.v2]"; - - eval = regex_replace(eval, regex("\\$1"), op1); - } - - InstantiateEval(Eval, OpSuffix(ot) + suffix, eval, zc); - } - -void ZAM_OpTemplate::InstantiateEval(EmitTarget et, const string& op_suffix, const string& eval, - ZAM_InstClass zc) - { - auto op_code = g->GenOpCode(this, "_" + op_suffix, zc); - - EmitTo(et); - Emit("case " + op_code + ":"); - BeginBlock(); - Emit(eval); - EndBlock(); - EmitUp("break;"); - NL(); - } - -void ZAM_OpTemplate::InstantiateAssignOp(const vector& ot, const string& suffix) - { - // First, create a generic version of the operand, which the - // ZAM compiler uses to find specific-flavored versions. - auto op_string = "_" + OpSuffix(ot); - auto generic_op = g->GenOpCode(this, op_string); - auto flavor_ind = "assignment_flavor[" + generic_op + "]"; - - EmitTo(AssignFlavor); - Emit(flavor_ind + " = empty_map;"); - - auto eval = GetEval(); - auto v = GetAssignVal(); - - for ( auto& ti : ZAM_type_info ) - { - auto op = g->GenOpCode(this, op_string + "_" + ti.suffix); - - if ( IsInternalOp() ) - { - EmitTo(AssignFlavor); - Emit(flavor_ind + "[" + ti.tag + "] = " + op + ";"); - - if ( HasAssignmentLess() ) - GenAssignmentlessVersion(op); - } - - EmitTo(Eval); - Emit("case " + op + ":"); - BeginBlock(); - GenAssignOpCore(ot, eval, ti.accessor, ti.is_managed); - Emit("break;"); - EndBlock(); - } - } - -void ZAM_OpTemplate::GenAssignOpCore(const vector& ot, const string& eval, - const string& accessor, bool is_managed) - { - if ( HasAssignVal() ) - { - GenAssignOpValCore(eval, accessor, is_managed); - return; - } - - if ( ! eval.empty() ) - g->Gripe("assign-op should not have an \"eval\"", eval); - - auto lhs_field = (ot[0] == ZAM_OT_ASSIGN_FIELD); - auto rhs_field = lhs_field && ot.size() > 2 && (ot[2] == ZAM_OT_INT); - auto constant_op = (ot[1] == ZAM_OT_CONSTANT); - - string rhs = constant_op ? "z.c" : "frame[z.v2]"; - - auto acc = ".As" + accessor + "()"; - - if ( accessor == "Any" && constant_op && ! 
rhs_field ) - { - // "any_val = constant" or "x$any_val = constant". - // - // These require special-casing, because to avoid going - // through a CoerceToAny operation, we allow expressing - // these directly. They don't fit with the usual assignment - // paradigm since the RHS differs in type from the LHS. - Emit("auto v = z.c.ToVal(z.t);"); - - if ( lhs_field ) - { - Emit("auto r = frame[z.v1].AsRecord();"); - Emit("auto& f = r->RawField(z.v2);"); - } - else - Emit("auto& f = frame[z.v1];"); - - Emit("zeek::Unref(f.ManagedVal());"); - Emit("f = ZVal(v.release());"); - } - - else if ( rhs_field ) - { - // The following is counter-intuitive, but comes from the - // fact that we build out the instruction parameters as - // an echo of the method parameters, and for this case that - // means that the RHS field offset comes *before*, not after, - // the LHS field offset. - auto lhs_offset = constant_op ? 3 : 4; - auto rhs_offset = lhs_offset - 1; - - Emit("auto v = " + rhs + ".AsRecord()->RawOptField(z.v" + to_string(rhs_offset) + - "); // note, RHS field before LHS field"); - - Emit("if ( ! v )"); - BeginBlock(); - Emit("ZAM_run_time_error(z.loc, \"field value missing\");"); - Emit("break;"); - EndBlock(); - - auto slot = "z.v" + to_string(lhs_offset); - Emit("auto r = frame[z.v1].AsRecord();"); - Emit("auto& f = r->RawField(" + slot + "); // note, LHS field after RHS field"); - - if ( is_managed ) - { - Emit("zeek::Ref((*v)" + acc + ");"); - Emit("zeek::Unref(f.ManagedVal());"); - } - - Emit("f = *v;"); - } - - else - { - if ( is_managed ) - Emit("zeek::Ref(" + rhs + acc + ");"); - - if ( lhs_field ) - { - auto lhs_offset = constant_op ? 
2 : 3; - auto slot = "z.v" + to_string(lhs_offset); - Emit("auto r = frame[z.v1].AsRecord();"); - Emit("auto& f = r->RawField(" + slot + ");"); - - if ( is_managed ) - Emit("zeek::Unref(f.ManagedVal());"); - - Emit("f = " + rhs + ";"); - } - - else - { - if ( is_managed ) - Emit("zeek::Unref(frame[z.v1].ManagedVal());"); - - Emit("frame[z.v1] = ZVal(" + rhs + acc + ");"); - } - } - - if ( lhs_field ) - Emit("r->Modified();"); - } - -void ZAM_OpTemplate::GenAssignOpValCore(const string& eval, const string& accessor, bool is_managed) - { - auto v = GetAssignVal(); - - Emit(eval); - - // Maps Zeek types to how to get the underlying value from a ValPtr. - static unordered_map val_accessors = { - {"Addr", "->AsAddrVal()"}, {"Any", ".get()"}, - {"Count", "->AsCount()"}, {"Double", "->AsDouble()"}, - {"Int", "->AsInt()"}, {"Pattern", "->AsPatternVal()"}, - {"String", "->AsStringVal()"}, {"SubNet", "->AsSubNetVal()"}, - {"Table", "->AsTableVal()"}, {"Vector", "->AsVectorVal()"}, - {"File", "->AsFile()"}, {"Func", "->AsFunc()"}, - {"List", "->AsListVal()"}, {"Opaque", "->AsOpaqueVal()"}, - {"Record", "->AsRecordVal()"}, {"Type", "->AsTypeVal()"}, - }; - - auto val_accessor = val_accessors[accessor]; - - string rhs; - if ( IsInternalOp() ) - rhs = v + val_accessor; - else - rhs = v + ".As" + accessor + "()"; - - if ( is_managed ) - { - Emit("auto rhs = " + rhs + ";"); - Emit("zeek::Ref(rhs);"); - Emit("Unref(frame[z.v1].ManagedVal());"); - Emit("frame[z.v1] = ZVal(rhs);"); - } - else - Emit("frame[z.v1] = ZVal(" + rhs + ");"); - } - -string ZAM_OpTemplate::MethodName(const vector& ot) const - { - return base_name + OpSuffix(ot); - } - -string ZAM_OpTemplate::MethodDeclare(const vector& ot, ZAM_InstClass zc) - { - ArgsManager args(ot, zc); - return args.Decls(); - } - -string ZAM_OpTemplate::OpSuffix(const vector& ot) const - { - string os; - for ( auto& o : ot ) - os += ot_to_char[o]; - return os; - } - -string ZAM_OpTemplate::SkipWS(const string& s) const - { - auto sp = 
s.c_str(); - while ( *sp && isspace(*sp) ) - ++sp; - - return sp; - } - -void ZAM_OpTemplate::Emit(const string& s) - { - g->Emit(curr_et, s); - } - -void ZAM_OpTemplate::EmitNoNL(const string& s) - { - g->SetNoNL(true); - Emit(s); - g->SetNoNL(false); - } - -void ZAM_OpTemplate::IndentUp() - { - g->IndentUp(); - } - -void ZAM_OpTemplate::IndentDown() - { - g->IndentDown(); - } - -void ZAM_UnaryOpTemplate::Instantiate() - { - UnaryInstantiate(); - } - -void ZAM_DirectUnaryOpTemplate::Instantiate() - { - EmitTo(DirectDef); - Emit("case EXPR_" + cname + ":\treturn " + direct + "(lhs, rhs);"); - } - -// Maps op-type mnemonics to the corresponding internal value used by Gen-ZAM. -static unordered_map expr_type_names = { - {'*', ZAM_EXPR_TYPE_DEFAULT}, {'A', ZAM_EXPR_TYPE_ADDR}, {'a', ZAM_EXPR_TYPE_ANY}, - {'D', ZAM_EXPR_TYPE_DOUBLE}, {'f', ZAM_EXPR_TYPE_FILE}, {'F', ZAM_EXPR_TYPE_FUNC}, - {'I', ZAM_EXPR_TYPE_INT}, {'L', ZAM_EXPR_TYPE_LIST}, {'X', ZAM_EXPR_TYPE_NONE}, - {'O', ZAM_EXPR_TYPE_OPAQUE}, {'P', ZAM_EXPR_TYPE_PATTERN}, {'R', ZAM_EXPR_TYPE_RECORD}, - {'S', ZAM_EXPR_TYPE_STRING}, {'N', ZAM_EXPR_TYPE_SUBNET}, {'T', ZAM_EXPR_TYPE_TABLE}, - {'t', ZAM_EXPR_TYPE_TYPE}, {'U', ZAM_EXPR_TYPE_UINT}, {'V', ZAM_EXPR_TYPE_VECTOR}, -}; - -// Inverse of the above. -static unordered_map expr_name_types; - -ZAM_ExprOpTemplate::ZAM_ExprOpTemplate(ZAMGen* _g, string _base_name) - : ZAM_OpTemplate(_g, _base_name) - { - static bool did_map_init = false; - - if ( ! did_map_init ) - { // Create the inverse mapping. 
- for ( auto& tn : expr_type_names ) - expr_name_types[tn.second] = tn.first; - - did_map_init = true; - } - } - -void ZAM_ExprOpTemplate::Parse(const string& attr, const string& line, const Words& words) - { - if ( attr == "op-type" ) - { - if ( words.size() == 1 ) - g->Gripe("op-type needs arguments", line); - - for ( auto i = 1U; i < words.size(); ++i ) - { - auto& w_i = words[i]; - if ( w_i.size() != 1 ) - g->Gripe("bad op-type argument", w_i); - - auto et_c = w_i.c_str()[0]; - if ( expr_type_names.count(et_c) == 0 ) - g->Gripe("bad op-type argument", w_i); - - AddExprType(expr_type_names[et_c]); - } - } - - else if ( attr == "includes-field-op" ) - { - if ( words.size() != 1 ) - g->Gripe("includes-field-op does not take any arguments", line); - - SetIncludesFieldOp(); - } - - else if ( attr == "eval-type" ) - { - if ( words.size() < 3 ) - g->Gripe("eval-type needs type and evaluation", line); - - auto& type = words[1]; - if ( type.size() != 1 ) - g->Gripe("bad eval-type type", type); - - auto type_c = type.c_str()[0]; - if ( expr_type_names.count(type_c) == 0 ) - g->Gripe("bad eval-type type", type); - - auto et = expr_type_names[type_c]; - - if ( expr_types.count(et) == 0 ) - g->Gripe("eval-type type not present in eval-type", type); - - auto eval = g->SkipWords(line, 2); - eval += GatherEval(); - AddEvalSet(et, eval); - } - - else if ( attr == "eval-mixed" ) - { - if ( words.size() < 4 ) - g->Gripe("eval-mixed needs types and evaluation", line); - - auto& type1 = words[1]; - auto& type2 = words[2]; - if ( type1.size() != 1 || type2.size() != 1 ) - g->Gripe("bad eval-mixed types", line); - - auto type_c1 = type1.c_str()[0]; - auto type_c2 = type2.c_str()[0]; - if ( expr_type_names.count(type_c1) == 0 || expr_type_names.count(type_c2) == 0 ) - g->Gripe("bad eval-mixed types", line); - - auto et1 = expr_type_names[type_c1]; - auto et2 = expr_type_names[type_c2]; - - if ( eval_set.count(et1) > 0 ) - g->Gripe("eval-mixed uses type also included in op-type", 
line); - - auto eval = g->SkipWords(line, 3); - eval += GatherEval(); - AddEvalSet(et1, et2, eval); - } - - else if ( attr == "eval-pre" ) - { - if ( words.size() < 2 ) - g->Gripe("eval-pre needs evaluation", line); - - auto eval = g->SkipWords(line, 1); - eval += GatherEval(); - - SetPreEval(eval); - } - - else - // Not an attribute specific to expr-op's. - ZAM_OpTemplate::Parse(attr, line, words); - } - -void ZAM_ExprOpTemplate::Instantiate() - { - InstantiateOp(OperandTypes(), IncludesVectorOp()); - - if ( op_types.size() > 1 && op_types[1] == ZAM_OT_CONSTANT ) - InstantiateC1(op_types, op_types.size() - 1); - if ( op_types.size() > 2 && op_types[2] == ZAM_OT_CONSTANT ) - InstantiateC2(op_types, op_types.size() - 1); - if ( op_types.size() > 3 && op_types[3] == ZAM_OT_CONSTANT ) - InstantiateC3(op_types); - - bool all_var = true; - for ( auto i = 1U; i < op_types.size(); ++i ) - if ( op_types[i] != ZAM_OT_VAR ) - all_var = false; - - if ( all_var ) - InstantiateV(op_types); - - if ( op_types.size() == 3 && op_types[1] == ZAM_OT_RECORD_FIELD && op_types[2] == ZAM_OT_INT ) - InstantiateV(op_types); - } - -void ZAM_ExprOpTemplate::InstantiateC1(const vector& ots, int arity, bool do_vec) - { - string args = "lhs, r1->AsConstExpr()"; - - if ( arity == 1 && ots[0] == ZAM_OT_RECORD_FIELD ) - args += ", rhs->AsFieldExpr()->Field()"; - - else if ( arity > 1 ) - { - args += ", "; - - if ( ots[2] == ZAM_OT_RECORD_FIELD ) - args += "rhs->AsFieldExpr()->Field()"; - else - args += "r2->AsNameExpr()"; - } - - auto m = MethodName(ots); - - EmitTo(C1Def); - - EmitNoNL("case EXPR_" + cname + ":"); - - if ( do_vec ) - DoVectorCase(m, args); - else - EmitUp("return " + m + "(" + args + ");"); - - if ( IncludesFieldOp() ) - { - EmitTo(C1FieldDef); - Emit("case EXPR_" + cname + ":\treturn " + m + "_field(" + args + ", field);"); - } - } - -void ZAM_ExprOpTemplate::InstantiateC2(const vector& ots, int arity) - { - string args = "lhs, r1->AsNameExpr(), r2->AsConstExpr()"; - - if ( 
arity == 3 ) - args += ", r3->AsNameExpr()"; - - auto method = MethodName(ots); - auto m = method.c_str(); - - EmitTo(C2Def); - Emit("case EXPR_" + cname + ":\treturn " + m + "(" + args + ");"); - - if ( IncludesFieldOp() ) - { - EmitTo(C2FieldDef); - Emit("case EXPR_" + cname + ":\treturn " + m + "_field(" + args + ", field);"); - } - } - -void ZAM_ExprOpTemplate::InstantiateC3(const vector& ots) - { - EmitTo(C3Def); - Emit("case EXPR_" + cname + ":\treturn " + MethodName(ots) + - "(lhs, r1->AsNameExpr(), r2->AsNameExpr(), r3->AsConstExpr());"); - } - -void ZAM_ExprOpTemplate::InstantiateV(const vector& ots) - { - auto m = MethodName(ots); - - string args = "lhs, r1->AsNameExpr()"; - - if ( ots.size() >= 3 ) - { - if ( ots[2] == ZAM_OT_INT ) - { - string acc_flav = IncludesFieldOp() ? "Has" : ""; - args += ", rhs->As" + acc_flav + "FieldExpr()->Field()"; - } - else - args += ", r2->AsNameExpr()"; - - if ( ots.size() == 4 ) - args += ", r3->AsNameExpr()"; - } - - EmitTo(VDef); - EmitNoNL("case EXPR_" + cname + ":"); - - if ( IncludesVectorOp() ) - DoVectorCase(m, args); - else - EmitUp("return " + m + "(" + args + ");"); - - if ( IncludesFieldOp() ) - { - EmitTo(VFieldDef); - Emit("case EXPR_" + cname + ":\treturn " + m + "_field(" + args + ", field);"); - } - } - -void ZAM_ExprOpTemplate::DoVectorCase(const string& m, const string& args) - { - NL(); - IndentUp(); - Emit("if ( rt->Tag() == TYPE_VECTOR )"); - EmitUp("return " + m + "_vec(" + args + ");"); - Emit("else"); - EmitUp("return " + m + "(" + args + ");"); - IndentDown(); - } - -void ZAM_ExprOpTemplate::BuildInstructionCore(const string& params, const string& suffix, - ZAM_InstClass zc) - { - Emit("auto tag = t->Tag();"); - Emit("auto i_t = t->InternalType();"); - - int ncases = 0; - - for ( auto& [et1, et2_map] : eval_mixed_set ) - for ( auto& [et2, eval] : et2_map ) - GenMethodTest(et1, et2, params, suffix, ++ncases > 1, zc); - - bool do_default = false; - - for ( auto et : ExprTypes() ) - { - if ( et == 
ZAM_EXPR_TYPE_DEFAULT ) - do_default = true; - else - GenMethodTest(et, et, params, suffix, ++ncases > 1, zc); - } - - Emit("else"); - - if ( do_default ) - { - auto op = g->GenOpCode(this, suffix, zc); - EmitUp("z = GenInst(" + op + ", " + params + ");"); - } - - else - EmitUp("reporter->InternalError(\"bad tag when generating method core\");"); - } - -void ZAM_ExprOpTemplate::GenMethodTest(ZAM_ExprType et1, ZAM_ExprType et2, const string& params, - const string& suffix, bool do_else, ZAM_InstClass zc) - { - // Maps ZAM_ExprType's to the information needed (variable name, - // constant to compare it against) to identify using an "if" test - // that a given AST Expr node employs the given type of operand. - static map> if_tests = { - {ZAM_EXPR_TYPE_ADDR, {"i_t", "TYPE_INTERNAL_ADDR"}}, - {ZAM_EXPR_TYPE_ANY, {"tag", "TYPE_ANY"}}, - {ZAM_EXPR_TYPE_DOUBLE, {"i_t", "TYPE_INTERNAL_DOUBLE"}}, - {ZAM_EXPR_TYPE_FILE, {"tag", "TYPE_FILE"}}, - {ZAM_EXPR_TYPE_FUNC, {"tag", "TYPE_FUNC"}}, - {ZAM_EXPR_TYPE_INT, {"i_t", "TYPE_INTERNAL_INT"}}, - {ZAM_EXPR_TYPE_LIST, {"tag", "TYPE_LIST"}}, - {ZAM_EXPR_TYPE_OPAQUE, {"tag", "TYPE_OPAQUE"}}, - {ZAM_EXPR_TYPE_PATTERN, {"tag", "TYPE_PATTERN"}}, - {ZAM_EXPR_TYPE_RECORD, {"tag", "TYPE_RECORD"}}, - {ZAM_EXPR_TYPE_STRING, {"i_t", "TYPE_INTERNAL_STRING"}}, - {ZAM_EXPR_TYPE_SUBNET, {"i_t", "TYPE_INTERNAL_SUBNET"}}, - {ZAM_EXPR_TYPE_TABLE, {"tag", "TYPE_TABLE"}}, - {ZAM_EXPR_TYPE_TYPE, {"tag", "TYPE_TYPE"}}, - {ZAM_EXPR_TYPE_UINT, {"i_t", "TYPE_INTERNAL_UNSIGNED"}}, - {ZAM_EXPR_TYPE_VECTOR, {"tag", "TYPE_VECTOR"}}, - }; - - if ( if_tests.count(et1) == 0 ) - g->Gripe("bad op-type", op_loc); - - auto if_test = if_tests[et1]; - auto if_var = if_test.first; - auto if_val = if_test.second; - - string test = "if ( " + if_var + " == " + if_val + " )"; - if ( do_else ) - test = "else " + test; - - Emit(test); - - auto op_suffix = suffix + "_" + expr_name_types[et1]; - if ( et2 != et1 ) - op_suffix += expr_name_types[et2]; - - auto op = 
g->GenOpCode(this, op_suffix, zc); - EmitUp("z = GenInst(" + op + ", " + params + ");"); - } - -EvalInstance::EvalInstance(ZAM_ExprType _lhs_et, ZAM_ExprType _op1_et, ZAM_ExprType _op2_et, - string _eval, bool _is_def) - { - lhs_et = _lhs_et; - op1_et = _op1_et; - op2_et = _op2_et; - eval = move(_eval); - is_def = _is_def; - } - -string EvalInstance::LHSAccessor(bool is_ptr) const - { - if ( lhs_et == ZAM_EXPR_TYPE_NONE || lhs_et == ZAM_EXPR_TYPE_DEFAULT ) - return ""; - - string deref = is_ptr ? "->" : "."; - string acc = find_type_accessor(lhs_et); - - return deref + acc; - } - -string EvalInstance::Accessor(ZAM_ExprType et, bool is_ptr) const - { - if ( et == ZAM_EXPR_TYPE_NONE || et == ZAM_EXPR_TYPE_DEFAULT ) - return ""; - - string deref = is_ptr ? "->" : "."; - return deref + "As" + find_type_info(et).accessor + "()"; - } - -string EvalInstance::OpMarker() const - { - if ( op1_et == ZAM_EXPR_TYPE_DEFAULT || op1_et == ZAM_EXPR_TYPE_NONE ) - return ""; - - if ( op1_et == op2_et ) - return "_" + find_type_info(op1_et).suffix; - - return "_" + find_type_info(op1_et).suffix + find_type_info(op2_et).suffix; - } - -void ZAM_ExprOpTemplate::InstantiateEval(const vector& ot_orig, - const string& suffix, ZAM_InstClass zc) - { - if ( expr_types.empty() ) - { // No operand types to expand over. - ZAM_OpTemplate::InstantiateEval(ot_orig, suffix, zc); - return; - } - - auto ot = ot_orig; - if ( zc == ZIC_FIELD ) - // Make room for the offset. - ot.emplace_back(ZAM_OT_INT); - - auto ot_str = OpSuffix(ot); - - // Some of these might not wind up being used, but no harm in - // initializing them in case they are. - string lhs, op1, op2; - string branch_target = "z.v"; - - EmitTarget emit_target = Eval; - - if ( zc == ZIC_VEC ) - { - lhs = "vec1[i]"; - op1 = "vec2[i]"; - op2 = "vec3[i]"; - - emit_target = Arity() == 1 ? Vec1Eval : Vec2Eval; - } - - else - { - lhs = "frame[z.v1]"; - - auto op1_offset = zc == ZIC_COND ? 
1 : 2; - auto op2_offset = op1_offset + 1; - bool ot1_const = ot[1] == ZAM_OT_CONSTANT; - bool ot2_const = Arity() >= 2 && ot[2] == ZAM_OT_CONSTANT; - - if ( ot1_const ) - { - op1 = "z.c"; - --op2_offset; - branch_target += "2"; - } - else - { - op1 = "frame[z.v" + to_string(op1_offset) + "]"; - - if ( Arity() > 1 && ot[2] == ZAM_OT_VAR ) - branch_target += "3"; - else - branch_target += "2"; - } - - if ( ot2_const ) - op2 = "z.c"; - else - op2 = "frame[z.v" + to_string(op2_offset) + "]"; - - if ( zc == ZIC_FIELD ) - { - // Compute the slot holding the field offset. - - auto f = - // The first slots are taken up by the - // assignment slot and the operands ... - Arity() + 1 + - // ... and slots are numbered starting at 1. - +1; - - if ( ot1_const || ot2_const ) - // One of the operand slots won't be needed - // due to the presence of a constant. - // (It's never the case that both operands - // are constants - those instead get folded.) - --f; - - lhs += ".AsRecord()->RawField(z.v" + to_string(f) + ")"; - } - } - - vector eval_instances; - - for ( auto et : expr_types ) - { - auto is_def = eval_set.count(et) == 0; - string eval = is_def ? GetEval() : eval_set[et]; - auto lhs_et = IsConditionalOp() ? ZAM_EXPR_TYPE_INT : et; - eval_instances.emplace_back(lhs_et, et, et, eval, is_def); - } - - if ( zc != ZIC_VEC ) - for ( auto em1 : eval_mixed_set ) - { - auto et1 = em1.first; - for ( auto em2 : em1.second ) - { - auto et2 = em2.first; - - // For the LHS, either its expression type is - // ignored, or if it's a conditional, so just - // note it for the latter. 
- auto lhs_et = ZAM_EXPR_TYPE_INT; - eval_instances.emplace_back(lhs_et, et1, et2, em2.second, false); - } - } - - for ( auto& ei : eval_instances ) - { - auto lhs_accessor = ei.LHSAccessor(); - if ( HasExplicitResultType() ) - lhs_accessor = ""; - - string lhs_ei = lhs; - if ( zc != ZIC_VEC ) - lhs_ei += lhs_accessor; - - auto op1_ei = op1 + ei.Op1Accessor(zc == ZIC_VEC); - auto op2_ei = op2 + ei.Op2Accessor(zc == ZIC_VEC); - - auto eval = SkipWS(ei.Eval()); - - auto has_target = eval.find("$$") != string::npos; - - if ( zc == ZIC_VEC ) - { - const char* rhs; - if ( has_target ) - rhs = "\\$\\$ = ([^;\n]*)"; - else - rhs = "^[^;\n]*"; - - auto replacement = VecEvalRE(has_target); - - eval = regex_replace(eval, regex(rhs), replacement); - } - - auto is_none = ei.LHS_ET() == ZAM_EXPR_TYPE_NONE; - auto is_default = ei.LHS_ET() == ZAM_EXPR_TYPE_DEFAULT; - - if ( ! is_none && ! is_default && find_type_info(ei.LHS_ET()).is_managed && - ! HasExplicitResultType() ) - { - auto delim = zc == ZIC_VEC ? "->" : "."; - auto pre = "auto hold_lhs = " + lhs + delim + "ManagedVal();\n\t"; - auto post = "\tUnref(hold_lhs);"; - eval = pre + eval + post; - } - - eval = regex_replace(eval, regex("\\$1"), op1_ei); - eval = regex_replace(eval, regex("\\$2"), op2_ei); - - string pre = GetPreEval(); - pre = regex_replace(pre, regex("\\$1"), op1_ei); - pre = regex_replace(pre, regex("\\$2"), op2_ei); - - if ( has_target ) - eval = regex_replace(eval, regex("\\$\\$"), lhs_ei); - - else if ( zc == ZIC_COND ) - { // Aesthetics: get rid of trailing newlines. - eval = regex_replace(eval, regex("\n"), ""); - eval = "if ( ! (" + eval + ") ) " + "{ pc = " + branch_target + "; continue; }"; - } - - else if ( ! is_none && (ei.IsDefault() || IsConditionalOp()) ) - { - eval = lhs_ei + " = " + eval; - - // Ensure a single terminating semicolon. 
- eval = regex_replace(eval, regex(";*\n"), ";\n"); - } - - eval = pre + eval; - - auto full_suffix = ot_str + suffix + ei.OpMarker(); - - ZAM_OpTemplate::InstantiateEval(emit_target, full_suffix, eval, zc); - - if ( zc == ZIC_VEC ) - { - string dispatch_params = "frame[z.v1].AsVectorRef(), frame[z.v2].AsVector()"; - - if ( Arity() == 2 ) - dispatch_params += ", frame[z.v3].AsVector()"; - - auto op_code = g->GenOpCode(this, "_" + full_suffix); - auto dispatch = "vec_exec(" + op_code + ", z.t, " + dispatch_params + ", z);"; - - ZAM_OpTemplate::InstantiateEval(Eval, full_suffix, dispatch, zc); - } - } - } - -void ZAM_UnaryExprOpTemplate::Parse(const string& attr, const string& line, const Words& words) - { - if ( attr == "no-const" ) - { - if ( words.size() != 1 ) - g->Gripe("extraneous argument to no-const", line); - - SetNoConst(); - } - - else if ( attr == "explicit-result-type" ) - { - if ( words.size() != 1 ) - g->Gripe("extraneous argument to explicit-result-type", line); - SetHasExplicitResultType(); - } - - else - ZAM_ExprOpTemplate::Parse(attr, line, words); - } - -void ZAM_UnaryExprOpTemplate::Instantiate() - { - UnaryInstantiate(); - - vector ots = {ZAM_OT_VAR, ZAM_OT_CONSTANT}; - - if ( ! NoConst() ) - InstantiateC1(ots, 1, IncludesVectorOp()); - - ots[1] = ZAM_OT_VAR; - InstantiateV(ots); - } - -void ZAM_UnaryExprOpTemplate::BuildInstruction(const vector& ot, - const string& params, const string& suffix, - ZAM_InstClass zc) - { - const auto& ets = ExprTypes(); - - if ( ets.size() == 1 && ets.count(ZAM_EXPR_TYPE_NONE) == 1 ) - { - ZAM_ExprOpTemplate::BuildInstruction(ot, params, suffix, zc); - return; - } - - auto constant_op = ot[1] == ZAM_OT_CONSTANT; - string type_src = constant_op ? "c" : "n2"; - - if ( ot[0] == ZAM_OT_ASSIGN_FIELD ) - { - type_src = constant_op ? 
"n" : "n1"; - Emit("auto " + type_src + " = flhs->GetOp1()->AsNameExpr();"); - Emit("auto t = flhs->GetType();"); - Emit("int field = flhs->Field();"); - } - - else - { - if ( IsAssignOp() ) - type_src = constant_op ? "n" : "n1"; - - auto type_suffix = zc == ZIC_VEC ? "->Yield();" : ";"; - Emit("auto t = " + type_src + "->GetType()" + type_suffix); - } - - BuildInstructionCore(params, suffix, zc); - - if ( IsAssignOp() && IsFieldOp() ) - // These can't take the type from the LHS variable, since - // that's the enclosing record and not the field within it. - Emit("z.t = t;"); - - else if ( zc == ZIC_VEC ) - { - if ( constant_op ) - Emit("z.t = n->GetType();"); - else - Emit("z.t = n1->GetType();"); - } - } - -ZAM_AssignOpTemplate::ZAM_AssignOpTemplate(ZAMGen* _g, string _base_name) - : ZAM_UnaryExprOpTemplate(_g, _base_name) - { - // Assignments apply to every valid form of ExprType. - for ( auto& etn : expr_type_names ) - { - auto et = etn.second; - if ( et != ZAM_EXPR_TYPE_NONE && et != ZAM_EXPR_TYPE_DEFAULT ) - AddExprType(et); - } - } - -void ZAM_AssignOpTemplate::Parse(const string& attr, const string& line, const Words& words) - { - if ( attr == "field-op" ) - { - if ( words.size() != 1 ) - g->Gripe("field-op does not take any arguments", line); - - SetFieldOp(); - } - - else - ZAM_OpTemplate::Parse(attr, line, words); - } - -void ZAM_AssignOpTemplate::Instantiate() - { - if ( op_types.size() != 1 ) - g->Gripe("operation needs precisely one \"type\"", op_loc); - - vector ots; - ots.push_back(op_types[0]); - - // Build constant/variable versions ... - ots.push_back(ZAM_OT_CONSTANT); - - if ( ots[0] == ZAM_OT_RECORD_FIELD ) - ots.push_back(ZAM_OT_INT); - - InstantiateOp(ots, false); - if ( IsFieldOp() ) - InstantiateC1(ots, 1); - - ots[1] = ZAM_OT_VAR; - InstantiateOp(ots, false); - - // ... and for assignments to fields, additional field versions. 
- if ( ots[0] == ZAM_OT_ASSIGN_FIELD ) - { - ots.push_back(ZAM_OT_INT); - InstantiateOp(ots, false); - - ots[1] = ZAM_OT_CONSTANT; - InstantiateOp(ots, false); - } - - else if ( IsFieldOp() ) - InstantiateV(ots); - } - -void ZAM_BinaryExprOpTemplate::Instantiate() - { - // As usual, the first slot receives the operator's result. - vector ots = {ZAM_OT_VAR}; - ots.resize(3); - - // Build each combination for constant/variable operand, - // except skip constant/constant as that is always folded. - - // We only include vector operations when both operands - // are non-constants. - - ots[1] = ZAM_OT_CONSTANT; - ots[2] = ZAM_OT_VAR; - InstantiateOp(ots, false); - - if ( ! IsInternalOp() ) - InstantiateC1(ots, 2, false); - - ots[1] = ZAM_OT_VAR; - ots[2] = ZAM_OT_CONSTANT; - InstantiateOp(ots, false); - - if ( ! IsInternalOp() ) - InstantiateC2(ots, 2); - - ots[2] = ZAM_OT_VAR; - InstantiateOp(ots, IncludesVectorOp()); - - if ( ! IsInternalOp() ) - InstantiateV(ots); - } - -void ZAM_BinaryExprOpTemplate::BuildInstruction(const vector& ot, - const string& params, const string& suffix, - ZAM_InstClass zc) - { - auto constant_op = ot[1] == ZAM_OT_CONSTANT; - string type_src = constant_op ? "c" : "n2"; - auto type_suffix = zc == ZIC_VEC ? 
"->Yield();" : ";"; - Emit("auto t = " + type_src + "->GetType()" + type_suffix); - BuildInstructionCore(params, suffix, zc); - - if ( zc == ZIC_VEC ) - Emit("z.t = n1->GetType();"); - } - -void ZAM_RelationalExprOpTemplate::Instantiate() - { - ZAM_BinaryExprOpTemplate::Instantiate(); - - EmitTo(Cond); - - Emit("case EXPR_" + cname + ":"); - IndentUp(); - Emit("if ( n1 && n2 )"); - EmitUp("return " + cname + "VVV_cond(n1, n2);"); - Emit("else if ( n1 )"); - EmitUp("return " + cname + "VVC_cond(n1, c);"); - Emit("else"); - EmitUp("return " + cname + "VCV_cond(c, n2);"); - IndentDown(); - NL(); - } - -void ZAM_RelationalExprOpTemplate::BuildInstruction(const vector& ot, - const string& params, const string& suffix, - ZAM_InstClass zc) - { - string op1; - - if ( zc == ZIC_COND ) - { - if ( ot[1] == ZAM_OT_CONSTANT ) - op1 = "c"; - else if ( ot[2] == ZAM_OT_CONSTANT ) - op1 = "n"; - else - op1 = "n1"; - } - else - op1 = "n2"; - - auto type_suffix = zc == ZIC_VEC ? "->Yield();" : ";"; - Emit("auto t = " + op1 + "->GetType()" + type_suffix); - BuildInstructionCore(params, suffix, zc); - - if ( zc == ZIC_VEC ) - Emit("z.t = n1->GetType();"); - } - -void ZAM_InternalBinaryOpTemplate::Parse(const string& attr, const string& line, const Words& words) - { - if ( attr == "op-accessor" ) - { - if ( words.size() != 2 ) - g->Gripe("op-accessor takes one argument", line); - - SetOpAccessor(words[1]); - } - - else if ( attr == "op1-accessor" ) - { - if ( words.size() != 2 ) - g->Gripe("op-accessor1 takes one argument", line); - - SetOp1Accessor(words[1]); - } - - else if ( attr == "op2-accessor" ) - { - if ( words.size() != 2 ) - g->Gripe("op-accessor2 takes one argument", line); - - SetOp2Accessor(words[1]); - } - - else - ZAM_BinaryExprOpTemplate::Parse(attr, line, words); - } - -void ZAM_InternalBinaryOpTemplate::InstantiateEval(const vector& ot, - const string& suffix, ZAM_InstClass zc) - { - assert(ot.size() == 3); - - auto op1_const = ot[1] == ZAM_OT_CONSTANT; - auto 
op2_const = ot[2] == ZAM_OT_CONSTANT; - - string op1 = op1_const ? "z.c" : "frame[z.v2]"; - string op2 = op2_const ? "z.c" : (op1_const ? "frame[z.v2]" : "frame[z.v3]"); - - string prelude = "auto op1 = " + op1 + "." + op1_accessor + ";\n"; - prelude += "auto op2 = " + op2 + "." + op2_accessor + ";\n"; - - auto eval = prelude + GetEval(); - - auto& ets = ExprTypes(); - if ( ! ets.empty() ) - { - if ( ets.size() != 1 ) - g->Gripe("internal-binary-op's can have at most one op-type", op_loc); - - for ( auto& et : ets ) - { - auto acc = find_type_accessor(et); - auto lhs = "frame[z.v1]." + acc; - eval = regex_replace(eval, regex("\\$\\$"), lhs); - } - } - - ZAM_OpTemplate::InstantiateEval(Eval, OpSuffix(ot) + suffix, eval, zc); - } - -void ZAM_InternalOpTemplate::Parse(const string& attr, const string& line, const Words& words) - { - if ( attr != "num-call-args" ) - { - if ( attr == "indirect-call" ) - { - if ( words.size() != 1 ) - g->Gripe("indirect-call takes one argument", line); - // Note, currently only works with a *subsequent* - // num-call-args, whose setting needs to be 'n'. - is_indirect_call = true; - } - else - ZAM_OpTemplate::Parse(attr, line, words); - - return; - } - - if ( words.size() != 2 ) - g->Gripe("num-call-args takes one argument", line); - - eval = "std::vector args;\n"; - - auto& arg = words[1]; - int n = arg == "n" ? -1 : stoi(arg); - - auto arg_offset = HasAssignVal() ? 1 : 0; - auto arg_slot = arg_offset + 1; - - string func = "z.func"; - - if ( n == 1 ) - { - eval += "args.push_back("; - if ( op_types[arg_offset] == ZAM_OT_CONSTANT ) - eval += "z.c"; - else - eval += "frame[z.v" + to_string(arg_slot) + "]"; - - eval += ".ToVal(z.t));\n"; - } - - else if ( n != 0 ) - { - eval += "auto aux = z.aux;\n"; - - if ( n < 0 ) - { - if ( is_indirect_call ) - { - func = "func"; - - eval += "auto sel = z.v" + to_string(arg_slot) + ";\n"; - eval += "auto func = (sel < 0) ? 
"; - eval += "aux->id_val->GetVal()->AsFunc() : "; - eval += "frame[sel].AsFunc();\n"; - } - - eval += "auto n = aux->n;\n"; - eval += "for ( auto i = 0; i < n; ++i )\n"; - eval += "\targs.push_back(aux->ToVal(frame, i));\n"; - } - - else - for ( auto i = 0; i < n; ++i ) - { - eval += "args.push_back(aux->ToVal(frame, "; - eval += to_string(i); - eval += "));\n"; - } - } - - eval += "f->SetCallLoc(z.loc);\n"; - - if ( HasAssignVal() ) - { - auto av = GetAssignVal(); - eval += "auto " + av + " = " + func + "->Invoke(&args, f);\n"; - eval += "if ( ! " + av + " ) { ZAM_error = true; break; }\n"; - } - else - eval += "(void) " + func + "->Invoke(&args, f);\n"; - } - -bool TemplateInput::ScanLine(string& line) - { - if ( ! put_back.empty() ) - { - line = put_back; - put_back.clear(); - return true; - } - - char buf[8192]; - - // Read lines, discarding comments, which have to start at the - // beginning of a line. - do - { - if ( ! fgets(buf, sizeof buf, f) ) - return false; - ++loc.line_num; - } while ( buf[0] == '#' ); - - line = buf; - return true; - } - -vector TemplateInput::SplitIntoWords(const string& line) const - { - vector words; - - for ( auto start = line.c_str(); *start && *start != '\n'; ) - { - auto end = start + 1; - while ( *end && ! isspace(*end) ) - ++end; - - words.emplace_back(string(start, end - start)); - - start = end; - while ( *start && isspace(*start) ) - ++start; - } - - return words; - } - -string TemplateInput::SkipWords(const string& line, int n) const - { - auto s = line.c_str(); - - for ( int i = 0; i < n; ++i ) - { - // Find end of current word. - while ( *s && *s != '\n' ) - { - if ( isspace(*s) ) - break; - ++s; - } - - if ( *s == '\n' ) - break; - - // Find start of next word. 
- while ( *s && isspace(*s) ) - ++s; - } - - return string(s); - } - -void TemplateInput::Gripe(const char* msg, const string& input) const - { - auto input_s = input.c_str(); - int n = strlen(input_s); - - fprintf(stderr, "%s, line %d: %s - %s", loc.file_name, loc.line_num, msg, input_s); - if ( n == 0 || input_s[n - 1] != '\n' ) - fprintf(stderr, "\n"); - - exit(1); - } - -void TemplateInput::Gripe(const char* msg, const InputLoc& l) const - { - fprintf(stderr, "%s, line %d: %s\n", l.file_name, l.line_num, msg); - exit(1); - } - -ZAMGen::ZAMGen(int argc, char** argv) - { - auto prog_name = argv[0]; - - if ( argc != 2 ) - { - fprintf(stderr, "usage: %s \n", prog_name); - exit(1); - } - - auto file_name = argv[1]; - auto f = strcmp(file_name, "-") ? fopen(file_name, "r") : stdin; - - if ( ! f ) - { - fprintf(stderr, "%s: cannot open \"%s\"\n", prog_name, file_name); - exit(1); - } - - ti = make_unique(f, prog_name, file_name); - - InitEmitTargets(); - - while ( ParseTemplate() ) - ; - - for ( auto& t : templates ) - t->Instantiate(); - - GenMacros(); - - CloseEmitTargets(); - } - -void ZAMGen::ReadMacro(const string& line) - { - vector mac; - mac.emplace_back(SkipWords(line, 1)); - - string s; - while ( ScanLine(s) ) - { - if ( s.size() <= 1 || ! 
isspace(s.c_str()[0]) ) - { - PutBack(s); - break; - } - - mac.push_back(s); - } - - macros.emplace_back(move(mac)); - } - -void ZAMGen::GenMacros() - { - for ( auto& m : macros ) - { - for ( auto i = 0U; i < m.size(); ++i ) - { - auto ms = m[i]; - if ( i == 0 ) - ms = "#define " + ms; - - if ( i < m.size() - 1 ) - ms = regex_replace(ms, regex("\n"), " \\\n"); - - Emit(EvalMacros, ms); - } - - Emit(EvalMacros, "\n"); - } - } - -string ZAMGen::GenOpCode(const ZAM_OpTemplate* ot, const string& suffix, ZAM_InstClass zc) - { - auto op = "OP_" + ot->CanonicalName() + suffix; - - static unordered_set known_opcodes; - - if ( known_opcodes.count(op) > 0 ) - // We've already done this one, don't re-define its auxiliary - // information. - return op; - - known_opcodes.insert(op); - - IndentUp(); - - // Generate the enum defining the opcode ... - Emit(OpDef, op + ","); - - // ... the "flavor" of how it treats its first operand ... - auto op_comment = ",\t// " + op; - auto op1_always_read = (zc == ZIC_FIELD || zc == ZIC_COND); - auto flavor = op1_always_read ? "OP1_READ" : ot->GetOp1Flavor(); - Emit(Op1Flavor, flavor + op_comment); - - // ... whether it has side effects ... - auto se = ot->HasSideEffects() ? "true" : "false"; - Emit(OpSideEffects, se + op_comment); - - // ... and the switch case that maps the enum to a string - // representation. - auto name = ot->BaseName(); - transform(name.begin(), name.end(), name.begin(), ::tolower); - name += suffix; - transform(name.begin(), name.end(), name.begin(), under_to_dash); - Emit(OpName, "case " + op + ":\treturn \"" + name + "\";"); - - IndentDown(); - - return op; - } - -void ZAMGen::Emit(EmitTarget et, const string& s) - { - assert(et != None); - - if ( gen_files.count(et) == 0 ) - { - fprintf(stderr, "bad generation file type\n"); - exit(1); - } - - FILE* f = gen_files[et]; - - for ( auto i = indent_level; i > 0; --i ) - fputs("\t", f); - - fputs(s.c_str(), f); - - if ( ! 
no_NL && (s.empty() || s.back() != '\n') ) - fputs("\n", f); - } - -void ZAMGen::InitEmitTargets() - { - // Maps an EmitTarget enum to its corresponding filename. - static const unordered_map gen_file_names = { - {None, nullptr}, - {AssignFlavor, "ZAM-AssignFlavorsDefs.h"}, - {C1Def, "ZAM-GenExprsDefsC1.h"}, - {C1FieldDef, "ZAM-GenFieldsDefsC1.h"}, - {C2Def, "ZAM-GenExprsDefsC2.h"}, - {C2FieldDef, "ZAM-GenFieldsDefsC2.h"}, - {C3Def, "ZAM-GenExprsDefsC3.h"}, - {Cond, "ZAM-Conds.h"}, - {DirectDef, "ZAM-DirectDefs.h"}, - {Eval, "ZAM-EvalDefs.h"}, - {EvalMacros, "ZAM-EvalMacros.h"}, - {MethodDecl, "ZAM-MethodDecls.h"}, - {MethodDef, "ZAM-MethodDefs.h"}, - {Op1Flavor, "ZAM-Op1FlavorsDefs.h"}, - {OpDef, "ZAM-OpsDefs.h"}, - {OpName, "ZAM-OpsNamesDefs.h"}, - {OpSideEffects, "ZAM-OpSideEffects.h"}, - {VDef, "ZAM-GenExprsDefsV.h"}, - {VFieldDef, "ZAM-GenFieldsDefsV.h"}, - {Vec1Eval, "ZAM-Vec1EvalDefs.h"}, - {Vec2Eval, "ZAM-Vec2EvalDefs.h"}, - }; - - for ( auto& gfn : gen_file_names ) - { - auto fn = gfn.second; - if ( ! fn ) - continue; - - auto f = fopen(fn, "w"); - if ( ! 
f ) - { - fprintf(stderr, "can't open generation file %s\n", fn); - exit(1); - } - - gen_files[gfn.first] = f; - } - - InitSwitch(C1Def, "C1 assignment"); - InitSwitch(C2Def, "C2 assignment"); - InitSwitch(C3Def, "C3 assignment"); - InitSwitch(VDef, "V assignment"); - - InitSwitch(C1FieldDef, "C1 field assignment"); - InitSwitch(C2FieldDef, "C2 field assignment"); - InitSwitch(VFieldDef, "V field assignment"); - } - -void ZAMGen::InitSwitch(EmitTarget et, string desc) - { - Emit(et, "{"); - Emit(et, "switch ( rhs->Tag() ) {"); - - switch_targets[et] = desc; - } - -void ZAMGen::CloseEmitTargets() - { - FinishSwitches(); - - for ( auto& gf : gen_files ) - fclose(gf.second); - } - -void ZAMGen::FinishSwitches() - { - for ( auto& st : switch_targets ) - { - auto et = st.first; - auto& desc = st.second; - - Emit(et, "default:"); - IndentUp(); - Emit(et, "reporter->InternalError(\"inconsistency in " + desc + - ": %s\", obj_desc(rhs).c_str());"); - IndentDown(); - Emit(et, "}"); - Emit(et, "}"); - } - } - -bool ZAMGen::ParseTemplate() - { - string line; - - if ( ! ScanLine(line) ) - return false; - - if ( line.size() <= 1 ) - // A blank line - no template to parse. - return true; - - auto words = SplitIntoWords(line); - - if ( words.size() < 2 ) - Gripe("too few words at start of template", line); - - auto op = words[0]; - - if ( op == "macro" ) - { - ReadMacro(line); - return true; - } - - auto op_name = words[1]; - - // We track issues with the wrong number of template arguments - // up front, to avoid mis-invoking constructors, but we don't - // report these until later because if the template names a - // bad operation, it's better to report that as the core problem. 
- const char* args_mismatch = nullptr; - - if ( op == "direct-unary-op" ) - { - if ( words.size() != 3 ) - args_mismatch = "direct-unary-op takes 2 arguments"; - } - - else if ( words.size() != 2 ) - args_mismatch = "templates take 1 argument"; - - unique_ptr t; - - if ( op == "op" ) - t = make_unique(this, op_name); - else if ( op == "unary-op" ) - t = make_unique(this, op_name); - else if ( op == "direct-unary-op" && ! args_mismatch ) - t = make_unique(this, op_name, words[2]); - else if ( op == "assign-op" ) - t = make_unique(this, op_name); - else if ( op == "expr-op" ) - t = make_unique(this, op_name); - else if ( op == "unary-expr-op" ) - t = make_unique(this, op_name); - else if ( op == "binary-expr-op" ) - t = make_unique(this, op_name); - else if ( op == "rel-expr-op" ) - t = make_unique(this, op_name); - else if ( op == "internal-binary-op" ) - t = make_unique(this, op_name); - else if ( op == "internal-op" ) - t = make_unique(this, op_name); - else if ( op == "internal-assignment-op" ) - t = make_unique(this, op_name); - - else - Gripe("bad template name", op); - - if ( args_mismatch ) - Gripe(args_mismatch, line); - - t->Build(); - templates.emplace_back(move(t)); - - return true; - } - -int main(int argc, char** argv) - { - try - { - ZAMGen zg(argc, argv); - exit(0); - } - catch ( const std::regex_error& e ) - { - fprintf(stderr, "%s: regular expression error - %s\n", argv[0], e.what()); - exit(1); - } - } diff --git a/src/script_opt/ZAM/Gen-ZAM.h b/src/script_opt/ZAM/Gen-ZAM.h deleted file mode 100644 index e5f2eaec4d..0000000000 --- a/src/script_opt/ZAM/Gen-ZAM.h +++ /dev/null @@ -1,986 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -// Gen-ZAM is a standalone program that takes as input a file specifying -// ZAM operations and from them generates a (large) set of C++ include -// files used to instantiate those operations as low-level ZAM instructions. 
-// (Those files are described in the EmitTarget enumeration below.) -// -// See Ops.in for documentation regarding the format of the ZAM templates. - -#pragma once - -#include -#include -#include -#include -#include -#include -#include - -using std::string; -using std::vector; - -// An instruction can have one of four basic classes. -enum ZAM_InstClass - { - ZIC_REGULAR, // a non-complicated instruction - ZIC_COND, // a conditional branch - ZIC_VEC, // a vector operation - ZIC_FIELD, // a record field assignment - }; - -// For a given instruction operand, its general type. -enum ZAM_OperandType - { - ZAM_OT_CONSTANT, // uses the instruction's associated constant - ZAM_OT_EVENT_HANDLER, // uses the associated event handler - ZAM_OT_INT, // directly specified integer - ZAM_OT_VAR, // frame slot associated with a variable - - ZAM_OT_ASSIGN_FIELD, // record field offset to assign to - ZAM_OT_RECORD_FIELD, // record field offset to access - - // The following wind up the same in the ultimate instruction, - // but they differ in the calling sequences used to generate - // the instruction. - ZAM_OT_AUX, // uses the instruction's "aux" field - ZAM_OT_LIST, // a list, managed via the "aux" field - - ZAM_OT_NONE, // instruction has no direct operands - }; - -// For instructions corresponding to evaluating expressions, the type -// of a given operand. The generator uses these to transform the operand's -// low-level ZVal into a higher-level type expected by the associated -// evaluation code. 
-enum ZAM_ExprType - { - ZAM_EXPR_TYPE_ADDR, - ZAM_EXPR_TYPE_ANY, - ZAM_EXPR_TYPE_DOUBLE, - ZAM_EXPR_TYPE_FUNC, - ZAM_EXPR_TYPE_INT, - ZAM_EXPR_TYPE_PATTERN, - ZAM_EXPR_TYPE_RECORD, - ZAM_EXPR_TYPE_STRING, - ZAM_EXPR_TYPE_SUBNET, - ZAM_EXPR_TYPE_TABLE, - ZAM_EXPR_TYPE_UINT, - ZAM_EXPR_TYPE_VECTOR, - ZAM_EXPR_TYPE_FILE, - ZAM_EXPR_TYPE_OPAQUE, - ZAM_EXPR_TYPE_LIST, - ZAM_EXPR_TYPE_TYPE, - - // Used to specify "apart from the explicitly specified operand - // types, do this action for any other types". - ZAM_EXPR_TYPE_DEFAULT, - - // Used for expressions where the evaluation code for the - // expression deals directly with the operand's ZVal, rather - // than the generator providing a higher-level version. - ZAM_EXPR_TYPE_NONE, - }; - -// We only use the following in the context where the vector's elements -// are individual words from the same line. We don't use it in other -// contexts where we're tracking a bunch of strings. -using Words = vector; - -// Used for error-reporting. -struct InputLoc - { - const char* file_name; - int line_num = 0; - }; - -// An EmitTarget is a generated file to which code will be emitted. -// The different values are used to instruct the generator which target -// is currently of interest. -enum EmitTarget - { - // Indicates that no generated file has yet been specified. - None, - - // Declares/defines methods that take AST nodes and generate - // corresponding ZAM instructions. - MethodDecl, - MethodDef, - - // Switch cases for expressions that are compiled directly, using - // custom methods rather than methods produced by the generator. - DirectDef, - - // Switch cases for invoking various flavors of methods produced - // by the generator for generating ZAM instructions for AST - // expressions. C1/C2/C3 refer to the first/second/third operand - // being a constant. V refers to none of the operands being - // a constant. 
- C1Def, - C2Def, - C3Def, - VDef, - - // The same, but for when the expression is being assigned to - // a record field rather than a variable. There's no "C3" option - // because of how we reduce AST ternary operations. - C1FieldDef, - C2FieldDef, - VFieldDef, - - // Switch cases for compiling relational operations used in - // conditionals. - Cond, - - // Switch cases that provide the C++ code for executing specific - // individual ZAM instructions. - Eval, - - // #define's used to provide the templator's macro functionality. - EvalMacros, - - // Switch cases the provide the C++ code for executing unary - // and binary vector operations. - Vec1Eval, - Vec2Eval, - - // A set of instructions to dynamically generate maps that - // translate a generic ZAM operation (e.g., OP_LOAD_GLOBAL_VV) - // to a specific ZAM instruction, given a specific type - // (e.g., for OP_LOAD_GLOBAL_VV plus TYPE_ADDR, the map yields - // OP_LOAD_GLOBAL_VV_A). - AssignFlavor, - - // A list of values, one per ZAM instruction, that indicate whether - // that instruction writes to its first operand (the most common - // case), reads the operand but doesn't write to it, both reads it - // and writes to it, or none of these apply because the first - // operand isn't a frame variable. See the ZAMOp1Flavor enum - // defined in ZOp.h. - Op1Flavor, - - // A list of boolean values, one per ZAM instruction, that indicate - // whether the instruction has side effects, and thus should not - // be deleted even if its associated assignment is to a dead value - // (one not subsequently used). - OpSideEffects, - - // A list of names enumerating each ZAM instruction. These - // are ZAM opcodes. - OpDef, - - // A list of cases, indexed by ZAM opcode, that return a - // human-readable string of naming the opcode, for use in debugging - // output. For example, for OP_NEGATE_VV_I the corresponding - // string is "negate-VV-I". 
- OpName, - }; - -// A helper class for managing the (ordered) collection of ZAM_OperandType's -// associated with an instruction in order to generate C++ calling sequences -// (both parameters for declarations, and arguments for invocations). -class ArgsManager - { -public: - // Constructed by providing the various ZAM_OperandType's along - // with the instruction's class. - ArgsManager(const vector& ot, ZAM_InstClass ic); - - // Returns a string defining the parameters for a declaration; - // these have full C++ type information along with the parameter - // name. - string Decls() const { return full_decl; } - - // Returns a string for passing the parameters in a function - // call. This is a comma-separated list of the parameter names, - // with no associated C++ types. - string Params() const { return full_params; } - - // Returns the name of the given parameter, indexed starting with 0. - const string& NthParam(int n) const { return params[n]; } - -private: - // Makes sure that each parameter has a unique name. For any - // parameter 'x' that occurs more than once, renames the instances - // "x1", "x2", etc. - void Differentiate(); - - // Maps ZAM_OperandType's to their associated C++ type and - // canonical parameter name. - static std::unordered_map> ot_to_args; - - // For a single argument/parameter, tracks its declaration name, - // C++ type, and the name to use when providing it as a parameter. - // These last two names are potentially distinct when we're - // assigning to record field (which is tracked by the is_field - // member variable), hence the need to track both. - struct Arg - { - string decl_name; - string decl_type; - string param_name; - bool is_field; - }; - - // All of the argument/parameters associated with the collection - // of ZAM_OperandType's. - vector args; - - // Each of the individual parameters. - vector params; - - // See Decls() and Params() above. 
- string full_decl; - string full_params; - }; - -// There are two mutually interacting classes: ZAMGen is the overall -// driver for the ZAM generator, while ZAM_OpTemplate represents a -// single operation template, with subclasses for specific types of -// operations. -class ZAMGen; - -class ZAM_OpTemplate - { -public: - // Instantiated by passing in the ZAMGen driver and the generic - // name for the operation. - ZAM_OpTemplate(ZAMGen* _g, string _base_name); - virtual ~ZAM_OpTemplate() { } - - // Constructs the template's data structures by parsing its - // description (beyond the initial description of the type of - // operation). - void Build(); - - // Tells the object to generate the code/files necessary for - // each of its underlying instructions. - virtual void Instantiate(); - - // Returns the generic name for the operation. - const string& BaseName() const { return base_name; } - - // Returns the canonical name for the operation. This is a - // version of the name that, for expression-based operations, - // can be concatenated with "EXPR_" to get the name of the - // corresponding AST node. - const string& CanonicalName() const { return cname; } - - // Returns a string version of the ZAMOp1Flavor associated - // with this operation. - const string& GetOp1Flavor() const { return op1_flavor; } - - // True if this is an operation with side effects (see OpSideEffects - // above). - bool HasSideEffects() const { return has_side_effects; } - -protected: - // Append to the list of operand types associated with this operation. - void AddOpType(ZAM_OperandType ot) { op_types.push_back(ot); } - // Retrieve the list of operand types associated with this operation. - const vector& OperandTypes() const { return op_types; } - - // Specify the ZAMOp1Flavor associated with this operation. See - // GetOp1Flavor() above for the corresponding accessor. 
- void SetOp1Flavor(string fl) { op1_flavor = fl; } - - // Specify/fetch the parameter (operand) from which to take the - // primary type of this operation. - void SetTypeParam(int param) { type_param = param; } - int GetTypeParam() const { return type_param; } - - // Specify/fetch the parameter (operand) from which to take the - // secondary type of this operation. - void SetType2Param(int param) { type2_param = param; } - int GetType2Param() const { return type2_param; } - - // Tracking of assignment values (C++ variables that hold the - // value that should be assigned to usual frame slot). - void SetAssignVal(string _av) { av = _av; } - bool HasAssignVal() const { return ! av.empty(); } - const string& GetAssignVal() const { return av; } - - // Management of C++ evaluation blocks. These are built up - // line-by-line. - void AddEval(string line) { eval += line; } - bool HasEval() const { return ! eval.empty(); } - const string& GetEval() const { return eval; } - - // Management of custom methods to be used rather than generating - // a method. - void SetCustomMethod(string cm) { custom_method = SkipWS(cm); } - bool HasCustomMethod() const { return ! custom_method.empty(); } - const string& GetCustomMethod() const { return custom_method; } - - // Management of code to execute at the end of a generated method. - void SetPostMethod(string cm) { post_method = SkipWS(cm); } - bool HasPostMethod() const { return ! post_method.empty(); } - const string& GetPostMethod() const { return post_method; } - - // Predicates indicating whether a subclass supports a given - // property. These are whether the operation: (1) should include - // a version that assigns to a record field as well as the normal - // assigning to a frame slot, (2) is a conditional branch, (3) does - // not have a corresponding AST node, (4) is a direct assignment - // (not an assignment to an expression), (5) is a direct assignment - // to a record field. 
- virtual bool IncludesFieldOp() const { return false; } - virtual bool IsConditionalOp() const { return false; } - virtual bool IsInternalOp() const { return false; } - virtual bool IsAssignOp() const { return false; } - virtual bool IsFieldOp() const { return false; } - - // Whether this operation does not have any C++ evaluation associated - // with it. Used for custom methods that compile into internal - // ZAM operations. - bool NoEval() const { return no_eval; } - void SetNoEval() { no_eval = true; } - - // Whether this operation does not have a version where one of - // its operands is a constant. - bool NoConst() const { return no_const; } - void SetNoConst() { no_const = true; } - - // Whether this operation also has a vectorized form. - bool IncludesVectorOp() const { return includes_vector_op; } - void SetIncludesVectorOp() { includes_vector_op = true; } - - // Whether this operation has side effects, and thus should - // not be elided even if its result is used in a dead assignment. - void SetHasSideEffects() { has_side_effects = true; } - - // An "assignment-less" operation is one that, if its result - // is used in a dead assignment, should be converted to a different - // operation that explictly omits any assignment. - bool HasAssignmentLess() const { return ! assignment_less_op.empty(); } - void SetAssignmentLess(string op, string op_type) - { - assignment_less_op = op; - assignment_less_op_type = op_type; - } - const string& AssignmentLessOp() const { return assignment_less_op; } - const string& AssignmentLessOpType() const { return assignment_less_op_type; } - - // Builds the instructions associated with this operation, assuming - // a single operand. - void UnaryInstantiate(); - - // Parses the next line in an operation template. "attr" is - // the first word on the line, which often specifies the - // attribute specified by the line. "line" is the entire line, - // for parsing when that's necessary, and for error reporting. 
- // "words" is "line" split into a vector of whitespace-delimited - // words. - virtual void Parse(const string& attr, const string& line, const Words& words); - - // Scans in a C++ evaluation block, which continues until encountering - // a line that does not start with whitespace, or that's empty. - string GatherEval(); - - // Parses a $-specifier of which operand to use to associate - // a Zeek scripting type with ZAM instructions. - int ExtractTypeParam(const string& arg); - - // Generates instructions for each of the different flavors of the - // given operation. "ot" specifies the types of operands for the - // instruction, and "do_vec" whether to generate a vector version. - void InstantiateOp(const vector& ot, bool do_vec); - - // Generates one specific flavor ("zc") of the given operation, - // using a method named 'm', the given operand types, and the class. - void InstantiateOp(const string& m, const vector& ot, ZAM_InstClass zc); - - // Generates the "assignment-less" version of the given op-code. - void GenAssignmentlessVersion(string op); - - // Generates the method 'm' for an operation, where "suffix" is - // a (potentially empty) string differentiating the method from - // others for that operation, and "ot" and "zc" are the same - // as above. - void InstantiateMethod(const string& m, const string& suffix, const vector& ot, - ZAM_InstClass zc); - - // Generates the main logic of an operation's method, given the - // specific operand types, an associated suffix for differentiating - // ZAM instructions, and the instruction class. - void InstantiateMethodCore(const vector& ot, string suffix, ZAM_InstClass zc); - - // Generates the specific code to create a ZInst for the given - // operation, operands, parameters to "GenInst", and suffix and - // class per the above. 
- virtual void BuildInstruction(const vector& ot, const string& params, - const string& suffix, ZAM_InstClass zc); - - // Top-level driver for generating the C++ evaluation code for - // a given flavor of operation. - virtual void InstantiateEval(const vector& ot, const string& suffix, - ZAM_InstClass zc); - - // Generates the C++ case statement for evaluating the given flavor - // of operation. - void InstantiateEval(EmitTarget et, const string& op_suffix, const string& eval, - ZAM_InstClass zc); - - // Generates a set of assignment C++ evaluations, one per each - // possible Zeek scripting type of operand. - void InstantiateAssignOp(const vector& ot, const string& suffix); - - // Generates a C++ evaluation for an assignment of the type - // corresponding to "accessor". If "is_managed" is true then - // generates the associated memory management, too. - void GenAssignOpCore(const vector& ot, const string& eval, - const string& accessor, bool is_managed); - - // The same, but for when there's an explicit assignment value. - void GenAssignOpValCore(const string& eval, const string& accessor, bool is_managed); - - // Returns the name of the method associated with the particular - // list of operand types. - string MethodName(const vector& ot) const; - - // Returns the parameter declarations to use in declaring a method. - string MethodDeclare(const vector& ot, ZAM_InstClass zc); - - // Returns a suffix that differentiates an operation name for - // a specific list of operand types. - string OpSuffix(const vector& ot) const; - - // Returns a copy of the given string with leading whitespace - // removed. - string SkipWS(const string& s) const; - - // Set the target to use for subsequent code emission. - void EmitTo(EmitTarget et) { curr_et = et; } - - // Emit the given string to the currently selected EmitTarget. - void Emit(const string& s); - - // Same, but temporarily indented up. 
- void EmitUp(const string& s) - { - IndentUp(); - Emit(s); - IndentDown(); - } - - // Same, but reframe from inserting a newline. - void EmitNoNL(const string& s); - - // Emit a newline. Implementation doesn't actually include a - // newline since that's implicit in a call to Emit(). - void NL() { Emit(""); } - - // Increase/decrease the indentation level, with the last two - // being used for brace-delimited code blocks. - void IndentUp(); - void IndentDown(); - void BeginBlock() - { - IndentUp(); - Emit("{"); - } - void EndBlock() - { - Emit("}"); - IndentDown(); - } - - // Maps an operand type to a character mnemonic used to distinguish - // it from others. - static std::unordered_map ot_to_char; - - // The associated driver object. - ZAMGen* g; - - // See BaseName() and CanonicalName() above. - string base_name; - string cname; - - // Tracks the beginning of this operation template's definition, - // for error reporting. - InputLoc op_loc; - - // The current emission target. - EmitTarget curr_et = None; - - // The operand types for operations that have a single fixed list. - // Some operations (like those evaluating expressions) instead have - // dynamically generated range of possible operand types. - vector op_types; - - // See the description of Op1Flavor above. - string op1_flavor = "OP1_WRITE"; - - // Tracks the result of ExtractTypeParam() used for "type" and - // "type2" attributes. - int type_param = 0; // 0 = not set - int type2_param = 0; - - // If non-empty, the value to assign to the target in an assignment - // operation. - string av; - - // The C++ evaluation; may span multiple lines. - string eval; - - // Any associated custom method. - string custom_method; - - // Any associated additional code to add at the end of a - // generated method. - string post_method; - - // If true, then this operation does not have C++ evaluation - // associated with it. 
- bool no_eval = false; - - // If true, then this operation should not include a version - // supporting operands of constant type. - bool no_const = false; - - // If true, then this operation includes a vectorized version. - bool includes_vector_op = false; - - // If true, then this operation has side effects. - bool has_side_effects = false; - - // If non-empty, then specifies the associated operation that - // is a version of this operation but without assigning the result; - // and the operand type (like "OP_V") of that associated operation. - string assignment_less_op; - string assignment_less_op_type; - }; - -// A subclass used for "unary-op" templates. -class ZAM_UnaryOpTemplate : public ZAM_OpTemplate - { -public: - ZAM_UnaryOpTemplate(ZAMGen* _g, string _base_name) : ZAM_OpTemplate(_g, _base_name) { } - -protected: - void Instantiate() override; - }; - -// A subclass for unary operations that are directly instantiated using -// custom methods. -class ZAM_DirectUnaryOpTemplate : public ZAM_OpTemplate - { -public: - ZAM_DirectUnaryOpTemplate(ZAMGen* _g, string _base_name, string _direct) - : ZAM_OpTemplate(_g, _base_name), direct(_direct) - { - } - -protected: - void Instantiate() override; - -private: - // The ZAMCompiler method to call to compile the operation. - string direct; - }; - -// A helper class for the ZAM_ExprOpTemplate class (which follows). -// This class tracks a single instance of creating an evaluation for -// an AST expression. -class EvalInstance - { -public: - // Initialized using the types of the LHS (result) and the - // first and second operand. Often all three types are the - // same, but they can differ for some particular expressions, - // and for relationals. "eval" provides the C++ evaluation code. 
- // "is_def" is true if this instance is for the default catch-all - // where the operand types don't match any of the explicitly - // specified evaluations; - EvalInstance(ZAM_ExprType lhs_et, ZAM_ExprType op1_et, ZAM_ExprType op2_et, string eval, - bool is_def); - - // Returns the accessor to use for assigning to the LHS. "is_ptr" - // indicates whether the value to which we're applying the - // accessor is a pointer, rather than a ZVal. - string LHSAccessor(bool is_ptr = false) const; - - // Same but for access to the first or second operand. - string Op1Accessor(bool is_ptr = false) const { return Accessor(op1_et, is_ptr); } - string Op2Accessor(bool is_ptr = false) const { return Accessor(op2_et, is_ptr); } - - // Provides an accessor for an operand of the given type. - string Accessor(ZAM_ExprType et, bool is_ptr = false) const; - - // Returns the "marker" use to make unique the opcode for this - // flavor of expression-evaluation instruction. - string OpMarker() const; - - const string& Eval() const { return eval; } - ZAM_ExprType LHS_ET() const { return lhs_et; } - bool IsDefault() const { return is_def; } - -private: - ZAM_ExprType lhs_et; - ZAM_ExprType op1_et; - ZAM_ExprType op2_et; - string eval; - bool is_def; - }; - -// A subclass for AST "Expr" nodes in reduced form. -class ZAM_ExprOpTemplate : public ZAM_OpTemplate - { -public: - ZAM_ExprOpTemplate(ZAMGen* _g, string _base_name); - - // The number of operands the operation takes (not including its - // assignment target). A value of 0 is used for expressions that - // require special handling. 
- virtual int Arity() const { return 0; } - - int HasExplicitResultType() const { return explicit_res_type; } - void SetHasExplicitResultType() { explicit_res_type = true; } - - void AddExprType(ZAM_ExprType et) { expr_types.insert(et); } - const std::unordered_set& ExprTypes() const { return expr_types; } - - void AddEvalSet(ZAM_ExprType et, string ev) { eval_set[et] += ev; } - void AddEvalSet(ZAM_ExprType et1, ZAM_ExprType et2, string ev) - { - eval_mixed_set[et1][et2] += ev; - } - - bool IncludesFieldOp() const override { return includes_field_op; } - void SetIncludesFieldOp() { includes_field_op = true; } - - bool HasPreEval() const { return ! pre_eval.empty(); } - void SetPreEval(string pe) { pre_eval = SkipWS(pe); } - const string& GetPreEval() const { return pre_eval; } - -protected: - // Returns a regular expression used to access the value of the - // expression suitable for assignment in a loop across the elements - // of a Zeek "vector" type. "have_target" is true if the template - // has an explicit "$$" assignment target. - virtual const char* VecEvalRE(bool have_target) const - { - return have_target ? "$$$$ = ZVal($1)" : "ZVal($&)"; - } - - void Parse(const string& attr, const string& line, const Words& words) override; - void Instantiate() override; - - // Instantiates versions of the operation that have a constant - // as the first, second, or third operand ... - void InstantiateC1(const vector& ots, int arity, bool do_vec = false); - void InstantiateC2(const vector& ots, int arity); - void InstantiateC3(const vector& ots); - - // ... or if all of the operands are non-constant. - void InstantiateV(const vector& ots); - - // Generates code that instantiates either the vectorized version - // of an operation, or the non-vector one, depending on whether - // the RHS of the reduced expression/assignment is a vector. 
- void DoVectorCase(const string& m, const string& args); - - // Iterates over the different Zeek types specified for an expression's - // operands and generates instructions for each. - void BuildInstructionCore(const string& params, const string& suffix, ZAM_InstClass zc); - - // Generates an if-else cascade element that matches one of the - // specific Zeek types associated with the instruction. - void GenMethodTest(ZAM_ExprType et1, ZAM_ExprType et2, const string& params, - const string& suffix, bool do_else, ZAM_InstClass zc); - - void InstantiateEval(const vector& ot, const string& suffix, - ZAM_InstClass zc) override; - -private: - // The Zeek types that can appear as operands for the expression. - std::unordered_set expr_types; - - // The C++ evaluation template for a given operand type. - std::unordered_map eval_set; - - // Some expressions take two operands of different types. This - // holds their C++ evaluation template. - std::unordered_map> eval_mixed_set; - - // Whether this expression's operand is a field access (and thus - // needs both the record as an operand and an additional constant - // offset into the record to get to the field). - bool includes_field_op = false; - - // If non-zero, code to generate prior to evaluating the expression. - string pre_eval; - - // If true, then the evaluations will take care of ensuring - // proper result types when assigning to $$. - bool explicit_res_type = false; - }; - -// A version of ZAM_ExprOpTemplate for unary expressions. 
-class ZAM_UnaryExprOpTemplate : public ZAM_ExprOpTemplate - { -public: - ZAM_UnaryExprOpTemplate(ZAMGen* _g, string _base_name) : ZAM_ExprOpTemplate(_g, _base_name) { } - - bool IncludesFieldOp() const override { return ExprTypes().count(ZAM_EXPR_TYPE_NONE) == 0; } - - int Arity() const override { return 1; } - -protected: - void Parse(const string& attr, const string& line, const Words& words) override; - void Instantiate() override; - - void BuildInstruction(const vector& ot, const string& params, - const string& suffix, ZAM_InstClass zc) override; - }; - -// A version of ZAM_UnaryExprOpTemplate where the point of the expression -// is to capture a direct assignment operation. -class ZAM_AssignOpTemplate : public ZAM_UnaryExprOpTemplate - { -public: - ZAM_AssignOpTemplate(ZAMGen* _g, string _base_name); - - bool IsAssignOp() const override { return true; } - bool IncludesFieldOp() const override { return false; } - bool IsFieldOp() const override { return field_op; } - void SetFieldOp() { field_op = true; } - -protected: - void Parse(const string& attr, const string& line, const Words& words) override; - void Instantiate() override; - -private: - bool field_op = false; - }; - -// A version of ZAM_ExprOpTemplate for binary expressions. -class ZAM_BinaryExprOpTemplate : public ZAM_ExprOpTemplate - { -public: - ZAM_BinaryExprOpTemplate(ZAMGen* _g, string _base_name) : ZAM_ExprOpTemplate(_g, _base_name) { } - - bool IncludesFieldOp() const override { return true; } - - int Arity() const override { return 2; } - -protected: - void Instantiate() override; - - void BuildInstruction(const vector& ot, const string& params, - const string& suffix, ZAM_InstClass zc) override; - }; - -// A version of ZAM_BinaryExprOpTemplate for relationals. 
-class ZAM_RelationalExprOpTemplate : public ZAM_BinaryExprOpTemplate - { -public: - ZAM_RelationalExprOpTemplate(ZAMGen* _g, string _base_name) - : ZAM_BinaryExprOpTemplate(_g, _base_name) - { - } - - bool IncludesFieldOp() const override { return false; } - bool IsConditionalOp() const override { return true; } - -protected: - const char* VecEvalRE(bool have_target) const override - { - if ( have_target ) - return "$$$$ = ZVal(bro_int_t($1))"; - else - return "ZVal(bro_int_t($&))"; - } - - void Instantiate() override; - - void BuildInstruction(const vector& ot, const string& params, - const string& suffix, ZAM_InstClass zc) override; - }; - -// A version of ZAM_BinaryExprOpTemplate for binary operations generated -// by custom methods rather than directly from the AST. -class ZAM_InternalBinaryOpTemplate : public ZAM_BinaryExprOpTemplate - { -public: - ZAM_InternalBinaryOpTemplate(ZAMGen* _g, string _base_name) - : ZAM_BinaryExprOpTemplate(_g, _base_name) - { - } - - bool IsInternalOp() const override { return true; } - - // The accessors used to get to the underlying Zeek script value - // of the first and second operand. - void SetOp1Accessor(string accessor) { op1_accessor = accessor; } - void SetOp2Accessor(string accessor) { op2_accessor = accessor; } - void SetOpAccessor(string accessor) - { - SetOp1Accessor(accessor); - SetOp2Accessor(accessor); - } - -protected: - void Parse(const string& attr, const string& line, const Words& words) override; - - void InstantiateEval(const vector& ot, const string& suffix, - ZAM_InstClass zc) override; - -private: - string op1_accessor; - string op2_accessor; - }; - -// A version of ZAM_OpTemplate for operations used internally (and not -// corresponding to AST elements). 
-class ZAM_InternalOpTemplate : public ZAM_OpTemplate - { -public: - ZAM_InternalOpTemplate(ZAMGen* _g, string _base_name) : ZAM_OpTemplate(_g, _base_name) { } - - bool IsInternalOp() const override { return true; } - -protected: - void Parse(const string& attr, const string& line, const Words& words) override; - -private: - // True if the internal operation corresponds to an indirect call, - // i.e., one through a variable rather than one directly specified. - bool is_indirect_call = false; - }; - -// An internal operation that assigns a result to a frame element. -class ZAM_InternalAssignOpTemplate : public ZAM_InternalOpTemplate - { -public: - ZAM_InternalAssignOpTemplate(ZAMGen* _g, string _base_name) - : ZAM_InternalOpTemplate(_g, _base_name) - { - } - - bool IsAssignOp() const override { return true; } - }; - -// Helper classes for managing input from the template file, including -// low-level scanning. - -class TemplateInput - { -public: - // Program name and file name are for generating error messages. - TemplateInput(FILE* _f, const char* _prog_name, const char* _file_name) - : f(_f), prog_name(_prog_name) - { - loc.file_name = _file_name; - } - - const InputLoc& CurrLoc() const { return loc; } - - // Fetch the next line of input, including trailing newline. - // Returns true on success, false on EOF or error. Skips over - // comments. - bool ScanLine(string& line); - - // Takes a line and splits it into white-space delimited words, - // returned in a vector. Removes trailing whitespace. - Words SplitIntoWords(const string& line) const; - - // Returns the line with the given number of initial words skipped. - string SkipWords(const string& line, int n) const; - - // Puts back the given line so that the next call to ScanLine will - // return it. Does not nest. - void PutBack(const string& line) { put_back = line; } - - // Report an error and exit. 
- [[noreturn]] void Gripe(const char* msg, const string& input) const; - [[noreturn]] void Gripe(const char* msg, const InputLoc& loc) const; - -private: - string put_back; // if non-empty, use this for the next ScanLine - - FILE* f; - const char* prog_name; - InputLoc loc; - }; - -// Driver class for the ZAM instruction generator. - -class ZAMGen - { -public: - ZAMGen(int argc, char** argv); - - // Reads in and records a macro definition, which ends upon - // encountering a blank line or a line that does not begin - // with whitespace. - void ReadMacro(const string& line); - - // Emits C++ #define's to implement the recorded macros. - void GenMacros(); - - // Generates a ZAM op-code for the given template, suffix, and - // instruction class. Also creates auxiliary information associated - // with the instruction. - string GenOpCode(const ZAM_OpTemplate* ot, const string& suffix, - ZAM_InstClass zc = ZIC_REGULAR); - - // These methods provide low-level parsing (and error-reporting) - // access to ZAM_OpTemplate objects. - const InputLoc& CurrLoc() const { return ti->CurrLoc(); } - bool ScanLine(string& line) { return ti->ScanLine(line); } - Words SplitIntoWords(const string& line) const { return ti->SplitIntoWords(line); } - string SkipWords(const string& line, int n) const { return ti->SkipWords(line, n); } - void PutBack(const string& line) { ti->PutBack(line); } - - // Methods made public to ZAM_OpTemplate objects for emitting code. - void Emit(EmitTarget et, const string& s); - - void IndentUp() { ++indent_level; } - void IndentDown() { --indent_level; } - void SetNoNL(bool _no_NL) { no_NL = _no_NL; } - - [[noreturn]] void Gripe(const char* msg, const string& input) const { ti->Gripe(msg, input); } - [[noreturn]] void Gripe(const char* msg, const InputLoc& loc) const { ti->Gripe(msg, loc); } - -private: - // Opens all of the code generation targets, and creates prologs - // for those requiring them (such as for embedding into switch - // statements). 
- void InitEmitTargets(); - void InitSwitch(EmitTarget et, string desc); - - // Closes all of the code generation targets, and creates epilogs - // for those requiring them. - void CloseEmitTargets(); - void FinishSwitches(); - - // Parses a single template, returning true on success and false - // if we've reached the end of the input. (Errors during parsing - // result instead in exiting.) - bool ParseTemplate(); - - // Maps code generation targets with their corresponding FILE*. - std::unordered_map gen_files; - - // Maps code generation targets to strings used to describe any - // associated switch (for error reporting). - std::unordered_map switch_targets; - - // The low-level TemplateInput object used to manage input. - std::unique_ptr ti; - - // Tracks all of the templates created so far. - vector> templates; - - // Tracks the macros recorded so far. - vector> macros; - - // Current indentation level. Maintained globally rather than - // per EmitTarget, so the caller needs to ensure it is managed - // consistently. - int indent_level = 0; - - // If true, refrain from appending a newline to any emitted lines. - bool no_NL = false; - }; diff --git a/src/script_opt/ZAM/Ops.in b/src/script_opt/ZAM/Ops.in index 3a191aded3..5d4529883a 100644 --- a/src/script_opt/ZAM/Ops.in +++ b/src/script_opt/ZAM/Ops.in @@ -353,31 +353,73 @@ unary-op AppendTo # value itself. op1-read set-type $1 -eval auto copy = CopyVal($1); - auto vv = frame[z.v1].vector_val; - vv->RawVec()->push_back(copy); - vv->Modified(); +eval auto vv = frame[z.v1].vector_val; + if ( vv->Size() == 0 ) + // Use the slightly more expensive Assign(), since it + // knows how to deal with empty vectors that do not yet + // have concrete types. + vv->Assign(0, $1.ToVal(z.t)); + else + { + vv->RawVec()->push_back(CopyVal($1)); + vv->Modified(); + } -internal-op AppendToField +# For vectors-of-any, we always go through the Assign() interface because +# it's needed for tracking the potentially differing types. 
+unary-op AppendToAnyVec +op1-read +set-type $1 +eval auto vv = frame[z.v1].vector_val; + vv->Assign(vv->Size(), $1.ToVal(z.t)); + +internal-op AddPatternToField type VVi op1-read -eval EvalAppendToField(frame[z.v2], v3) +eval EvalAddPatternToField(frame[z.v2], v3) -macro EvalAppendToField(val, f) - auto v = CopyVal(val); - auto fvv = frame[z.v1].record_val->GetField(z.f)->AsVectorVal(); - if ( fvv ) +macro EvalAddPatternToField(v, f) + auto fpat = frame[z.v1].record_val->GetField(z.f)->AsPatternVal(); + if ( fpat ) { - fvv->RawVec()->push_back(v); - fvv->Modified(); + v.re_val->AddTo(fpat, false); + frame[z.v1].record_val->Modified(); } else ZAM_run_time_error(z.loc, util::fmt("field value missing: $%s", frame[z.v1].record_val->GetType()->AsRecordType()->FieldName(z.f))); -internal-op AppendToField +internal-op AddPatternToField type VCi op1-read -eval EvalAppendToField(z.c, v2) +eval EvalAddPatternToField(z.c, v2) + +unary-op ExtendPattern +op1-read +eval $1.re_val->AddTo(frame[z.v1].re_val, false); + +unary-op AddVecToVec +op1-read +eval $1.vector_val->AddTo(frame[z.v1].vector_val, false); + +unary-op AddTableToTable +op1-read +eval auto t = frame[z.v1].table_val; + auto v = $1.table_val; + if ( v->Size() > 0 ) + { + v->AddTo(t, false); + t->Modified(); + } + +unary-op RemoveTableFromTable +op1-read +eval auto t = frame[z.v1].table_val; + auto v = $1.table_val; + if ( v->Size() > 0 ) + { + v->RemoveFrom(t); + t->Modified(); + } unary-expr-op Cast op-type X @@ -1714,7 +1756,7 @@ eval step_iters[z.v2].InitLoop(frame[z.v1].string_val->AsString()); internal-op Init-String-Loop type VC -eval step_iters[z.v2].InitLoop(z.c.string_val->AsString()); +eval step_iters[z.v1].InitLoop(z.c.string_val->AsString()); internal-op Next-String-Iter # v1 = iteration variable diff --git a/src/script_opt/ZAM/Stmt.cc b/src/script_opt/ZAM/Stmt.cc index ad25ca4d0e..fc729e34eb 100644 --- a/src/script_opt/ZAM/Stmt.cc +++ b/src/script_opt/ZAM/Stmt.cc @@ -711,9 +711,8 @@ const ZAMStmt 
ZAMCompiler::CompileDel(const DelStmt* ds) if ( index_list->Tag() != EXPR_LIST ) reporter->InternalError("non-list in \"delete\""); - auto internal_ind = BuildVals(index_list->AsListExprPtr()); - - return DelTableVO(aggr, internal_ind); + auto internal_ind = std::unique_ptr(BuildVals(index_list->AsListExprPtr())); + return DelTableVO(aggr, internal_ind.get()); } const ZAMStmt ZAMCompiler::CompileWhile(const WhileStmt* ws) diff --git a/src/script_opt/ZAM/ZBody.h b/src/script_opt/ZAM/ZBody.h index 32a9e43b3f..11699a848b 100644 --- a/src/script_opt/ZAM/ZBody.h +++ b/src/script_opt/ZAM/ZBody.h @@ -34,8 +34,8 @@ public: ~ZBody() override; // These are split out from the constructor to allow construction - // of a ZBody from either save-file full instructions (first method) - // or intermediary instructions (second method). + // of a ZBody from either save-file full instructions (first method, + // not currently supported) or intermediary instructions (second method). void SetInsts(std::vector& insts); void SetInsts(std::vector& instsI); @@ -70,7 +70,7 @@ protected: TraversalCode Traverse(TraversalCallback* cb) const override; private: - const char* func_name; + const char* func_name = nullptr; const ZInst* insts = nullptr; unsigned int ninst; @@ -109,7 +109,7 @@ private: // const method. std::vector* inst_count = nullptr; // for profiling double* CPU_time = nullptr; // cumulative CPU time for the program - std::vector* inst_CPU; // per-instruction CPU time. + std::vector* inst_CPU = nullptr; // per-instruction CPU time. 
CaseMaps int_cases; CaseMaps uint_cases; diff --git a/src/strings.bif b/src/strings.bif index e4a6e21851..11a38deaa3 100644 --- a/src/strings.bif +++ b/src/strings.bif @@ -541,7 +541,7 @@ function to_lower%(str: string%): string *ls++ = s[i]; } - *ls++ = '\0'; + *ls++ = '\0'; return zeek::make_intrusive(new zeek::String(1, lower_s, n)); %} @@ -570,7 +570,7 @@ function to_upper%(str: string%): string *us++ = s[i]; } - *us++ = '\0'; + *us++ = '\0'; return zeek::make_intrusive(new zeek::String(1, upper_s, n)); %} @@ -614,6 +614,7 @@ function to_string_literal%(str: string%): string %} ## Determines whether a given string contains only ASCII characters. +## The empty string is ASCII. ## ## str: The string to examine. ## @@ -1253,13 +1254,17 @@ function ends_with%(str: string, sub: string%) : bool return zeek::val_mgr->Bool(s.rfind(sub_s) == (s.size() - sub_s.size())); %} -## Returns whether an entire string consists only of digits. +## Returns whether a string consists entirely of digits. +## The empty string is not numeric. ## function is_num%(str: string%) : bool %{ // Python's version of this method (which this is based on) just checks to see if every // character in the string is a numeric value. If something more than this is desired, we // could use something like std::from_chars or std::strto{ul,f} to check it. + if ( str->Len() == 0 ) + return zeek::val_mgr->False(); + const char* s = str->CheckString(); for ( int i = 0; i < str->Len(); i++ ) if ( ! std::isdigit(s[i]) ) @@ -1268,10 +1273,14 @@ function is_num%(str: string%) : bool return zeek::val_mgr->True(); %} -## Returns whether an entire string is alphabetic characters. +## Returns whether a string consists entirely of alphabetic characters. +## The empty string is not alphabetic. ## function is_alpha%(str: string%) : bool %{ + if ( str->Len() == 0 ) + return zeek::val_mgr->False(); + const char* s = str->CheckString(); for ( int i = 0; i < str->Len(); i++ ) if ( ! 
std::isalpha(s[i]) ) @@ -1280,10 +1289,14 @@ function is_alpha%(str: string%) : bool return zeek::val_mgr->True(); %} -## Returns whether an entire string is alphanumeric characters +## Returns whether a string consists entirely of alphanumeric characters. +## The empty string is not alphanumeric. ## function is_alnum%(str: string%) : bool %{ + if ( str->Len() == 0 ) + return zeek::val_mgr->False(); + const char* s = str->CheckString(); for ( int i = 0; i < str->Len(); i++ ) if ( ! std::isalnum(s[i]) ) diff --git a/src/telemetry/CMakeLists.txt b/src/telemetry/CMakeLists.txt index c0c2153387..542b7a8528 100644 --- a/src/telemetry/CMakeLists.txt +++ b/src/telemetry/CMakeLists.txt @@ -6,11 +6,7 @@ include_directories(BEFORE ) set(telemetry_SRCS - Counter.cc - Gauge.cc - Histogram.cc Manager.cc - MetricFamily.cc ) bif_target(telemetry.bif) diff --git a/src/telemetry/Counter.cc b/src/telemetry/Counter.cc deleted file mode 100644 index 6ddc7ece5a..0000000000 --- a/src/telemetry/Counter.cc +++ /dev/null @@ -1,77 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. 
- -#include "zeek/telemetry/Counter.h" - -#include "zeek/telemetry/Detail.h" - -#include "caf/telemetry/counter.hpp" -#include "caf/telemetry/metric_family.hpp" -#include "caf/telemetry/metric_family_impl.hpp" - -namespace zeek::telemetry - { - -// -- IntCounter --------------------------------------------------------------- - -void IntCounter::Inc() noexcept - { - deref(pimpl).inc(); - } - -void IntCounter::Inc(int64_t amount) noexcept - { - deref(pimpl).inc(amount); - } - -int64_t IntCounter::operator++() noexcept - { - return ++deref(pimpl); - } - -int64_t IntCounter::Value() const noexcept - { - return deref(pimpl).value(); - } - -IntCounterFamily::IntCounterFamily(Impl* ptr) : MetricFamily(upcast(ptr)) { } - -IntCounter IntCounterFamily::GetOrAdd(Span labels) - { - return with_native_labels(labels, - [this](auto nativeLabels) - { - auto hdl = opaque(deref(this, pimpl).get_or_add(nativeLabels)); - return IntCounter{hdl}; - }); - } - -// -- DblCounter --------------------------------------------------------------- - -void DblCounter::Inc() noexcept - { - deref(pimpl).inc(); - } - -void DblCounter::Inc(double amount) noexcept - { - deref(pimpl).inc(amount); - } - -double DblCounter::Value() const noexcept - { - return deref(pimpl).value(); - } - -DblCounterFamily::DblCounterFamily(Impl* ptr) : MetricFamily(upcast(ptr)) { } - -DblCounter DblCounterFamily::GetOrAdd(Span labels) - { - return with_native_labels(labels, - [this](auto nativeLabels) - { - auto hdl = opaque(deref(this, pimpl).get_or_add(nativeLabels)); - return DblCounter{hdl}; - }); - } - - } // namespace zeek::telemetry diff --git a/src/telemetry/Counter.h b/src/telemetry/Counter.h index 126816b523..6f5e360a89 100644 --- a/src/telemetry/Counter.h +++ b/src/telemetry/Counter.h @@ -9,6 +9,8 @@ #include "zeek/Span.h" #include "zeek/telemetry/MetricFamily.h" +#include "broker/telemetry/fwd.hh" + namespace zeek::telemetry { @@ -24,8 +26,6 @@ class IntCounter public: friend class IntCounterFamily; - struct 
Impl; - static inline const char* OpaqueName = "IntCounterMetricVal"; IntCounter() = delete; @@ -35,34 +35,36 @@ public: /** * Increments the value by 1. */ - void Inc() noexcept; + void Inc() noexcept { broker::telemetry::inc(hdl); } /** * Increments the value by @p amount. * @pre `amount >= 0` */ - void Inc(int64_t amount) noexcept; + void Inc(int64_t amount) noexcept { broker::telemetry::inc(hdl, amount); } /** * Increments the value by 1. * @return The new value. */ - int64_t operator++() noexcept; + int64_t operator++() noexcept { return broker::telemetry::inc(hdl); } /** * @return The current value. */ - int64_t Value() const noexcept; + int64_t Value() const noexcept { return broker::telemetry::value(hdl); } /** * @return Whether @c this and @p other refer to the same counter. */ - constexpr bool IsSameAs(IntCounter other) const noexcept { return pimpl == other.pimpl; } + constexpr bool IsSameAs(IntCounter other) const noexcept { return hdl == other.hdl; } private: - explicit IntCounter(Impl* ptr) noexcept : pimpl(ptr) { } + using Handle = broker::telemetry::int_counter_hdl*; - Impl* pimpl; + explicit IntCounter(Handle hdl) noexcept : hdl(hdl) { } + + Handle hdl; }; /** @@ -89,8 +91,6 @@ class IntCounterFamily : public MetricFamily public: friend class Manager; - class Impl; - static inline const char* OpaqueName = "IntCounterMetricFamilyVal"; using InstanceType = IntCounter; @@ -102,7 +102,10 @@ public: * Returns the metrics handle for given labels, creating a new instance * lazily if necessary. 
*/ - IntCounter GetOrAdd(Span labels); + IntCounter GetOrAdd(Span labels) + { + return IntCounter{int_counter_get_or_add(hdl, labels)}; + } /** * @copydoc GetOrAdd @@ -113,7 +116,9 @@ public: } private: - explicit IntCounterFamily(Impl* ptr); + using Handle = broker::telemetry::int_counter_family_hdl*; + + explicit IntCounterFamily(Handle hdl) : MetricFamily(upcast(hdl)) { } }; /** @@ -125,8 +130,6 @@ class DblCounter public: friend class DblCounterFamily; - struct Impl; - static inline const char* OpaqueName = "DblCounterMetricVal"; DblCounter() = delete; @@ -136,28 +139,30 @@ public: /** * Increments the value by 1. */ - void Inc() noexcept; + void Inc() noexcept { broker::telemetry::inc(hdl); } /** * Increments the value by @p amount. * @pre `amount >= 0` */ - void Inc(double amount) noexcept; + void Inc(double amount) noexcept { broker::telemetry::inc(hdl, amount); } /** * @return The current value. */ - double Value() const noexcept; + double Value() const noexcept { return broker::telemetry::value(hdl); } /** * @return Whether @c this and @p other refer to the same counter. */ - constexpr bool IsSameAs(DblCounter other) const noexcept { return pimpl == other.pimpl; } + constexpr bool IsSameAs(DblCounter other) const noexcept { return hdl == other.hdl; } private: - explicit DblCounter(Impl* ptr) noexcept : pimpl(ptr) { } + using Handle = broker::telemetry::dbl_counter_hdl*; - Impl* pimpl; + explicit DblCounter(Handle hdl) noexcept : hdl(hdl) { } + + Handle hdl; }; /** @@ -184,8 +189,6 @@ class DblCounterFamily : public MetricFamily public: friend class Manager; - class Impl; - static inline const char* OpaqueName = "DblCounterMetricFamilyVal"; using InstanceType = DblCounter; @@ -197,7 +200,10 @@ public: * Returns the metrics handle for given labels, creating a new instance * lazily if necessary. 
*/ - DblCounter GetOrAdd(Span labels); + DblCounter GetOrAdd(Span labels) + { + return DblCounter{dbl_counter_get_or_add(hdl, labels)}; + } /** * @copydoc GetOrAdd @@ -208,7 +214,9 @@ public: } private: - explicit DblCounterFamily(Impl* ptr); + using Handle = broker::telemetry::dbl_counter_family_hdl*; + + explicit DblCounterFamily(Handle hdl) : MetricFamily(upcast(hdl)) { } }; namespace detail diff --git a/src/telemetry/Detail.h b/src/telemetry/Detail.h deleted file mode 100644 index 829041d7ad..0000000000 --- a/src/telemetry/Detail.h +++ /dev/null @@ -1,259 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -// This header contains private implementation details for telemetry classes -// and should not get included outside of .cc files. - -#pragma once - -#include -#include -#include - -#include "zeek/Span.h" -#include "zeek/telemetry/Counter.h" -#include "zeek/telemetry/Gauge.h" -#include "zeek/telemetry/Histogram.h" -#include "zeek/telemetry/Manager.h" -#include "zeek/telemetry/MetricFamily.h" - -#include "caf/telemetry/label_view.hpp" -#include "caf/telemetry/metric_family.hpp" - -namespace zeek::telemetry - { - -// -- traits for converting between opaque handles and native pointers --------- - -/** - * This trait must provide the member types @c Native for referring to the CAF - * type, @c Opaque for referring to the @c Impl type. For instance types such as - * @c IntCounter, the trait must also provide the member type @c NativeFamily. 
- */ -template struct PimplTrait; - -template <> struct PimplTrait - { - using Native = caf::telemetry::int_counter; - using Oqaque = IntCounter::Impl; - using NativeFamily = caf::telemetry::metric_family_impl; - }; - -template <> struct PimplTrait : PimplTrait - { - }; - -template <> struct PimplTrait - { - using Native = typename PimplTrait::NativeFamily; - using Oqaque = IntCounterFamily::Impl; - }; - -template <> -struct PimplTrait::NativeFamily> - : PimplTrait - { - }; - -template <> struct PimplTrait - { - using Native = caf::telemetry::dbl_counter; - using Oqaque = DblCounter::Impl; - using NativeFamily = caf::telemetry::metric_family_impl; - }; - -template <> struct PimplTrait : PimplTrait - { - }; - -template <> struct PimplTrait - { - using Native = typename PimplTrait::NativeFamily; - using Oqaque = DblCounterFamily::Impl; - }; - -template <> -struct PimplTrait::NativeFamily> - : PimplTrait - { - }; - -template <> struct PimplTrait - { - using Native = caf::telemetry::int_gauge; - using Oqaque = IntGauge::Impl; - using NativeFamily = caf::telemetry::metric_family_impl; - }; - -template <> struct PimplTrait : PimplTrait - { - }; - -template <> struct PimplTrait - { - using Native = typename PimplTrait::NativeFamily; - using Oqaque = IntGaugeFamily::Impl; - }; - -template <> -struct PimplTrait::NativeFamily> - : PimplTrait - { - }; - -template <> struct PimplTrait - { - using Native = caf::telemetry::dbl_gauge; - using Oqaque = DblGauge::Impl; - using NativeFamily = caf::telemetry::metric_family_impl; - }; - -template <> struct PimplTrait : PimplTrait - { - }; - -template <> struct PimplTrait - { - using Native = typename PimplTrait::NativeFamily; - using Oqaque = DblGaugeFamily::Impl; - }; - -template <> -struct PimplTrait::NativeFamily> - : PimplTrait - { - }; - -template <> struct PimplTrait - { - using Native = caf::telemetry::int_histogram; - using Oqaque = IntHistogram::Impl; - using NativeFamily = caf::telemetry::metric_family_impl; - }; - -template 
<> struct PimplTrait : PimplTrait - { - }; - -template <> struct PimplTrait - { - using Native = typename PimplTrait::NativeFamily; - using Oqaque = IntHistogramFamily::Impl; - }; - -template <> -struct PimplTrait::NativeFamily> - : PimplTrait - { - }; - -template <> struct PimplTrait - { - using Native = caf::telemetry::dbl_histogram; - using Oqaque = DblHistogram::Impl; - using NativeFamily = caf::telemetry::metric_family_impl; - }; - -template <> struct PimplTrait : PimplTrait - { - }; - -template <> struct PimplTrait - { - using Native = typename PimplTrait::NativeFamily; - using Oqaque = DblHistogramFamily::Impl; - }; - -template <> -struct PimplTrait::NativeFamily> - : PimplTrait - { - }; - -template <> struct PimplTrait - { - using Native = caf::telemetry::metric_registry; - using Oqaque = Manager::Impl; - }; - -template <> -struct PimplTrait::Native> : PimplTrait - { - }; - -// -- free functions ----------------------------------------------------------- - -template ::Native> auto& deref(T* ptr) - { - return *reinterpret_cast(ptr); - } - -template auto& deref(Family*, MetricFamily::Impl* ptr) - { - using InstanceType = typename Family::InstanceType; - using ImplType = typename InstanceType::Impl; - using NativeType = typename PimplTrait::NativeFamily; - return *reinterpret_cast(ptr); - } - -template ::Oqaque> auto opaque(T* ptr) - { - return reinterpret_cast(ptr); - } - -template auto opaque(const Family*, MetricFamily::Impl* ptr) - { - using InstanceType = typename Family::InstanceType; - using ImplType = typename InstanceType::Impl; - using OpaqueType = typename PimplTrait::NativeFamily; - return reinterpret_cast(ptr); - } - -template ::Native> auto upcast(T* ptr) - { - auto native = reinterpret_cast(ptr); - auto base_ptr = static_cast(native); - return reinterpret_cast(base_ptr); - } - -template auto with_native_labels(Span xs, F continuation) - { - namespace ct = caf::telemetry; - - if ( xs.size() <= 10 ) - { - ct::label_view buf[10] = { - {{}, {}}, 
{{}, {}}, {{}, {}}, {{}, {}}, {{}, {}}, - {{}, {}}, {{}, {}}, {{}, {}}, {{}, {}}, {{}, {}}, - }; - for ( size_t index = 0; index < xs.size(); ++index ) - buf[index] = ct::label_view{xs[index].first, xs[index].second}; - return continuation(Span{buf, xs.size()}); - } - else - { - std::vector buf; - for ( auto x : xs ) - buf.emplace_back(x.first, x.second); - return continuation(Span{buf}); - } - } - -template auto with_native_labels(Span xs, F continuation) - { - if ( xs.size() <= 10 ) - { - caf::string_view buf[10]; - for ( size_t index = 0; index < xs.size(); ++index ) - buf[index] = xs[index]; - return continuation(Span{buf, xs.size()}); - } - else - { - std::vector buf; - for ( auto x : xs ) - buf.emplace_back(x); - return continuation(Span{buf}); - } - } - - } // namespace zeek::telemetry diff --git a/src/telemetry/Gauge.cc b/src/telemetry/Gauge.cc deleted file mode 100644 index ad6ed5d6ea..0000000000 --- a/src/telemetry/Gauge.cc +++ /dev/null @@ -1,102 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. 
- -#include "zeek/telemetry/Gauge.h" - -#include "zeek/telemetry/Detail.h" - -#include "caf/telemetry/gauge.hpp" -#include "caf/telemetry/metric_family.hpp" -#include "caf/telemetry/metric_family_impl.hpp" - -namespace zeek::telemetry - { - -// -- IntGauge --------------------------------------------------------------- - -void IntGauge::Inc() noexcept - { - deref(pimpl).inc(); - } - -void IntGauge::Inc(int64_t amount) noexcept - { - deref(pimpl).inc(amount); - } - -void IntGauge::Dec() noexcept - { - deref(pimpl).dec(); - } - -void IntGauge::Dec(int64_t amount) noexcept - { - deref(pimpl).dec(amount); - } - -int64_t IntGauge::operator++() noexcept - { - return ++deref(pimpl); - } - -int64_t IntGauge::operator--() noexcept - { - return --deref(pimpl); - } - -int64_t IntGauge::Value() const noexcept - { - return deref(pimpl).value(); - } - -IntGaugeFamily::IntGaugeFamily(Impl* ptr) : MetricFamily(upcast(ptr)) { } - -IntGauge IntGaugeFamily::GetOrAdd(Span labels) - { - return with_native_labels(labels, - [this](auto nativeLabels) - { - auto hdl = opaque(deref(this, pimpl).get_or_add(nativeLabels)); - return IntGauge{hdl}; - }); - } - -// -- DblGauge --------------------------------------------------------------- - -void DblGauge::Inc() noexcept - { - deref(pimpl).inc(); - } - -void DblGauge::Inc(double amount) noexcept - { - deref(pimpl).inc(amount); - } - -void DblGauge::Dec() noexcept - { - deref(pimpl).dec(); - } - -void DblGauge::Dec(double amount) noexcept - { - deref(pimpl).dec(amount); - } - -double DblGauge::Value() const noexcept - { - return deref(pimpl).value(); - } - -DblGaugeFamily::DblGaugeFamily(Impl* ptr) : MetricFamily(upcast(ptr)) { } - -DblGauge DblGaugeFamily::GetOrAdd(Span labels) - { - return with_native_labels(labels, - [this](auto nativeLabels) - { - auto hdl = opaque(deref(this, pimpl).get_or_add(nativeLabels)); - return DblGauge{hdl}; - }); - } - - } // namespace zeek::telemetry diff --git a/src/telemetry/Gauge.h b/src/telemetry/Gauge.h index 
f072178d98..294f9b1ab1 100644 --- a/src/telemetry/Gauge.h +++ b/src/telemetry/Gauge.h @@ -9,6 +9,8 @@ #include "zeek/Span.h" #include "zeek/telemetry/MetricFamily.h" +#include "broker/telemetry/fwd.hh" + namespace zeek::telemetry { @@ -17,7 +19,7 @@ class IntGaugeFamily; class Manager; /** - * A handle to a metric that represents an integer value. Gauges are less + * A handle to a metric that represents an integer value. Gauges are more * permissive than counters and also allow decrementing the value. */ class IntGauge @@ -25,8 +27,6 @@ class IntGauge public: friend class IntGaugeFamily; - struct Impl; - static inline const char* OpaqueName = "IntGaugeMetricVal"; IntGauge() = delete; @@ -36,49 +36,51 @@ public: /** * Increments the value by 1. */ - void Inc() noexcept; + void Inc() noexcept { broker::telemetry::inc(hdl); } /** * Increments the value by @p amount. */ - void Inc(int64_t amount) noexcept; + void Inc(int64_t amount) noexcept { broker::telemetry::inc(hdl, amount); } /** * Increments the value by 1. * @return The new value. */ - int64_t operator++() noexcept; + int64_t operator++() noexcept { return broker::telemetry::inc(hdl); } /** * Decrements the value by 1. */ - void Dec() noexcept; + void Dec() noexcept { broker::telemetry::dec(hdl); } /** * Decrements the value by @p amount. */ - void Dec(int64_t amount) noexcept; + void Dec(int64_t amount) noexcept { broker::telemetry::dec(hdl, amount); } /** * Decrements the value by 1. * @return The new value. */ - int64_t operator--() noexcept; + int64_t operator--() noexcept { return broker::telemetry::dec(hdl); } /** * @return The current value. */ - int64_t Value() const noexcept; + int64_t Value() const noexcept { return broker::telemetry::value(hdl); } /** * @return Whether @c this and @p other refer to the same counter. 
*/ - constexpr bool IsSameAs(IntGauge other) const noexcept { return pimpl == other.pimpl; } + constexpr bool IsSameAs(IntGauge other) const noexcept { return hdl == other.hdl; } private: - explicit IntGauge(Impl* ptr) noexcept : pimpl(ptr) { } + using Handle = broker::telemetry::int_gauge_hdl*; - Impl* pimpl; + explicit IntGauge(Handle hdl) noexcept : hdl(hdl) { } + + Handle hdl; }; /** @@ -105,8 +107,6 @@ class IntGaugeFamily : public MetricFamily public: friend class Manager; - class Impl; - static inline const char* OpaqueName = "IntGaugeMetricFamilyVal"; using InstanceType = IntGauge; @@ -118,7 +118,10 @@ public: * Returns the metrics handle for given labels, creating a new instance * lazily if necessary. */ - IntGauge GetOrAdd(Span labels); + IntGauge GetOrAdd(Span labels) + { + return IntGauge{int_gauge_get_or_add(hdl, labels)}; + } /** * @copydoc GetOrAdd @@ -129,21 +132,20 @@ public: } private: - explicit IntGaugeFamily(Impl* ptr); + using Handle = broker::telemetry::int_gauge_family_hdl*; + + explicit IntGaugeFamily(Handle hdl) : MetricFamily(upcast(hdl)) { } }; /** - * A handle to a metric that represents a floating point value. Gauges are less + * A handle to a metric that represents a floating point value. Gauges are more * permissive than counters and also allow decrementing the value. - * up. */ class DblGauge { public: friend class DblGaugeFamily; - struct Impl; - static inline const char* OpaqueName = "DblGaugeMetricVal"; DblGauge() = delete; @@ -153,37 +155,39 @@ public: /** * Increments the value by 1. */ - void Inc() noexcept; + void Inc() noexcept { broker::telemetry::inc(hdl); } /** * Increments the value by @p amount. */ - void Inc(double amount) noexcept; + void Inc(double amount) noexcept { broker::telemetry::inc(hdl, amount); } /** * Increments the value by 1. */ - void Dec() noexcept; + void Dec() noexcept { broker::telemetry::dec(hdl); } /** * Increments the value by @p amount. 
*/ - void Dec(double amount) noexcept; + void Dec(double amount) noexcept { broker::telemetry::dec(hdl, amount); } /** * @return The current value. */ - double Value() const noexcept; + double Value() const noexcept { return broker::telemetry::value(hdl); } /** * @return Whether @c this and @p other refer to the same counter. */ - constexpr bool IsSameAs(DblGauge other) const noexcept { return pimpl == other.pimpl; } + constexpr bool IsSameAs(DblGauge other) const noexcept { return hdl == other.hdl; } private: - explicit DblGauge(Impl* ptr) noexcept : pimpl(ptr) { } + using Handle = broker::telemetry::dbl_gauge_hdl*; - Impl* pimpl; + explicit DblGauge(Handle hdl) noexcept : hdl(hdl) { } + + Handle hdl; }; /** @@ -210,8 +214,6 @@ class DblGaugeFamily : public MetricFamily public: friend class Manager; - class Impl; - static inline const char* OpaqueName = "DblGaugeMetricFamilyVal"; using InstanceType = DblGauge; @@ -223,7 +225,10 @@ public: * Returns the metrics handle for given labels, creating a new instance * lazily if necessary. */ - DblGauge GetOrAdd(Span labels); + DblGauge GetOrAdd(Span labels) + { + return DblGauge{dbl_gauge_get_or_add(hdl, labels)}; + } /** * @copydoc GetOrAdd @@ -234,7 +239,9 @@ public: } private: - explicit DblGaugeFamily(Impl* ptr); + using Handle = broker::telemetry::dbl_gauge_family_hdl*; + + explicit DblGaugeFamily(Handle hdl) : MetricFamily(upcast(hdl)) { } }; namespace detail diff --git a/src/telemetry/Histogram.cc b/src/telemetry/Histogram.cc deleted file mode 100644 index e6f95a55f3..0000000000 --- a/src/telemetry/Histogram.cc +++ /dev/null @@ -1,102 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. 
- -#include "zeek/telemetry/Histogram.h" - -#include - -#include "zeek/telemetry/Detail.h" - -#include "caf/telemetry/histogram.hpp" -#include "caf/telemetry/metric_family.hpp" -#include "caf/telemetry/metric_family_impl.hpp" - -namespace zeek::telemetry - { - -// -- IntHistogram --------------------------------------------------------------- - -void IntHistogram::Observe(int64_t value) noexcept - { - deref(pimpl).observe(value); - } - -int64_t IntHistogram::Sum() const noexcept - { - return deref(pimpl).sum(); - } - -size_t IntHistogram::NumBuckets() const noexcept - { - return deref(pimpl).buckets().size(); - } - -int64_t IntHistogram::CountAt(size_t index) const noexcept - { - auto xs = deref(pimpl).buckets(); - assert(index < xs.size()); - return xs[index].count.value(); - } - -int64_t IntHistogram::UpperBoundAt(size_t index) const noexcept - { - auto xs = deref(pimpl).buckets(); - assert(index < xs.size()); - return xs[index].upper_bound; - } - -IntHistogramFamily::IntHistogramFamily(Impl* ptr) : MetricFamily(upcast(ptr)) { } - -IntHistogram IntHistogramFamily::GetOrAdd(Span labels) - { - return with_native_labels(labels, - [this](auto nativeLabels) - { - auto hdl = opaque(deref(this, pimpl).get_or_add(nativeLabels)); - return IntHistogram{hdl}; - }); - } - -// -- DblHistogram --------------------------------------------------------------- - -void DblHistogram::Observe(double amount) noexcept - { - deref(pimpl).observe(amount); - } - -double DblHistogram::Sum() const noexcept - { - return deref(pimpl).sum(); - } - -size_t DblHistogram::NumBuckets() const noexcept - { - return deref(pimpl).buckets().size(); - } - -int64_t DblHistogram::CountAt(size_t index) const noexcept - { - auto xs = deref(pimpl).buckets(); - assert(index < xs.size()); - return xs[index].count.value(); - } - -double DblHistogram::UpperBoundAt(size_t index) const noexcept - { - auto xs = deref(pimpl).buckets(); - assert(index < xs.size()); - return xs[index].upper_bound; - } - 
-DblHistogramFamily::DblHistogramFamily(Impl* ptr) : MetricFamily(upcast(ptr)) { } - -DblHistogram DblHistogramFamily::GetOrAdd(Span labels) - { - return with_native_labels(labels, - [this](auto nativeLabels) - { - auto hdl = opaque(deref(this, pimpl).get_or_add(nativeLabels)); - return DblHistogram{hdl}; - }); - } - - } // namespace zeek::telemetry diff --git a/src/telemetry/Histogram.h b/src/telemetry/Histogram.h index 0097142bc2..069c2fc38d 100644 --- a/src/telemetry/Histogram.h +++ b/src/telemetry/Histogram.h @@ -9,6 +9,8 @@ #include "zeek/Span.h" #include "zeek/telemetry/MetricFamily.h" +#include "broker/telemetry/fwd.hh" + namespace zeek::telemetry { @@ -26,8 +28,6 @@ class IntHistogram public: friend class IntHistogramFamily; - struct Impl; - static inline const char* OpaqueName = "IntHistogramMetricVal"; IntHistogram() = delete; @@ -38,31 +38,36 @@ public: * Increments all buckets with an upper bound less than or equal to @p value * by one and adds @p value to the total sum of all observed values. */ - void Observe(int64_t value) noexcept; + void Observe(int64_t value) noexcept { return broker::telemetry::observe(hdl, value); } /// @return The sum of all observed values. - int64_t Sum() const noexcept; + int64_t Sum() const noexcept { return broker::telemetry::sum(hdl); } /// @return The number of buckets, including the implicit "infinite" bucket. - size_t NumBuckets() const noexcept; + size_t NumBuckets() const noexcept { return broker::telemetry::num_buckets(hdl); } /// @return The number of observations in the bucket at @p index. /// @pre index < NumBuckets() - int64_t CountAt(size_t index) const noexcept; + int64_t CountAt(size_t index) const noexcept { return broker::telemetry::count_at(hdl, index); } /// @return The upper bound of the bucket at @p index. 
/// @pre index < NumBuckets() - int64_t UpperBoundAt(size_t index) const noexcept; + int64_t UpperBoundAt(size_t index) const noexcept + { + return broker::telemetry::upper_bound_at(hdl, index); + } /** * @return Whether @c this and @p other refer to the same histogram. */ - constexpr bool IsSameAs(IntHistogram other) const noexcept { return pimpl == other.pimpl; } + constexpr bool IsSameAs(IntHistogram other) const noexcept { return hdl == other.hdl; } private: - explicit IntHistogram(Impl* ptr) noexcept : pimpl(ptr) { } + using Handle = broker::telemetry::int_histogram_hdl*; - Impl* pimpl; + explicit IntHistogram(Handle hdl) noexcept : hdl(hdl) { } + + Handle hdl; }; /** @@ -88,8 +93,6 @@ class IntHistogramFamily : public MetricFamily public: friend class Manager; - class Impl; - static inline const char* OpaqueName = "IntHistogramMetricFamilyVal"; using InstanceType = IntHistogram; @@ -101,7 +104,10 @@ public: * Returns the metrics handle for given labels, creating a new instance * lazily if necessary. */ - IntHistogram GetOrAdd(Span labels); + IntHistogram GetOrAdd(Span labels) + { + return IntHistogram{int_histogram_get_or_add(hdl, labels)}; + } /** * @copydoc GetOrAdd @@ -112,7 +118,9 @@ public: } private: - explicit IntHistogramFamily(Impl* ptr); + using Handle = broker::telemetry::int_histogram_family_hdl*; + + explicit IntHistogramFamily(Handle hdl) : MetricFamily(upcast(hdl)) { } }; /** @@ -125,8 +133,6 @@ class DblHistogram public: friend class DblHistogramFamily; - struct Impl; - static inline const char* OpaqueName = "DblHistogramMetricVal"; DblHistogram() = delete; @@ -137,31 +143,36 @@ public: * Increments all buckets with an upper bound less than or equal to @p value * by one and adds @p value to the total sum of all observed values. */ - void Observe(double value) noexcept; + void Observe(double value) noexcept { broker::telemetry::observe(hdl, value); } /// @return The sum of all observed values. 
- double Sum() const noexcept; + double Sum() const noexcept { return broker::telemetry::sum(hdl); } /// @return The number of buckets, including the implicit "infinite" bucket. - size_t NumBuckets() const noexcept; + size_t NumBuckets() const noexcept { return broker::telemetry::num_buckets(hdl); } /// @return The number of observations in the bucket at @p index. /// @pre index < NumBuckets() - int64_t CountAt(size_t index) const noexcept; + int64_t CountAt(size_t index) const noexcept { return broker::telemetry::count_at(hdl, index); } /// @return The upper bound of the bucket at @p index. /// @pre index < NumBuckets() - double UpperBoundAt(size_t index) const noexcept; + double UpperBoundAt(size_t index) const noexcept + { + return broker::telemetry::upper_bound_at(hdl, index); + } /** * @return Whether @c this and @p other refer to the same histogram. */ - constexpr bool IsSameAs(DblHistogram other) const noexcept { return pimpl == other.pimpl; } + constexpr bool IsSameAs(DblHistogram other) const noexcept { return hdl == other.hdl; } private: - explicit DblHistogram(Impl* ptr) noexcept : pimpl(ptr) { } + using Handle = broker::telemetry::dbl_histogram_hdl*; - Impl* pimpl; + explicit DblHistogram(Handle hdl) noexcept : hdl(hdl) { } + + Handle hdl; }; /** @@ -187,8 +198,6 @@ class DblHistogramFamily : public MetricFamily public: friend class Manager; - class Impl; - static inline const char* OpaqueName = "DblHistogramMetricFamilyVal"; using InstanceType = DblHistogram; @@ -200,7 +209,10 @@ public: * Returns the metrics handle for given labels, creating a new instance * lazily if necessary. 
*/ - DblHistogram GetOrAdd(Span labels); + DblHistogram GetOrAdd(Span labels) + { + return DblHistogram{dbl_histogram_get_or_add(hdl, labels)}; + } /** * @copydoc GetOrAdd @@ -211,7 +223,9 @@ public: } private: - explicit DblHistogramFamily(Impl* ptr); + using Handle = broker::telemetry::dbl_histogram_family_hdl*; + + explicit DblHistogramFamily(Handle hdl) : MetricFamily(upcast(hdl)) { } }; namespace detail diff --git a/src/telemetry/Manager.cc b/src/telemetry/Manager.cc index d7eac6904e..66fb3d5b94 100644 --- a/src/telemetry/Manager.cc +++ b/src/telemetry/Manager.cc @@ -5,102 +5,37 @@ #include #include "zeek/3rdparty/doctest.h" -#include "zeek/telemetry/Detail.h" +#include "zeek/broker/Manager.h" #include "zeek/telemetry/Timer.h" -#include "caf/telemetry/metric_registry.hpp" +#include "broker/telemetry/metric_registry.hh" + +namespace + { +using NativeManager = broker::telemetry::metric_registry; +using NativeManagerImpl = broker::telemetry::metric_registry_impl; +using NativeManagerImplPtr = zeek::IntrusivePtr; + } namespace zeek::telemetry { +Manager::Manager() + { + auto reg = NativeManager::pre_init_instance(); + NativeManagerImplPtr ptr{NewRef{}, reg.pimpl()}; + pimpl.swap(ptr); + } + Manager::~Manager() { } void Manager::InitPostScript() { } -IntCounterFamily Manager::IntCounterFam(std::string_view prefix, std::string_view name, - Span labels, - std::string_view helptext, std::string_view unit, - bool is_sum) +void Manager::InitPostBrokerSetup(broker::endpoint& ep) { - return with_native_labels(labels, - [&, this](auto xs) - { - auto ptr = deref(pimpl).counter_family(prefix, name, xs, helptext, - unit, is_sum); - return IntCounterFamily{opaque(ptr)}; - }); - } - -DblCounterFamily Manager::DblCounterFam(std::string_view prefix, std::string_view name, - Span labels, - std::string_view helptext, std::string_view unit, - bool is_sum) - { - return with_native_labels(labels, - [&, this](auto xs) - { - auto ptr = deref(pimpl).counter_family( - prefix, name, xs, 
helptext, unit, is_sum); - return DblCounterFamily{opaque(ptr)}; - }); - } - -IntGaugeFamily Manager::IntGaugeFam(std::string_view prefix, std::string_view name, - Span labels, std::string_view helptext, - std::string_view unit, bool is_sum) - { - return with_native_labels(labels, - [&, this](auto xs) - { - auto ptr = deref(pimpl).gauge_family(prefix, name, xs, helptext, - unit, is_sum); - return IntGaugeFamily{opaque(ptr)}; - }); - } - -DblGaugeFamily Manager::DblGaugeFam(std::string_view prefix, std::string_view name, - Span labels, std::string_view helptext, - std::string_view unit, bool is_sum) - { - return with_native_labels(labels, - [&, this](auto xs) - { - auto ptr = deref(pimpl).gauge_family( - prefix, name, xs, helptext, unit, is_sum); - return DblGaugeFamily{opaque(ptr)}; - }); - } - -IntHistogramFamily Manager::IntHistoFam(std::string_view prefix, std::string_view name, - Span labels, - Span ubounds, std::string_view helptext, - std::string_view unit, bool is_sum) - { - return with_native_labels( - labels, - [&, this](auto xs) - { - auto bounds = caf::span{ubounds.data(), ubounds.size()}; - auto ptr = deref(pimpl).histogram_family(prefix, name, xs, bounds, helptext, unit, - is_sum); - return IntHistogramFamily{opaque(ptr)}; - }); - } - -DblHistogramFamily Manager::DblHistoFam(std::string_view prefix, std::string_view name, - Span labels, - Span ubounds, std::string_view helptext, - std::string_view unit, bool is_sum) - { - return with_native_labels( - labels, - [&, this](auto xs) - { - auto bounds = caf::span{ubounds.data(), ubounds.size()}; - auto ptr = deref(pimpl).histogram_family(prefix, name, xs, bounds, helptext, - unit, is_sum); - return DblHistogramFamily{opaque(ptr)}; - }); + auto reg = NativeManager::merge(NativeManager{pimpl.get()}, ep); + NativeManagerImplPtr ptr{NewRef{}, reg.pimpl()}; + pimpl.swap(ptr); } } // namespace zeek::telemetry @@ -110,8 +45,6 @@ DblHistogramFamily Manager::DblHistoFam(std::string_view prefix, std::string_vie using 
namespace std::literals; using namespace zeek::telemetry; -using NativeManager = caf::telemetry::metric_registry; - namespace { @@ -129,8 +62,7 @@ SCENARIO("telemetry managers provide access to counter singletons") { GIVEN("a telemetry manager") { - NativeManager native_mgr; - Manager mgr{opaque(&native_mgr)}; + Manager mgr; WHEN("retrieving an IntCounter singleton") { auto first = mgr.CounterSingleton("zeek", "int-count", "test"); @@ -184,8 +116,7 @@ SCENARIO("telemetry managers provide access to counter families") { GIVEN("a telemetry manager") { - NativeManager native_mgr; - Manager mgr{opaque(&native_mgr)}; + Manager mgr; WHEN("retrieving an IntCounter family") { auto family = mgr.CounterFamily("zeek", "requests", {"method"}, "test", "1", true); @@ -244,8 +175,7 @@ SCENARIO("telemetry managers provide access to gauge singletons") { GIVEN("a telemetry manager") { - NativeManager native_mgr; - Manager mgr{opaque(&native_mgr)}; + Manager mgr; WHEN("retrieving an IntGauge singleton") { auto first = mgr.GaugeSingleton("zeek", "int-gauge", "test"); @@ -309,8 +239,7 @@ SCENARIO("telemetry managers provide access to gauge families") { GIVEN("a telemetry manager") { - NativeManager native_mgr; - Manager mgr{opaque(&native_mgr)}; + Manager mgr; WHEN("retrieving an IntGauge family") { auto family = mgr.GaugeFamily("zeek", "open-connections", {"protocol"}, "test"); @@ -369,8 +298,7 @@ SCENARIO("telemetry managers provide access to histogram singletons") { GIVEN("a telemetry manager") { - NativeManager native_mgr; - Manager mgr{opaque(&native_mgr)}; + Manager mgr; WHEN("retrieving an IntHistogram singleton") { const auto max_int = std::numeric_limits::max(); @@ -456,8 +384,7 @@ SCENARIO("telemetry managers provide access to histogram families") { GIVEN("a telemetry manager") { - NativeManager native_mgr; - Manager mgr{opaque(&native_mgr)}; + Manager mgr; WHEN("retrieving an IntHistogram family") { int64_t buckets[] = {10, 20}; diff --git a/src/telemetry/Manager.h 
b/src/telemetry/Manager.h index 1643fe0fa7..b260838778 100644 --- a/src/telemetry/Manager.h +++ b/src/telemetry/Manager.h @@ -7,11 +7,24 @@ #include #include +#include "zeek/IntrusivePtr.h" #include "zeek/Span.h" #include "zeek/telemetry/Counter.h" #include "zeek/telemetry/Gauge.h" #include "zeek/telemetry/Histogram.h" +#include "broker/telemetry/fwd.hh" + +namespace broker + { +class endpoint; + } + +namespace zeek::Broker + { +class Manager; + } + namespace zeek::telemetry { @@ -21,9 +34,9 @@ namespace zeek::telemetry class Manager { public: - class Impl; + friend class Broker::Manager; - explicit Manager(Impl* ptr) : pimpl(ptr) { } + Manager(); Manager(const Manager&) = delete; @@ -32,7 +45,7 @@ public: virtual ~Manager(); /** - * Initialization of the manager. This is called late during Bro's + * Initialization of the manager. This is called late during Zeek's * initialization after any scripts are processed. */ virtual void InitPostScript(); @@ -54,13 +67,15 @@ public: { if constexpr ( std::is_same::value ) { - return IntCounterFam(prefix, name, labels, helptext, unit, is_sum); + auto fam = int_counter_fam(ptr(), prefix, name, labels, helptext, unit, is_sum); + return IntCounterFamily{fam}; } else { static_assert(std::is_same::value, "metrics only support int64_t and double values"); - return DblCounterFam(prefix, name, labels, helptext, unit, is_sum); + auto fam = dbl_counter_fam(ptr(), prefix, name, labels, helptext, unit, is_sum); + return DblCounterFamily{fam}; } } @@ -148,13 +163,15 @@ public: { if constexpr ( std::is_same::value ) { - return IntGaugeFam(prefix, name, labels, helptext, unit, is_sum); + auto fam = int_gauge_fam(ptr(), prefix, name, labels, helptext, unit, is_sum); + return IntGaugeFamily{fam}; } else { static_assert(std::is_same::value, "metrics only support int64_t and double values"); - return DblGaugeFam(prefix, name, labels, helptext, unit, is_sum); + auto fam = dbl_gauge_fam(ptr(), prefix, name, labels, helptext, unit, is_sum); + 
return DblGaugeFamily{fam}; } } @@ -264,13 +281,17 @@ public: { if constexpr ( std::is_same::value ) { - return IntHistoFam(prefix, name, labels, default_upper_bounds, helptext, unit, is_sum); + auto fam = int_histogram_fam(ptr(), prefix, name, labels, default_upper_bounds, + helptext, unit, is_sum); + return IntHistogramFamily{fam}; } else { static_assert(std::is_same::value, "metrics only support int64_t and double values"); - return DblHistoFam(prefix, name, labels, default_upper_bounds, helptext, unit, is_sum); + auto fam = dbl_histogram_fam(ptr(), prefix, name, labels, default_upper_bounds, + helptext, unit, is_sum); + return DblHistogramFamily{fam}; } } @@ -368,30 +389,6 @@ public: } protected: - IntCounterFamily IntCounterFam(std::string_view prefix, std::string_view name, - Span labels, std::string_view helptext, - std::string_view unit, bool is_sum); - - DblCounterFamily DblCounterFam(std::string_view prefix, std::string_view name, - Span labels, std::string_view helptext, - std::string_view unit, bool is_sum); - - IntGaugeFamily IntGaugeFam(std::string_view prefix, std::string_view name, - Span labels, std::string_view helptext, - std::string_view unit, bool is_sum); - - DblGaugeFamily DblGaugeFam(std::string_view prefix, std::string_view name, - Span labels, std::string_view helptext, - std::string_view unit, bool is_sum); - - IntHistogramFamily IntHistoFam(std::string_view prefix, std::string_view name, - Span labels, Span ubounds, - std::string_view helptext, std::string_view unit, bool is_sum); - - DblHistogramFamily DblHistoFam(std::string_view prefix, std::string_view name, - Span labels, Span ubounds, - std::string_view helptext, std::string_view unit, bool is_sum); - template static void WithLabelNames(Span xs, F continuation) { if ( xs.size() <= 10 ) @@ -410,7 +407,13 @@ protected: } } - Impl* pimpl; + broker::telemetry::metric_registry_impl* ptr() { return pimpl.get(); } + + // Connects all the dots after the Broker Manager constructed the 
endpoint + // for this Zeek instance. Called from Broker::Manager::InitPostScript(). + void InitPostBrokerSetup(broker::endpoint&); + + IntrusivePtr pimpl; }; } // namespace zeek::telemetry diff --git a/src/telemetry/MetricFamily.cc b/src/telemetry/MetricFamily.cc deleted file mode 100644 index cdd7f52e4e..0000000000 --- a/src/telemetry/MetricFamily.cc +++ /dev/null @@ -1,54 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#include "zeek/telemetry/MetricFamily.h" - -#include "caf/telemetry/metric_family.hpp" - -namespace zeek::telemetry - { - -namespace - { - -namespace ct = caf::telemetry; - -using NativeMetricFamily = ct::metric_family; - -auto& deref(MetricFamily::Impl* ptr) - { - return *reinterpret_cast(ptr); - } - - } // namespace - -std::string_view MetricFamily::Prefix() const noexcept - { - return deref(pimpl).prefix(); - } - -std::string_view MetricFamily::Name() const noexcept - { - return deref(pimpl).name(); - } - -Span MetricFamily::LabelNames() const noexcept - { - return deref(pimpl).label_names(); - } - -std::string_view MetricFamily::Helptext() const noexcept - { - return deref(pimpl).helptext(); - } - -std::string_view MetricFamily::Unit() const noexcept - { - return deref(pimpl).unit(); - } - -bool MetricFamily::IsSum() const noexcept - { - return deref(pimpl).is_sum(); - } - - } // namespace zeek::telemetry diff --git a/src/telemetry/MetricFamily.h b/src/telemetry/MetricFamily.h index 6ea546d5d3..7fdf65fd43 100644 --- a/src/telemetry/MetricFamily.h +++ b/src/telemetry/MetricFamily.h @@ -8,6 +8,8 @@ #include "zeek/Span.h" +#include "broker/telemetry/metric_family.hh" + namespace zeek::telemetry { @@ -23,8 +25,6 @@ using LabelView = std::pair; class MetricFamily { public: - struct Impl; - MetricFamily() = delete; MetricFamily(const MetricFamily&) noexcept = default; MetricFamily& operator=(const MetricFamily&) noexcept = default; @@ -35,42 +35,47 @@ public: * script, may use a prefix that represents the 
application/script * or protocol (e.g. @c http) name. */ - std::string_view Prefix() const noexcept; + std::string_view Prefix() const noexcept { return broker::telemetry::prefix(hdl); } /** * @return The human-readable name of the metric, e.g., * @p open-connections. */ - std::string_view Name() const noexcept; + std::string_view Name() const noexcept { return broker::telemetry::name(hdl); } /** * @return The names for all label dimensions. */ - Span LabelNames() const noexcept; + Span LabelNames() const noexcept + { + return broker::telemetry::label_names(hdl); + } /** * @return A short explanation of the metric. */ - std::string_view Helptext() const noexcept; + std::string_view Helptext() const noexcept { return broker::telemetry::helptext(hdl); } /** * @return The unit of measurement, preferably a base unit such as * @c bytes or @c seconds. Dimensionless counts return the * pseudo-unit @c 1. */ - std::string_view Unit() const noexcept; + std::string_view Unit() const noexcept { return broker::telemetry::unit(hdl); } /** * @return Whether metrics of this family accumulate values, where only the * total value is of interest. For example, the total number of * HTTP requests. */ - bool IsSum() const noexcept; + bool IsSum() const noexcept { return broker::telemetry::is_sum(hdl); } protected: - explicit MetricFamily(Impl* ptr) : pimpl(ptr) { } + using Handle = broker::telemetry::metric_family_hdl*; - Impl* pimpl; + explicit MetricFamily(Handle hdl) : hdl(hdl) { } + + Handle hdl; }; } // namespace zeek::telemetry diff --git a/src/threading/BasicThread.h b/src/threading/BasicThread.h index f009a457df..3c5d3de1e8 100644 --- a/src/threading/BasicThread.h +++ b/src/threading/BasicThread.h @@ -27,7 +27,7 @@ public: * Creates a new thread object. Instantiating the object does however * not yet start the actual OS thread, that requires calling Start(). * - * Only Bro's main thread may create new thread instances. + * Only Zeek's main thread may create new thread instances. 
* * @param name A descriptive name for thread the thread. This may * show up in messages to the user. @@ -68,7 +68,7 @@ public: * executing Run(). Note that one can't restart a thread after a * Stop(), doing so will be ignored. * - * Only Bro's main thread must call this method. + * Only Zeek's main thread must call this method. */ void Start(); @@ -83,7 +83,7 @@ public: * Calling this method has no effect if Start() hasn't been executed * yet. * - * Only Bro's main thread must call this method. + * Only Zeek's main thread must call this method. */ void SignalStop(); @@ -94,7 +94,7 @@ public: * yet. If this is executed without calling SignalStop() first, * results are undefined. * - * Only Bro's main thread must call this method. + * Only Zeek's main thread must call this method. */ void WaitForStop(); @@ -143,14 +143,14 @@ protected: /** * Executed with Start(). This is a hook into starting the thread. It - * will be called from Bro's main thread after the OS thread has been + * will be called from Zeek's main thread after the OS thread has been * started. */ virtual void OnStart() { } /** * Executed with SignalStop(). This is a hook into preparing the - * thread for stopping. It will be called from Bro's main thread + * thread for stopping. It will be called from Zeek's main thread * before the thread has been signaled to stop. */ virtual void OnSignalStop() { } @@ -159,7 +159,7 @@ protected: * Executed with WaitForStop(). This is a hook into waiting for the * thread to stop. It must be overridden by derived classes and only * return once the thread has indeed finished processing. The method - * will be called from Bro's main thread. + * will be called from Zeek's main thread. */ virtual void OnWaitForStop() = 0; @@ -171,7 +171,7 @@ protected: /** * Destructor. This will be called by the manager. * - * Only Bro's main thread may delete thread instances. + * Only Zeek's main thread may delete thread instances. 
* */ virtual ~BasicThread(); diff --git a/src/threading/Formatter.h b/src/threading/Formatter.h index 1d463a9c0c..aa5f697616 100644 --- a/src/threading/Formatter.h +++ b/src/threading/Formatter.h @@ -107,7 +107,7 @@ public: static std::string Render(const Value::subnet_t& subnet); /** - * Convert a double into a string. This renders the double with Bro's + * Convert a double into a string. This renders the double with Zeek's * standard precision. * * This is a helper function that formatter implementations may use. diff --git a/src/threading/Manager.h b/src/threading/Manager.h index 62716e88fa..6e21a2e600 100644 --- a/src/threading/Manager.h +++ b/src/threading/Manager.h @@ -35,7 +35,7 @@ protected: * In addition to basic threads, the manager also provides additional * functionality specific to MsgThread instances. In particular, it polls * their outgoing message queue on a regular basis and feeds data sent into - * the rest of Bro. It also triggers the regular heartbeats. + * the rest of Zeek. It also triggers the regular heartbeats. */ class Manager { diff --git a/src/threading/MsgThread.h b/src/threading/MsgThread.h index 43b0c1ca3b..121b19a2db 100644 --- a/src/threading/MsgThread.h +++ b/src/threading/MsgThread.h @@ -29,7 +29,7 @@ class KillMeMessage; /** * A specialized thread that provides bi-directional message passing between - * Bro's main thread and the child thread. Messages are instances of + * Zeek's main thread and the child thread. Messages are instances of * BasicInputMessage and BasicOutputMessage for message sent \a to the child * thread and received \a from the child thread, respectively. * @@ -45,7 +45,7 @@ public: * Constructor. It automatically registers the thread with the * threading::Manager. * - * Only Bro's main thread may instantiate a new thread. + * Only Zeek's main thread may instantiate a new thread. */ MsgThread(); @@ -77,7 +77,7 @@ public: * Allows the child thread to send a specified Zeek event. 
The given Vals * must match the values expected by the event. * - * @param name name of the bro event to send + * @param name name of the Zeek event to send * * @param num_vals number of entries in \a vals * @@ -129,7 +129,7 @@ public: /** * Reports a fatal error from the child thread. The main thread will - * pass this to the Reporter once received. Bro will terminate after + * pass this to the Reporter once received. Zeek will terminate after * the message has been reported. * * Only the child thread may call this method. @@ -140,7 +140,7 @@ public: /** * Reports a fatal error from the child thread. The main thread will - * pass this to the Reporter once received. Bro will terminate with a + * pass this to the Reporter once received. Zeek will terminate with a * core dump after the message has been reported. * * Only the child thread may call this method. @@ -151,7 +151,7 @@ public: /** * Reports a potential internal problem from the child thread. The - * main thread will pass this to the Reporter once received. Bro will + * main thread will pass this to the Reporter once received. Zeek will * continue normally. * * Only the child thread may call this method. @@ -162,7 +162,7 @@ public: /** * Reports an internal program error from the child thread. The main - * thread will pass this to the Reporter once received. Bro will + * thread will pass this to the Reporter once received. Zeek will * terminate with a core dump after the message has been reported. * * Only the child thread may call this method. @@ -353,7 +353,7 @@ private: }; /** - * Base class for all message between Bro's main process and a MsgThread. + * Base class for all message between Zeek's main process and a MsgThread. */ class Message { @@ -389,7 +389,7 @@ private: }; /** - * Base class for messages sent from Bro's main thread to a child MsgThread. + * Base class for messages sent from Zeek's main thread to a child MsgThread. 
*/ class BasicInputMessage : public Message { @@ -404,7 +404,7 @@ protected: }; /** - * Base class for messages sent from a child MsgThread to Bro's main thread. + * Base class for messages sent from a child MsgThread to Zeek's main thread. */ class BasicOutputMessage : public Message { diff --git a/src/threading/Queue.h b/src/threading/Queue.h index 619177c405..b005476f57 100644 --- a/src/threading/Queue.h +++ b/src/threading/Queue.h @@ -21,7 +21,7 @@ namespace zeek::threading * The implementation uses multiple queues and reads/writes in rotary fashion * in an attempt to limit contention. * - * All Queue instances must be instantiated by Bro's main thread. + * All Queue instances must be instantiated by Zeek's main thread. * * TODO: Unclear how critical performance is for this qeueue. We could likely * optimize it further if helpful. diff --git a/src/threading/SerialTypes.cc b/src/threading/SerialTypes.cc index a68640bc2f..2418f718ed 100644 --- a/src/threading/SerialTypes.cc +++ b/src/threading/SerialTypes.cc @@ -73,7 +73,7 @@ std::string Field::TypeName() const { std::string n; - // We do not support tables, if the internal Bro type is table it + // We do not support tables, if the internal Zeek type is table it // always is a set. 
if ( type == TYPE_TABLE ) n = "set"; diff --git a/src/util.cc b/src/util.cc index ce8026ca2e..17fc590886 100644 --- a/src/util.cc +++ b/src/util.cc @@ -1223,6 +1223,20 @@ char* get_word(char*& s) return w; } +TEST_CASE("util get_word 2") + { + char orig[10]; + strcpy(orig, "two words"); + + char* a = (char*)orig; + const char* b; + int blen; + + get_word(9, a, blen, b); + CHECK(blen == 3); + CHECK(a == b); + } + void get_word(int length, const char* s, int& pwlen, const char*& pw) { pw = s; diff --git a/src/util.h b/src/util.h index 019162a7bb..e715a665ec 100644 --- a/src/util.h +++ b/src/util.h @@ -169,7 +169,7 @@ void seed_random(unsigned int seed); void set_thread_name(const char* name, pthread_t tid = pthread_self()); // Each event source that may generate events gets an internally unique ID. -// This is always LOCAL for a local Bro. For remote event sources, it gets +// This is always LOCAL for a local Zeek. For remote event sources, it gets // assigned by the RemoteSerializer. // // FIXME: Find a nicer place for this type definition. diff --git a/src/version.c.in b/src/version.c.in index ef967f16aa..311a0ee58a 100644 --- a/src/version.c.in +++ b/src/version.c.in @@ -5,7 +5,7 @@ char version[] = "@VERSION@"; // A C function that has the current version built into its name. // One can link a shared library against this to ensure that it won't -// load if the version of the main Bro binary differs compared to +// load if the version of the main Zeek binary differs compared to // what the library was compiled against. 
const char* BRO_VERSION_FUNCTION() { diff --git a/src/zeek-setup.cc b/src/zeek-setup.cc index 871ced8e53..7a6b048571 100644 --- a/src/zeek-setup.cc +++ b/src/zeek-setup.cc @@ -5,6 +5,7 @@ #include "zeek/zeek-config.h" #include +#include #include #include #include @@ -18,6 +19,7 @@ #include "zeek/3rdparty/sqlite3.h" #define DOCTEST_CONFIG_IMPLEMENT + #include "zeek/3rdparty/doctest.h" #include "zeek/Anon.h" #include "zeek/DFA.h" @@ -26,6 +28,7 @@ #include "zeek/Desc.h" #include "zeek/Event.h" #include "zeek/EventRegistry.h" +#include "zeek/EventTrace.h" #include "zeek/File.h" #include "zeek/Frag.h" #include "zeek/Frame.h" @@ -85,6 +88,88 @@ int perftools_leaks = 0; int perftools_profile = 0; #endif +#if OPENSSL_VERSION_NUMBER < 0x10100000L +struct CRYPTO_dynlock_value + { + std::mutex mtx; + }; + +namespace + { + +std::unique_ptr ssl_mtx_tbl; + +void ssl_lock_fn(int mode, int n, const char*, int) + { + if ( mode & CRYPTO_LOCK ) + ssl_mtx_tbl[static_cast(n)].lock(); + else + ssl_mtx_tbl[static_cast(n)].unlock(); + } + +CRYPTO_dynlock_value* ssl_dynlock_create(const char*, int) + { + return new CRYPTO_dynlock_value; + } + +void ssl_dynlock_lock(int mode, CRYPTO_dynlock_value* ptr, const char*, int) + { + if ( mode & CRYPTO_LOCK ) + ptr->mtx.lock(); + else + ptr->mtx.unlock(); + } + +void ssl_dynlock_destroy(CRYPTO_dynlock_value* ptr, const char*, int) + { + delete ptr; + } + +void do_ssl_init() + { + ERR_load_crypto_strings(); + OPENSSL_add_all_algorithms_conf(); + SSL_library_init(); + SSL_load_error_strings(); + ssl_mtx_tbl.reset(new std::mutex[CRYPTO_num_locks()]); + CRYPTO_set_locking_callback(ssl_lock_fn); + CRYPTO_set_dynlock_create_callback(ssl_dynlock_create); + CRYPTO_set_dynlock_lock_callback(ssl_dynlock_lock); + CRYPTO_set_dynlock_destroy_callback(ssl_dynlock_destroy); + } + +void do_ssl_deinit() + { + ERR_free_strings(); + EVP_cleanup(); + CRYPTO_cleanup_all_ex_data(); + CRYPTO_set_locking_callback(nullptr); + CRYPTO_set_dynlock_create_callback(nullptr); + 
CRYPTO_set_dynlock_lock_callback(nullptr); + CRYPTO_set_dynlock_destroy_callback(nullptr); + ssl_mtx_tbl.reset(); + } + + } // namespace +#else +namespace + { + +void do_ssl_init() + { + OPENSSL_init_ssl(0, nullptr); + } + +void do_ssl_deinit() + { + ERR_free_strings(); + EVP_cleanup(); + CRYPTO_cleanup_all_ex_data(); + } + + } // namespace +#endif + zeek::ValManager* zeek::val_mgr = nullptr; zeek::packet_analysis::Manager* zeek::packet_mgr = nullptr; zeek::analyzer::Manager* zeek::analyzer_mgr = nullptr; @@ -281,13 +366,13 @@ static void done_with_network() ZEEK_LSAN_DISABLE(); } -static void terminate_bro() +static void terminate_zeek() { - util::detail::set_processing_status("TERMINATING", "terminate_bro"); + util::detail::set_processing_status("TERMINATING", "terminate_zeek"); run_state::terminating = true; - iosource_mgr->Wakeup("terminate_bro"); + iosource_mgr->Wakeup("terminate_zeek"); // File analysis termination may produce events, so do it early on in // the termination process. @@ -299,12 +384,23 @@ static void terminate_bro() event_mgr.Enqueue(zeek_done, Args{}); timer_mgr->Expire(); + + // Drain() limits how many "generations" of newly created events + // it will process. When we're terminating, however, we're okay + // with long chains of events, and this makes the workings of + // event-tracing simpler. + // + // That said, we also need to ensure that it runs at least once, + // as it has side effects such as tickling triggers. event_mgr.Drain(); + while ( event_mgr.HasEvents() ) + event_mgr.Drain(); + if ( profiling_logger ) { // FIXME: There are some occasional crashes in the memory - // allocation code when killing Bro. Disabling this for now. + // allocation code when killing Zeek. Disabling this for now. if ( ! 
(signal_val == SIGTERM || signal_val == SIGINT) ) profiling_logger->Log(); @@ -318,17 +414,18 @@ static void terminate_bro() input_mgr->Terminate(); thread_mgr->Terminate(); broker_mgr->Terminate(); - dns_mgr->Terminate(); event_mgr.Drain(); plugin_mgr->FinishPlugins(); + finish_script_execution(); + delete zeekygen_mgr; delete packet_mgr; delete analyzer_mgr; delete file_mgr; - // broker_mgr, timer_mgr, and supervisor are deleted via iosource_mgr + // broker_mgr, timer_mgr, supervisor, and dns_mgr are deleted via iosource_mgr delete iosource_mgr; delete event_registry; delete log_mgr; @@ -384,6 +481,22 @@ static std::vector get_script_signature_files() return rval; } +// Helper for masking/unmasking the set of signals that apply to our signal +// handlers: sig_handler() in this file, as well as stem_signal_handler() and +// supervisor_signal_handler() in the Supervisor. +static void set_signal_mask(bool do_block) + { + sigset_t mask_set; + + sigemptyset(&mask_set); + sigaddset(&mask_set, SIGCHLD); + sigaddset(&mask_set, SIGTERM); + sigaddset(&mask_set, SIGINT); + + int res = pthread_sigmask(do_block ? SIG_BLOCK : SIG_UNBLOCK, &mask_set, 0); + assert(res == 0); + } + SetupResult setup(int argc, char** argv, Options* zopts) { ZEEK_LSAN_DISABLE(); @@ -430,6 +543,8 @@ SetupResult setup(int argc, char** argv, Options* zopts) if ( dns_type == DNS_DEFAULT && fake_dns() ) dns_type = DNS_FAKE; + dns_mgr = new DNS_Mgr(dns_type); + RETSIGTYPE (*oldhandler)(int); zeek_script_prefixes = options.script_prefixes; @@ -486,6 +601,12 @@ SetupResult setup(int argc, char** argv, Options* zopts) } #endif + // Mask signals relevant for our signal handlers here. We unmask them + // again further down, when all components that launch threads have done + // so. The launched threads inherit the active signal mask and thus + // prevent our signal handlers from running in unintended threads. 
+ set_signal_mask(true); + if ( options.supervisor_mode ) { Supervisor::Config cfg = {}; @@ -506,10 +627,7 @@ SetupResult setup(int argc, char** argv, Options* zopts) // DEBUG_MSG("HMAC key: %s\n", md5_digest_print(shared_hmac_md5_key)); init_hash_function(); - ERR_load_crypto_strings(); - OPENSSL_add_all_algorithms_conf(); - SSL_library_init(); - SSL_load_error_strings(); + do_ssl_init(); // FIXME: On systems that don't provide /dev/urandom, OpenSSL doesn't // seed the PRNG. We should do this here (but at least Linux, FreeBSD @@ -563,8 +681,6 @@ SetupResult setup(int argc, char** argv, Options* zopts) push_scope(nullptr, nullptr); - dns_mgr = new DNS_Mgr(dns_type); - // It would nice if this were configurable. This is similar to the // chicken and the egg problem. It would be configurable by parsing // policy, but we can't parse policy without DNS resolution. @@ -579,7 +695,7 @@ SetupResult setup(int argc, char** argv, Options* zopts) file_mgr = new file_analysis::Manager(); auto broker_real_time = ! options.pcap_file && ! 
options.deterministic_mode; broker_mgr = new Broker::Manager(broker_real_time); - telemetry_mgr = broker_mgr->NewTelemetryManager().release(); + telemetry_mgr = new telemetry::Manager; trigger_mgr = new trigger::Manager(); plugin_mgr->InitPreScript(); @@ -656,6 +772,9 @@ SetupResult setup(int argc, char** argv, Options* zopts) }; auto ipbb = make_intrusive(init_bifs, ipbid->Name(), false); + if ( options.event_trace_file ) + etm = make_unique(*options.event_trace_file); + run_state::is_parsing = true; yyparse(); run_state::is_parsing = false; @@ -717,9 +836,12 @@ SetupResult setup(int argc, char** argv, Options* zopts) file_mgr->InitPostScript(); dns_mgr->InitPostScript(); + // dns_mgr->LookupAddr("17.253.144.10"); + #ifdef USE_PERFTOOLS_DEBUG } #endif + set_signal_mask(false); if ( reporter->Errors() > 0 ) { @@ -819,9 +941,12 @@ SetupResult setup(int argc, char** argv, Options* zopts) if ( (oldhandler = setsignal(SIGHUP, sig_handler)) != SIG_DFL ) (void)setsignal(SIGHUP, oldhandler); + // If we were priming the DNS cache (i.e. -P was passed as an argument), flush anything + // remaining to be resolved and save the cache to disk. We can just exit now because + // we've done everything we need to do. The run loop isn't started in this case, so + // nothing else should be happening. if ( dns_type == DNS_PRIME ) { - dns_mgr->Verify(); dns_mgr->Resolve(); if ( ! dns_mgr->Save() ) @@ -941,13 +1066,11 @@ int cleanup(bool did_run_loop) done_with_network(); run_state::detail::delete_run(); - terminate_bro(); + terminate_zeek(); sqlite3_shutdown(); - ERR_free_strings(); - EVP_cleanup(); - CRYPTO_cleanup_all_ex_data(); + do_ssl_deinit(); // Close files after net_delete(), because net_delete() // might write to connection content files. 
@@ -972,7 +1095,7 @@ void zeek_terminate_loop(const char* reason) zeek::detail::done_with_network(); delete_run(); - zeek::detail::terminate_bro(); + zeek::detail::terminate_zeek(); // Close files after net_delete(), because net_delete() // might write to connection content files. diff --git a/src/zeek.bif b/src/zeek.bif index 3d28df7011..4665831d65 100644 --- a/src/zeek.bif +++ b/src/zeek.bif @@ -321,7 +321,7 @@ static int next_fmt(const char*& fmt, const zeek::Args* args, zeek::ODesc* d, in ## ## Returns: The wall-clock time. ## -## .. zeek:see:: network_time +## .. zeek:see:: network_time set_network_time function current_time%(%): time %{ return zeek::make_intrusive(zeek::util::current_time()); @@ -333,12 +333,26 @@ function current_time%(%): time ## ## Returns: The timestamp of the packet processed. ## -## .. zeek:see:: current_time +## .. zeek:see:: current_time set_network_time function network_time%(%): time %{ return zeek::make_intrusive(zeek::run_state::network_time); %} +## Sets the timestamp associated with the last packet processed. Used for +## event replaying. +## +## nt: The time to which to set "network time". +## +## Returns: The timestamp of the packet processed. +## +## .. zeek:see:: current_time network_time +function set_network_time%(nt: time%): bool + %{ + zeek::run_state::network_time = nt; + return zeek::val_mgr->True(); + %} + ## Returns a system environment variable. ## ## var: The name of the variable whose value to request. @@ -2574,7 +2588,7 @@ function count_to_port%(num: count, proto: transport_proto%): port ## Returns: The :zeek:type:`string` *ip* as :zeek:type:`addr`, or the unspecified ## address ``::`` if the input string does not parse correctly. ## -## .. zeek:see:: to_count to_int to_port count_to_v4_addr raw_bytes_to_v4_addr +## .. 
zeek:see:: to_count to_int to_port count_to_v4_addr raw_bytes_to_v4_addr raw_bytes_to_v6_addr ## to_subnet function to_addr%(ip: string%): addr %{ @@ -2614,7 +2628,7 @@ function is_valid_ip%(ip: string%): bool ## Returns: The *sn* string as a :zeek:type:`subnet`, or the unspecified subnet ## ``::/0`` if the input string does not parse correctly. ## -## .. zeek:see:: to_count to_int to_port count_to_v4_addr raw_bytes_to_v4_addr +## .. zeek:see:: to_count to_int to_port count_to_v4_addr raw_bytes_to_v4_addr raw_bytes_to_v6_addr ## to_addr function to_subnet%(sn: string%): subnet %{ @@ -2696,7 +2710,7 @@ function to_double%(str: string%): double ## ## Returns: The :zeek:type:`count` *ip* as :zeek:type:`addr`. ## -## .. zeek:see:: raw_bytes_to_v4_addr to_addr to_subnet +## .. zeek:see:: raw_bytes_to_v4_addr to_addr to_subnet raw_bytes_to_v6_addr function count_to_v4_addr%(ip: count%): addr %{ if ( ip > 4294967295LU ) @@ -2733,6 +2747,34 @@ function raw_bytes_to_v4_addr%(b: string%): addr return zeek::make_intrusive(htonl(a)); %} +## Converts a :zeek:type:`string` of bytes into an IPv6 address. In particular, +## this function interprets the first 16 bytes of the string as an IPv6 address +## in network order. +## +## b: The raw bytes (:zeek:type:`string`) to convert. +## +## Returns: The byte :zeek:type:`string` *b* as :zeek:type:`addr`. +## +## .. 
zeek:see:: raw_bytes_to_v6_addr to_addr to_subnet +function raw_bytes_to_v6_addr%(x: string%): addr + %{ + uint32_t bytes[4] = {0, 0, 0, 0}; + + if ( x->Len() < 16 ) + zeek::emit_builtin_error("too short a string as input to raw_bytes_to_v6_addr()"); + + else + { + const u_char* xp = x->Bytes(); + bytes[0] = htonl((xp[0] << 24) | (xp[1] << 16) | (xp[2] << 8) | xp[3]); + bytes[1] = htonl((xp[0+4] << 24) | (xp[1+4] << 16) | (xp[2+4] << 8) | xp[3+4]); + bytes[2] = htonl((xp[0+8] << 24) | (xp[1+8] << 16) | (xp[2+8] << 8) | xp[3+8]); + bytes[3] = htonl((xp[0+12] << 24) | (xp[1+12] << 16) | (xp[2+12] << 8) | xp[3+12]); + } + + return zeek::make_intrusive(bytes); + %} + ## Converts a :zeek:type:`string` to a :zeek:type:`port`. ## ## s: The :zeek:type:`string` to convert. @@ -3600,8 +3642,8 @@ function dump_packet%(pkt: pcap_packet, file_name: string%) : bool class LookupHostCallback : public zeek::detail::DNS_Mgr::LookupCallback { public: - LookupHostCallback(zeek::detail::trigger::Trigger* arg_trigger, const zeek::detail::CallExpr* arg_call, - bool arg_lookup_name) + LookupHostCallback(zeek::detail::trigger::Trigger* arg_trigger, + const zeek::detail::CallExpr* arg_call, bool arg_lookup_name) { Ref(arg_trigger); trigger = arg_trigger; @@ -3615,7 +3657,7 @@ public: } // Overridden from zeek::detail::DNS_Mgr:Lookup:Callback. - virtual void Resolved(const char* name) + void Resolved(const std::string& name) override { zeek::Val* result = new zeek::StringVal(name); trigger->Cache(call, result); @@ -3623,14 +3665,14 @@ public: trigger->Release(); } - virtual void Resolved(zeek::TableVal* addrs) + void Resolved(zeek::TableValPtr addrs) override { // No Ref() for addrs. 
- trigger->Cache(call, addrs); + trigger->Cache(call, addrs.get()); trigger->Release(); } - virtual void Timeout() + void Timeout() override { if ( lookup_name ) { @@ -3682,7 +3724,7 @@ function lookup_addr%(host: addr%) : string frame->SetDelayed(); trigger->Hold(); - zeek::detail::dns_mgr->AsyncLookupAddr(host->AsAddr(), + zeek::detail::dns_mgr->LookupAddr(host->AsAddr(), new LookupHostCallback(trigger, frame->GetCall(), true)); return nullptr; %} @@ -3711,7 +3753,7 @@ function lookup_hostname_txt%(host: string%) : string frame->SetDelayed(); trigger->Hold(); - zeek::detail::dns_mgr->AsyncLookupNameText(host->CheckString(), + zeek::detail::dns_mgr->Lookup(host->CheckString(), T_TXT, new LookupHostCallback(trigger, frame->GetCall(), true)); return nullptr; %} @@ -3740,7 +3782,7 @@ function lookup_hostname%(host: string%) : addr_set frame->SetDelayed(); trigger->Hold(); - zeek::detail::dns_mgr->AsyncLookupName(host->CheckString(), + zeek::detail::dns_mgr->LookupHost(host->CheckString(), new LookupHostCallback(trigger, frame->GetCall(), false)); return nullptr; %} diff --git a/src/zeekygen/Manager.cc b/src/zeekygen/Manager.cc index 903d543e6b..5eb2d7b668 100644 --- a/src/zeekygen/Manager.cc +++ b/src/zeekygen/Manager.cc @@ -61,11 +61,11 @@ Manager::Manager(const string& arg_config, const string& bro_command) if ( getenv("ZEEK_DISABLE_ZEEKYGEN") ) disabled = true; - // If running bro without the "-X" option, then we don't need bro_mtime. + // If running Zeek without the "-X" option, then we don't need bro_mtime. if ( disabled || arg_config.empty() ) return; - // Find the absolute or relative path to bro by checking each PATH + // Find the absolute or relative path to Zeek by checking each PATH // component and also the current directory (so that this works if // bro_command is a relative path). 
const char* env_path = getenv("PATH"); @@ -73,9 +73,9 @@ Manager::Manager(const string& arg_config, const string& bro_command) string path_to_bro = util::find_file(bro_command, path); struct stat s; - // One way that find_file() could fail is when bro is located in + // One way that find_file() could fail is when Zeek is located in // a PATH component that starts with a tilde (such as "~/bin"). A simple - // workaround is to just run bro with a relative or absolute path. + // workaround is to just run Zeek with a relative or absolute path. if ( path_to_bro.empty() || stat(path_to_bro.c_str(), &s) < 0 ) reporter->InternalError("Zeekygen can't get mtime of zeek binary %s (try again by " "specifying the absolute or relative path to Zeek): %s", diff --git a/src/zeekygen/Manager.h b/src/zeekygen/Manager.h index 54202d6b08..772079df3d 100644 --- a/src/zeekygen/Manager.h +++ b/src/zeekygen/Manager.h @@ -58,8 +58,8 @@ public: * Ctor. * @param config Path to a Zeekygen config file if documentation is to be * written to disk. - * @param bro_command The command used to invoke the bro process. - * It's used when checking for out-of-date targets. If the bro binary is + * @param bro_command The command used to invoke the Zeek process. + * It's used when checking for out-of-date targets. If the Zeek binary is * newer then a target, it needs to be rebuilt. */ Manager(const std::string& config, const std::string& bro_command); @@ -88,15 +88,15 @@ public: void GenerateDocs() const; /** - * Register Bro script for which information/documentation will be gathered. - * @param path Absolute path to Bro script. + * Register Zeek script for which information/documentation will be gathered. + * @param path Absolute path to Zeek script. */ void Script(const std::string& path); /** - * Register Bro script dependency ("@load"). - * @param path Absolute path to a Bro script. 
- * @param dep Absolute path to a Bro script being "@load"d from script given + * Register Zeek script dependency ("@load"). + * @param path Absolute path to a Zeek script. + * @param dep Absolute path to a Zeek script being "@load"d from script given * by \a path. */ void ScriptDependency(const std::string& path, const std::string& dep); @@ -104,7 +104,7 @@ public: /** * Register a module usage (script may export identifiers in to the * module namespace). - * @param path Absolute path to a Bro script. + * @param path Absolute path to a Zeek script. * @param module The module which script given by \a path is using. */ void ModuleUsage(const std::string& path, const std::string& module); @@ -128,7 +128,7 @@ public: * gathered. * @param id The identifier of the record type which has the field. * @param field The field name/type information. - * @param path Absolute path to a Bro script in which this field is + * @param path Absolute path to a Zeek script in which this field is * declared. This can be different from the place where the record type * is declared due to redefs. * @param from_redef The field is from a record redefinition. @@ -139,7 +139,7 @@ public: /** * Register a redefinition of a particular identifier. * @param id The identifier being redef'd. - * @param path Absolute path to a Bro script doing the redef. + * @param path Absolute path to a Zeek script doing the redef. * @param ic The initialization class that was used (e.g. =, +=, -=). * @param init_expr The intiialization expression that was used. */ @@ -150,7 +150,7 @@ public: /** * Register Zeekygen script summary content. - * @param path Absolute path to a Bro script. + * @param path Absolute path to a Zeek script. * @param comment Zeekygen-style summary comment ("##!") to associate with * script given by \a path. */ @@ -209,7 +209,7 @@ public: * @param target_file output file of a Zeekygen target. * @param dependencies all dependencies of the target. 
* @return true if modification time of \a target_file is newer than - * modification time of Bro binary, Zeekygen config file, and all + * modification time of Zeek binary, Zeekygen config file, and all * dependencies, else false. */ template diff --git a/src/zeekygen/ScriptInfo.cc b/src/zeekygen/ScriptInfo.cc index 61e2316cf3..f44232014f 100644 --- a/src/zeekygen/ScriptInfo.cc +++ b/src/zeekygen/ScriptInfo.cc @@ -340,7 +340,7 @@ void ScriptInfo::DoInitPostScript() state_vars.push_back(info); } - // The following enum types are automatically created internally in Bro, + // The following enum types are automatically created internally in Zeek, // so just manually associating them with scripts for now. if ( name == "base/frameworks/input/main.zeek" ) { diff --git a/src/zeekygen/ScriptInfo.h b/src/zeekygen/ScriptInfo.h index 6ad92fcb95..48ee3fde24 100644 --- a/src/zeekygen/ScriptInfo.h +++ b/src/zeekygen/ScriptInfo.h @@ -27,7 +27,7 @@ using id_info_set = std::set; using id_info_list = std::list; /** - * Information about a Bro script. + * Information about a Zeek script. */ class ScriptInfo : public Info { diff --git a/src/zeekygen/Target.cc b/src/zeekygen/Target.cc index a4e62b742d..ef5b32d092 100644 --- a/src/zeekygen/Target.cc +++ b/src/zeekygen/Target.cc @@ -260,7 +260,7 @@ void AnalyzerTarget::DoFindDependencies(const std::vector& infos) { // TODO: really should add to dependency list the tag type's ID and // all bif items for matching analyzer plugins, but that's all dependent - // on the bro binary itself, so I'm cheating. + // on the Zeek binary itself, so I'm cheating. } void AnalyzerTarget::DoGenerate() const diff --git a/src/zeekygen/Target.h b/src/zeekygen/Target.h index cbd71b518a..7ed0e08815 100644 --- a/src/zeekygen/Target.h +++ b/src/zeekygen/Target.h @@ -43,7 +43,7 @@ struct TargetFile /** * A Zeekygen target abstract base class. A target is generally any portion of - * documentation that Bro can build. It's identified by a type (e.g. 
script, + * documentation that Zeek can build. It's identified by a type (e.g. script, * identifier, package), a pattern (e.g. "example.zeek", "HTTP::Info"), and * a path to an output file. */ diff --git a/testing/benchmark/broker/node.zeek b/testing/benchmark/broker/node.zeek new file mode 100644 index 0000000000..14c7afd34a --- /dev/null +++ b/testing/benchmark/broker/node.zeek @@ -0,0 +1,33 @@ +redef exit_only_after_terminate = T; + +global event_count = 0; + +global event_1: event(val: count); + +event event_1(value: count) + { + ++event_count; + } + +event bye_bye() + { + print "received bye-bye event"; + terminate(); + } + +event print_stats() + { + print "received ", event_count, " events/s"; + event_count = 0; + schedule 1sec { print_stats() }; + } + +event zeek_init() + { + local broker_port = to_port(getenv("BROKER_PORT")); + print "trying to connect to port ", broker_port; + Broker::subscribe("benchmark/terminate"); + Broker::subscribe("benchmark/events"); + Broker::peer("127.0.0.1", broker_port); + schedule 1sec { print_stats() }; + } diff --git a/testing/benchmark/broker/sender.zeek b/testing/benchmark/broker/sender.zeek new file mode 100644 index 0000000000..da020e69e6 --- /dev/null +++ b/testing/benchmark/broker/sender.zeek @@ -0,0 +1,28 @@ +redef exit_only_after_terminate = T; + +global value = 0; + +global event_1: event(val: count); + +event bye_bye() + { + print "received bye-bye event"; + terminate(); + } + +event publish_next() + { + Broker::publish("benchmark/events", event_1, value); + ++value; + schedule 1msec { publish_next() }; + } + +event zeek_init() + { + local broker_port = to_port(getenv("BROKER_PORT")); + print fmt("trying to connect to port %s", broker_port); + Broker::subscribe("benchmark/terminate"); + Broker::peer("127.0.0.1", broker_port); + schedule 250usec { publish_next() }; + } + diff --git a/testing/benchmark/broker/server.zeek b/testing/benchmark/broker/server.zeek new file mode 100644 index 0000000000..d85e7e754f --- 
/dev/null +++ b/testing/benchmark/broker/server.zeek @@ -0,0 +1,33 @@ +redef exit_only_after_terminate = T; + +global event_count = 0; + +global event_1: event(val: count); + +event event_1(value: count) + { + ++event_count; + } + +event bye_bye() + { + print "received bye-bye event"; + terminate(); + } + +event print_stats() + { + print "received ", event_count, " events/s"; + event_count = 0; + schedule 1sec { print_stats() }; + } + +event zeek_init() + { + local broker_port = to_port(getenv("BROKER_PORT")); + Broker::subscribe("benchmark/terminate"); + Broker::subscribe("benchmark/events"); + Broker::listen("127.0.0.1", broker_port); + print fmt("listening on port %d", broker_port); + schedule 1sec { print_stats() }; + } diff --git a/testing/btest/Baseline.zam/bifs.to_addr/error b/testing/btest/Baseline.zam/bifs.to_addr/error new file mode 100644 index 0000000000..60ef944fd6 --- /dev/null +++ b/testing/btest/Baseline.zam/bifs.to_addr/error @@ -0,0 +1,2 @@ +### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. +error in <...>/to_addr.zeek, line 20: failed converting string to IP address (not an IP) diff --git a/testing/btest/Baseline.zam/bifs.to_addr/output b/testing/btest/Baseline.zam/bifs.to_addr/output new file mode 100644 index 0000000000..0ee35ee59b --- /dev/null +++ b/testing/btest/Baseline.zam/bifs.to_addr/output @@ -0,0 +1,10 @@ +### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. 
+to_addr(0.0.0.0) = 0.0.0.0 (SUCCESS) +to_addr(1.2.3.4) = 1.2.3.4 (SUCCESS) +to_addr(01.02.03.04) = 1.2.3.4 (SUCCESS) +to_addr(001.002.003.004) = 1.2.3.4 (SUCCESS) +to_addr(10.20.30.40) = 10.20.30.40 (SUCCESS) +to_addr(100.200.30.40) = 100.200.30.40 (SUCCESS) +to_addr(10.0.0.0) = 10.0.0.0 (SUCCESS) +to_addr(10.00.00.000) = 10.0.0.0 (SUCCESS) +to_addr(not an IP) = :: (SUCCESS) diff --git a/testing/btest/Baseline.zam/bifs.to_double_from_string/error b/testing/btest/Baseline.zam/bifs.to_double_from_string/error new file mode 100644 index 0000000000..55535e96f5 --- /dev/null +++ b/testing/btest/Baseline.zam/bifs.to_double_from_string/error @@ -0,0 +1,3 @@ +### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. +error in <...>/to_double_from_string.zeek, line 15: bad conversion to double (NotADouble) +error in <...>/to_double_from_string.zeek, line 16: bad conversion to double () diff --git a/testing/btest/Baseline.zam/bifs.to_double_from_string/output b/testing/btest/Baseline.zam/bifs.to_double_from_string/output new file mode 100644 index 0000000000..16797583f1 --- /dev/null +++ b/testing/btest/Baseline.zam/bifs.to_double_from_string/output @@ -0,0 +1,6 @@ +### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. 
+to_double(3.14) = 3.14 (SUCCESS) +to_double(-3.14) = -3.14 (SUCCESS) +to_double(0) = 0.0 (SUCCESS) +to_double(NotADouble) = 0.0 (SUCCESS) +to_double() = 0.0 (SUCCESS) diff --git a/testing/btest/Baseline/bifs.enum_to_int/out b/testing/btest/Baseline/bifs.enum_to_int/out index 203ded3505..2fcc6a9e5d 100644 --- a/testing/btest/Baseline/bifs.enum_to_int/out +++ b/testing/btest/Baseline/bifs.enum_to_int/out @@ -5,3 +5,15 @@ C, 2 AV, 10 BV, 11 CV, 12 +T +T +T +T +T +T +T +T +T +T +T +T diff --git a/testing/btest/Baseline/bifs.is_ascii/out b/testing/btest/Baseline/bifs.is_ascii/out index 1956db8698..34333b473a 100644 --- a/testing/btest/Baseline/bifs.is_ascii/out +++ b/testing/btest/Baseline/bifs.is_ascii/out @@ -1,3 +1,4 @@ ### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. F T +T diff --git a/testing/btest/Baseline/bifs.raw_bytes_to_v6_addr/out b/testing/btest/Baseline/bifs.raw_bytes_to_v6_addr/out new file mode 100644 index 0000000000..059b996fd8 --- /dev/null +++ b/testing/btest/Baseline/bifs.raw_bytes_to_v6_addr/out @@ -0,0 +1,4 @@ +### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. 
+4142:4344:4546:4748:494b:4c4d:4e4f:5051 +:: +dada:beef::4e5e:cff:fe6a:8671 diff --git a/testing/btest/Baseline/bifs.string_utils/out b/testing/btest/Baseline/bifs.string_utils/out index 147d0e9ea7..7e51c14446 100644 --- a/testing/btest/Baseline/bifs.string_utils/out +++ b/testing/btest/Baseline/bifs.string_utils/out @@ -17,14 +17,17 @@ Content checking ---------------- is_num abc : 0 is_num 123 : 1 +is_num '' : 0 is_alpha ab : 1 is_alpha 1a : 0 is_alpha a1 : 0 +is_alpha '' : 0 is_alnum ab : 1 is_alnum 1a : 1 is_alnum a1 : 1 is_alnum 12 : 1 is_alnum ##12: 0 +is_alnum '' : 0 String counting (input str 'aabbaa') ------------------------------------ diff --git a/testing/btest/Baseline/broker.disconnect/recv.recv.out b/testing/btest/Baseline/broker.disconnect/recv.recv.out index c512d83470..73c3301683 100644 --- a/testing/btest/Baseline/broker.disconnect/recv.recv.out +++ b/testing/btest/Baseline/broker.disconnect/recv.recv.out @@ -1,3 +1,4 @@ ### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. +endpoint discovered, found a new peer in the network peer added, handshake successful receiver got event, 1 diff --git a/testing/btest/Baseline/broker.disconnect/recv2.recv2.out b/testing/btest/Baseline/broker.disconnect/recv2.recv2.out index 335187e4b6..6091d6a0e4 100644 --- a/testing/btest/Baseline/broker.disconnect/recv2.recv2.out +++ b/testing/btest/Baseline/broker.disconnect/recv2.recv2.out @@ -1,3 +1,4 @@ ### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. 
+endpoint discovered, found a new peer in the network peer added, handshake successful receiver got event, 2 diff --git a/testing/btest/Baseline/broker.disconnect/send.send.out b/testing/btest/Baseline/broker.disconnect/send.send.out index 95f77841f4..ce37aaa5a1 100644 --- a/testing/btest/Baseline/broker.disconnect/send.send.out +++ b/testing/btest/Baseline/broker.disconnect/send.send.out @@ -1,5 +1,9 @@ ### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. +endpoint discovered, found a new peer in the network peer added, handshake successful peer lost, lost connection to remote peer +endpoint unreachable, lost the last path +endpoint discovered, found a new peer in the network peer added, handshake successful peer lost, lost connection to remote peer +endpoint unreachable, lost the last path diff --git a/testing/btest/Baseline/broker.error/send.out b/testing/btest/Baseline/broker.error/send.out index da3c12e129..33955b2d4a 100644 --- a/testing/btest/Baseline/broker.error/send.out +++ b/testing/btest/Baseline/broker.error/send.out @@ -1,2 +1,2 @@ ### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. -error, Broker::PEER_INVALID, (invalid-node, *1.2.3.4:1947, "cannot unpeer from unknown peer") +error, Broker::PEER_INVALID, (00000000-0000-0000-0000-000000000000, *1.2.3.4:1947, "cannot unpeer from unknown peer") diff --git a/testing/btest/Baseline/broker.ssl_auth_failure/send.send.out b/testing/btest/Baseline/broker.ssl_auth_failure/send.send.out index 861f3823e9..0085e61336 100644 --- a/testing/btest/Baseline/broker.ssl_auth_failure/send.send.out +++ b/testing/btest/Baseline/broker.ssl_auth_failure/send.send.out @@ -1,2 +1,2 @@ ### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. 
-sender error: code=Broker::PEER_UNAVAILABLE msg=(invalid-node, *, "unable to connect to remote peer") +sender error: code=Broker::PEER_UNAVAILABLE msg=(00000000-0000-0000-0000-000000000000, *, "unable to connect to remote peer") diff --git a/testing/btest/Baseline/core.fake_dns/out b/testing/btest/Baseline/core.fake_dns/out index fe6c13aeb8..45539d8098 100644 --- a/testing/btest/Baseline/core.fake_dns/out +++ b/testing/btest/Baseline/core.fake_dns/out @@ -1,10 +1,10 @@ ### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. { 51f3:f001:5b82:e802:c401:6750:7b95:89bb, -4cc7:de52:d869:b2f9:f215:19b8:c828:3bdd, -7a5f:b783:9808:380e:b1a2:ce20:b58e:2a4a +7a5f:b783:9808:380e:b1a2:ce20:b58e:2a4a, +4cc7:de52:d869:b2f9:f215:19b8:c828:3bdd } -lookup_hostname_txt, fake_text_lookup_result_bro.wp.dg.cx +lookup_hostname_txt, fake_lookup_result_T_TXT_bro.wp.dg.cx lookup_hostname, { ce06:236:f21f:587:8c10:121d:c47d:b412 } diff --git a/testing/btest/Baseline/core.scalar-vector/out b/testing/btest/Baseline/core.scalar-vector/out new file mode 100644 index 0000000000..4f5fc89efa --- /dev/null +++ b/testing/btest/Baseline/core.scalar-vector/out @@ -0,0 +1,16 @@ +### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. 
+warning in <...>/scalar-vector.zeek, line 11: mixing vector and scalar operands is deprecated (string) (vector) +warning in <...>/scalar-vector.zeek, line 12: mixing vector and scalar operands is deprecated (vector) (string) +warning in <...>/scalar-vector.zeek, line 13: mixing vector and scalar operands is deprecated (string) (vector) +warning in <...>/scalar-vector.zeek, line 17: mixing vector and scalar operands is deprecated (count) (vector) +warning in <...>/scalar-vector.zeek, line 18: mixing vector and scalar operands is deprecated (count) (vector) +warning in <...>/scalar-vector.zeek, line 19: mixing vector and scalar operands is deprecated (vector) (count) +warning in <...>/scalar-vector.zeek, line 20: mixing vector and scalar operands is deprecated (vector) (count) +[F, T, F] +[aa, ba, ca] +[aa, ab, ac] +[F, T, F] +[2, 4, 6] +[1, 0, 1] +[0, 1, 1] +[1, 2, 3, 1] diff --git a/testing/btest/Baseline/coverage.bare-mode-errors/errors b/testing/btest/Baseline/coverage.bare-mode-errors/errors index 31f9346536..bc9bd28f83 100644 --- a/testing/btest/Baseline/coverage.bare-mode-errors/errors +++ b/testing/btest/Baseline/coverage.bare-mode-errors/errors @@ -1,9 +1,9 @@ ### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. ### NOTE: This file has been sorted with diff-sort. -warning in <...>/extract-certs-pem.zeek, line 1: deprecated script loaded from <...>/__load__.zeek:12 "Remove in v5.1. Use log-certs-base64.zeek instead." +warning in <...>/extract-certs-pem.zeek, line 1: deprecated script loaded from <...>/__load__.zeek:15 "Remove in v5.1. Use log-certs-base64.zeek instead." warning in <...>/extract-certs-pem.zeek, line 1: deprecated script loaded from command line arguments "Remove in v5.1. Use log-certs-base64.zeek instead." -warning in <...>/log-ocsp.zeek, line 1: deprecated script loaded from <...>/test-all-policy.zeek:59 ("Remove in v5.1. 
OCSP logging is now enabled by default") -warning in <...>/log-ocsp.zeek, line 1: deprecated script loaded from <...>/test-all-policy.zeek:59 ("Remove in v5.1. OCSP logging is now enabled by default") +warning in <...>/log-ocsp.zeek, line 1: deprecated script loaded from <...>/test-all-policy.zeek:65 ("Remove in v5.1. OCSP logging is now enabled by default") +warning in <...>/log-ocsp.zeek, line 1: deprecated script loaded from <...>/test-all-policy.zeek:65 ("Remove in v5.1. OCSP logging is now enabled by default") warning in <...>/log-ocsp.zeek, line 1: deprecated script loaded from command line arguments ("Remove in v5.1. OCSP logging is now enabled by default") -warning in <...>/notary.zeek, line 1: deprecated script loaded from <...>/__load__.zeek:4 ("Remove in v5.1. Please switch to other more modern approaches like SCT validation (validate-sct.zeek).") +warning in <...>/notary.zeek, line 1: deprecated script loaded from <...>/__load__.zeek:5 ("Remove in v5.1. Please switch to other more modern approaches like SCT validation (validate-sct.zeek).") warning in <...>/notary.zeek, line 1: deprecated script loaded from command line arguments ("Remove in v5.1. Please switch to other more modern approaches like SCT validation (validate-sct.zeek).") diff --git a/testing/btest/Baseline/language.cross-product-init/output b/testing/btest/Baseline/language.cross-product-init/output index 3705de05a1..c4cff483cc 100644 --- a/testing/btest/Baseline/language.cross-product-init/output +++ b/testing/btest/Baseline/language.cross-product-init/output @@ -1,7 +1,7 @@ ### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. 
{ [bar, 1.2.0.0/19] , -[foo, 1.2.0.0/19] , [foo, 5.6.0.0/21] , +[foo, 1.2.0.0/19] , [bar, 5.6.0.0/21] } diff --git a/testing/btest/Baseline/language.expire-func-type-check/output b/testing/btest/Baseline/language.expire-func-type-check/output index 05eff0b0fe..091ce43451 100644 --- a/testing/btest/Baseline/language.expire-func-type-check/output +++ b/testing/btest/Baseline/language.expire-func-type-check/output @@ -1,23 +1,9 @@ ### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. warning in <...>/expire-func-type-check.zeek, line 16: Wrong number of arguments for function. Expected 2, got 0. (function() : interval) error in <...>/expire-func-type-check.zeek, line 38: &expire_func argument type clash (&expire_func=invalid_expire_func_no_params) -warning in <...>/expire-func-type-check.zeek, line 16: Wrong number of arguments for function. Expected 2, got 0. (function() : interval) -error in <...>/expire-func-type-check.zeek, line 38: &expire_func argument type clash (&expire_func=invalid_expire_func_no_params) -warning in <...>/expire-func-type-check.zeek, line 16: Wrong number of arguments for function. Expected 2, got 0. (function() : interval) -error in <...>/expire-func-type-check.zeek, line 38: &expire_func argument type clash (&expire_func=invalid_expire_func_no_params) -error in <...>/expire-func-type-check.zeek, line 39: &expire_func must yield a value of type interval (&expire_func=invalid_expire_func_no_return) -error in <...>/expire-func-type-check.zeek, line 39: &expire_func must yield a value of type interval (&expire_func=invalid_expire_func_no_return) error in <...>/expire-func-type-check.zeek, line 39: &expire_func must yield a value of type interval (&expire_func=invalid_expire_func_no_return) warning in <...>/expire-func-type-check.zeek, line 22: Wrong number of arguments for function. Expected 3, got 2. 
(function(t:table[addr,port] of set[addr]; s:set[addr,port];) : interval) error in <...>/expire-func-type-check.zeek, line 40: &expire_func argument type clash (&expire_func=invalid_expire_func_index_params) -warning in <...>/expire-func-type-check.zeek, line 22: Wrong number of arguments for function. Expected 3, got 2. (function(t:table[addr,port] of set[addr]; s:set[addr,port];) : interval) -error in <...>/expire-func-type-check.zeek, line 40: &expire_func argument type clash (&expire_func=invalid_expire_func_index_params) -error in <...>/expire-func-type-check.zeek, line 41: &expire_func attribute is not a function (&expire_func=invalid_expire_func_because_its_an_event) -error in <...>/expire-func-type-check.zeek, line 41: &expire_func attribute is not a function (&expire_func=invalid_expire_func_because_its_an_event) error in <...>/expire-func-type-check.zeek, line 41: &expire_func attribute is not a function (&expire_func=invalid_expire_func_because_its_an_event) error in <...>/expire-func-type-check.zeek, line 42: &expire_func attribute is not a function (&expire_func=invalid_expire_func_because_its_a_hook) -error in <...>/expire-func-type-check.zeek, line 42: &expire_func attribute is not a function (&expire_func=invalid_expire_func_because_its_a_hook) -error in <...>/expire-func-type-check.zeek, line 42: &expire_func attribute is not a function (&expire_func=invalid_expire_func_because_its_a_hook) -error in <...>/expire-func-type-check.zeek, line 43: &expire_func attribute is not a function (&expire_func=invalid_expire_func_because_its_a_number) -error in <...>/expire-func-type-check.zeek, line 43: &expire_func attribute is not a function (&expire_func=invalid_expire_func_because_its_a_number) error in <...>/expire-func-type-check.zeek, line 43: &expire_func attribute is not a function (&expire_func=invalid_expire_func_because_its_a_number) diff --git a/testing/btest/Baseline/language.init-integration/.stderr 
b/testing/btest/Baseline/language.init-integration/.stderr new file mode 100644 index 0000000000..49d861c74c --- /dev/null +++ b/testing/btest/Baseline/language.init-integration/.stderr @@ -0,0 +1 @@ +### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. diff --git a/testing/btest/Baseline/language.init-integration/out b/testing/btest/Baseline/language.init-integration/out new file mode 100644 index 0000000000..bf0bf88bb3 --- /dev/null +++ b/testing/btest/Baseline/language.init-integration/out @@ -0,0 +1,55 @@ +### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. +init_key in state: 1 +init_key2 in state2: 1 +{ +[worker-2] = [node_type=Cluster::WORKER, ip=127.0.0.1, p=7/tcp, manager=manager-1], +[worker-3] = [node_type=Cluster::WORKER, ip=1.2.3.4, p=9/udp, manager=], +[manager-1] = [node_type=Cluster::MANAGER, ip=127.0.0.1, p=3/tcp, manager=], +[worker-1] = [node_type=Cluster::WORKER, ip=127.0.0.1, p=5/udp, manager=manager-1] +} +{ +[worker-3] = [node_type=Cluster::WORKER, ip=1.2.3.4, p=9/udp, manager=], +[manager-1] = [node_type=Cluster::MANAGER, ip=127.0.0.1, p=3/tcp, manager=], +[worker-1] = [node_type=Cluster::WORKER, ip=127.0.0.1, p=5/udp, manager=manager-1] +} +{ +[worker-4] = [node_type=Cluster::WORKER, ip=2.3.4.5, zone_id=, p=13/udp, interface=, manager=, time_machine=, id=] +} +{ +[worker-4] = [node_type=Cluster::WORKER, ip=2.3.4.5, zone_id=, p=13/udp, interface=, manager=, time_machine=, id=], +[worker-5] = [node_type=Cluster::WORKER, ip=3.4.5.6, zone_id=, p=15/tcp, interface=, manager=, time_machine=, id=] +} +{ +[worker-4] = [node_type=Cluster::WORKER, ip=2.3.4.5, zone_id=, p=13/udp, interface=, manager=, time_machine=, id=], +[worker-6] = [node_type=Cluster::WORKER, ip=4.5.6.7, zone_id=, p=17/udp, interface=, manager=, time_machine=, id=] +} +{ +[3.0, 4] +} +{ +[3.0, 4] +} +{ + +} +{ +[9.0, 4] +} +{ +[3.0, 4.0] = 5.0 +} +{ +[3.0, 
4.0] = 5.0 +} +{ + +} +{ +[bar, 1.2.0.0/19] , +[foo, 5.6.0.0/21] , +[foo, 1.2.0.0/19] , +[bar, 5.6.0.0/21] +} +/(^?(^?(bar)$?)$?)|(^?(^?(foo)$?)$?)/ +[1, 3, 5, 9, 2, 4, 6, 20, 21, 22, 23] +[[3, 2, 1], [1, 2, 3], [20, 21, 22, 23], [80, 81], [90, 91, 92]] diff --git a/testing/btest/Baseline/language.init-mismatch/.stderr b/testing/btest/Baseline/language.init-mismatch/.stderr new file mode 100644 index 0000000000..ee09bb0b27 --- /dev/null +++ b/testing/btest/Baseline/language.init-mismatch/.stderr @@ -0,0 +1,21 @@ +### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. +error in <...>/init-mismatch.zeek, line 6: invalid constructor list on RHS of assignment (a = 3, 5) +error in <...>/init-mismatch.zeek, line 6: assignment of non-arithmetic value to arithmetic (count/types) (a = 3, 5) +warning in <...>/init-mismatch.zeek, line 7: initialization not preceded by =<...>/-= is deprecated (4, 6) +error in <...>/init-mismatch.zeek, line 13: different number of indices (list of count,count and list of count,count,count) +error in <...>/init-mismatch.zeek, line 14: table constructor element lacks '=' structure (bar) +error in <...>/init-mismatch.zeek, line 17: empty list in untyped initialization () +error in <...>/init-mismatch.zeek, line 23: cannot expand constructor elements using a value that depends on local variables (subnets) +error in <...>/init-mismatch.zeek, line 23: type clash in assignment (my_subnets = set(foo, subnets)) +error in <...>/init-mismatch.zeek, line 26: invalid constructor list on RHS of assignment (c += 2, 4) +error in <...>/init-mismatch.zeek, line 27: constructor list not allowed for -= operations on vectors (v -= 3, 5) +error in <...>/init-mismatch.zeek, line 29: RHS type mismatch for table/set += (s1 += s2) +error in <...>/init-mismatch.zeek, line 30: RHS type mismatch for table/set -= (s1 -= s2) +error in <...>/init-mismatch.zeek, line 32: table constructor used in a non-table context 
(3 = F) +error in double and <...>/init-mismatch.zeek, line 32: arithmetic mixed with non-arithmetic (double and 3 = F) +error in <...>/init-mismatch.zeek, line 32 and double: type mismatch (3 = F and double) +error in <...>/init-mismatch.zeek, line 32: inconsistent type in set constructor (set(3 = F)) +error in <...>/init-mismatch.zeek, line 34: not a list of indices (s2) +error in <...>/init-mismatch.zeek, line 34: type clash in assignment (s3 = set(s2)) +error in <...>/init-mismatch.zeek, line 36: pattern += op requires op to be a pattern (p += 3) +error in <...>/init-mismatch.zeek, line 38: illegal table constructor element (1.2.3.4) diff --git a/testing/btest/Baseline/language.inlined-nested-loop/out b/testing/btest/Baseline/language.inlined-nested-loop/out new file mode 100644 index 0000000000..a14b6e7b57 --- /dev/null +++ b/testing/btest/Baseline/language.inlined-nested-loop/out @@ -0,0 +1,2 @@ +### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. +I compiled and ran! diff --git a/testing/btest/Baseline/language.mismatched-container-ctor-types/.stderr b/testing/btest/Baseline/language.mismatched-container-ctor-types/.stderr index 807ddb4f5a..8c193d4b3c 100644 --- a/testing/btest/Baseline/language.mismatched-container-ctor-types/.stderr +++ b/testing/btest/Baseline/language.mismatched-container-ctor-types/.stderr @@ -1,9 +1,9 @@ ### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. 
-error in <...>/mismatched-container-ctor-types.zeek, line 6: unexpected use of vector constructor in 'table' initialization (vector()) -error in <...>/mismatched-container-ctor-types.zeek, line 7: unexpected use of table constructor in 'vector' initialization (table()) -error in <...>/mismatched-container-ctor-types.zeek, line 8: unexpected use of set constructor in 'vector' initialization (set()) -error in <...>/mismatched-container-ctor-types.zeek, line 9: unexpected use of record constructor in 'vector' initialization ([]) -error in <...>/mismatched-container-ctor-types.zeek, line 10: unexpected use of record constructor in 'vector' initialization (R()) +error in <...>/mismatched-container-ctor-types.zeek, line 6: type clash in assignment (t = vector()) +error in <...>/mismatched-container-ctor-types.zeek, line 7: type clash in assignment (v0 = table()) +error in <...>/mismatched-container-ctor-types.zeek, line 8: type clash in assignment (v1 = set()) +error in <...>/mismatched-container-ctor-types.zeek, line 9: type clash in assignment (v2 = []) +error in <...>/mismatched-container-ctor-types.zeek, line 10: type clash in assignment (v3 = R()) error in <...>/mismatched-container-ctor-types.zeek, line 12: type clash in assignment (lt = vector()) error in <...>/mismatched-container-ctor-types.zeek, line 13: type clash in assignment (lv0 = table()) error in <...>/mismatched-container-ctor-types.zeek, line 14: type clash in assignment (lv1 = set()) diff --git a/testing/btest/Baseline/language.record-bad-ctor/out b/testing/btest/Baseline/language.record-bad-ctor/out index 40931641f1..cbb840cdcc 100644 --- a/testing/btest/Baseline/language.record-bad-ctor/out +++ b/testing/btest/Baseline/language.record-bad-ctor/out @@ -1,3 +1,4 @@ ### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. 
error in <...>/record-bad-ctor.zeek, line 6: no type given (asdfasdf) -error in <...>/record-bad-ctor.zeek, line 7: non-optional field "ports" missing in initialization ([ports=]) +expression error in <...>/record-bad-ctor.zeek, line 11: value used but not set (asdfasdf2) +error in <...>/record-bad-ctor.zeek, line 11: initialization failed (blah2) diff --git a/testing/btest/Baseline/language.record-bad-ctor4/out b/testing/btest/Baseline/language.record-bad-ctor4/out new file mode 100644 index 0000000000..7368118fe8 --- /dev/null +++ b/testing/btest/Baseline/language.record-bad-ctor4/out @@ -0,0 +1,3 @@ +### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. +error in <...>/record-bad-ctor4.zeek, line 29: mandatory field "cnt" missing (info2($str=hello)) +error in <...>/record-bad-ctor4.zeek, line 29: mandatory field "a" missing ([$str=hello]) diff --git a/testing/btest/Baseline/language.record-type-checking/out b/testing/btest/Baseline/language.record-type-checking/out index 713a1a0a01..f625ea05fa 100644 --- a/testing/btest/Baseline/language.record-type-checking/out +++ b/testing/btest/Baseline/language.record-type-checking/out @@ -1,11 +1,8 @@ ### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. 
error in port and <...>/record-type-checking.zeek, line 9: arithmetic mixed with non-arithmetic (port and 0) -error in <...>/record-type-checking.zeek, line 9: bad record initializer ([$a=]) error in port and <...>/record-type-checking.zeek, line 12: arithmetic mixed with non-arithmetic (port and 1) -error in <...>/record-type-checking.zeek, line 12: bad record initializer ((coerce [$a=] to error)) error in port and <...>/record-type-checking.zeek, line 18: arithmetic mixed with non-arithmetic (port and 2) error in <...>/record-type-checking.zeek, line 22 and count: type clash for field "a" ((coerce [$a=3] to MyRec) and count) -error in <...>/record-type-checking.zeek, line 22: bad record initializer ((coerce [$a=3] to error)) error in port and <...>/record-type-checking.zeek, line 27: arithmetic mixed with non-arithmetic (port and 1000) error in port and <...>/record-type-checking.zeek, line 33: arithmetic mixed with non-arithmetic (port and 1001) error in port and <...>/record-type-checking.zeek, line 40: arithmetic mixed with non-arithmetic (port and 1002) diff --git a/testing/btest/Baseline/language.redef-same-prefixtable-idx/.stderr b/testing/btest/Baseline/language.redef-same-prefixtable-idx/.stderr new file mode 100644 index 0000000000..2bf6110ff6 --- /dev/null +++ b/testing/btest/Baseline/language.redef-same-prefixtable-idx/.stderr @@ -0,0 +1,3 @@ +### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. 
+warning in <...>/redef-same-prefixtable-idx.zeek, line 7: initialization not preceded by =<...>/-= is deprecated (3.0.0.0/8 = 1.0.0.0/8) +warning in <...>/redef-same-prefixtable-idx.zeek, line 8: initialization not preceded by =<...>/-= is deprecated (3.0.0.0/8 = 2.0.0.0/8) diff --git a/testing/btest/Baseline/language.set-type-checking/out b/testing/btest/Baseline/language.set-type-checking/out index 3bd6cc0dc1..d930791048 100644 --- a/testing/btest/Baseline/language.set-type-checking/out +++ b/testing/btest/Baseline/language.set-type-checking/out @@ -8,8 +8,10 @@ error in <...>/set-type-checking.zeek, line 10: inconsistent type in set constru error in port and <...>/set-type-checking.zeek, line 16: arithmetic mixed with non-arithmetic (port and 2) error in <...>/set-type-checking.zeek, line 16 and port: type mismatch (2 and port) error in <...>/set-type-checking.zeek, line 16: inconsistent type in set constructor (set(2)) -error in port: arithmetic mixed with non-arithmetic (port and 3) -error in <...>/set-type-checking.zeek, line 20: initialization type mismatch in set (set(3) and 3) +error in port and <...>/set-type-checking.zeek, line 20: arithmetic mixed with non-arithmetic (port and 3) +error in <...>/set-type-checking.zeek, line 20 and port: type mismatch (3 and port) +error in <...>/set-type-checking.zeek, line 20: inconsistent type in set constructor (set(3)) +error in <...>/set-type-checking.zeek, line 20: type clash in assignment (gea = set(3)) error in port and <...>/set-type-checking.zeek, line 25: arithmetic mixed with non-arithmetic (port and 1000) error in <...>/set-type-checking.zeek, line 25 and port: type mismatch (1000 and port) error in <...>/set-type-checking.zeek, line 25: inconsistent type in set constructor (set(1000)) diff --git a/testing/btest/Baseline/language.table-aggr-init-type-check/output b/testing/btest/Baseline/language.table-aggr-init-type-check/output index 0fb4c3a300..6fbe2173f8 100644 --- 
a/testing/btest/Baseline/language.table-aggr-init-type-check/output +++ b/testing/btest/Baseline/language.table-aggr-init-type-check/output @@ -1,3 +1,5 @@ ### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. -error in <...>/table-aggr-init-type-check.zeek, line 21: type mismatch in table value initialization: assigning 'types' to table with values of type 'record' (three = 1, 2) -error in <...>/table-aggr-init-type-check.zeek, line 25: type mismatch in table value initialization: incompatible record types (four = [$b=No.]) +error in <...>/table-aggr-init-type-check.zeek, lines 4-7 and <...>/table-aggr-init-type-check.zeek, line 21: type clash (MyRec and 1, 2) +error in <...>/table-aggr-init-type-check.zeek, line 21: inconsistent types in table constructor (table(three = 1, 2)) +error in <...>/table-aggr-init-type-check.zeek, lines 4-7 and <...>/table-aggr-init-type-check.zeek, line 25: incompatible record types (MyRec and [$b=No.]) +error in <...>/table-aggr-init-type-check.zeek, line 25: inconsistent types in table constructor (table(four = [$b=No.])) diff --git a/testing/btest/Baseline/language.table-init/output b/testing/btest/Baseline/language.table-init/output index b9a388fa54..cdf3f6ae34 100644 --- a/testing/btest/Baseline/language.table-init/output +++ b/testing/btest/Baseline/language.table-init/output @@ -9,3 +9,6 @@ global table default [3] = three } local table default +{ + +} diff --git a/testing/btest/Baseline/language.table-list-assign-type-check/output b/testing/btest/Baseline/language.table-list-assign-type-check/output index 4e61c25b09..b33c42c8ce 100644 --- a/testing/btest/Baseline/language.table-list-assign-type-check/output +++ b/testing/btest/Baseline/language.table-list-assign-type-check/output @@ -1,8 +1,11 @@ ### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. 
error in count and <...>/table-list-assign-type-check.zeek, line 16: arithmetic mixed with non-arithmetic (count and Internal Web Server) error in <...>/table-list-assign-type-check.zeek, lines 15-20: inconsistent types in table constructor (table(www, 80 = Internal Web Server, dns1, 53 = Internal DNS 1, dns2, 53 = Internal DNS 2, dhcp-for-wifi, 443 = DHCP Management interface for WiFi)) +error in <...>/table-list-assign-type-check.zeek, lines 15-20: type clash in assignment (service_table_bad_yield = table(www, 80 = Internal Web Server, dns1, 53 = Internal DNS 1, dns2, 53 = Internal DNS 2, dhcp-for-wifi, 443 = DHCP Management interface for WiFi)) error in count and <...>/table-list-assign-type-check.zeek, line 24: arithmetic mixed with non-arithmetic (count and 80) error in <...>/table-list-assign-type-check.zeek, lines 23-28: inconsistent types in table constructor (table(www, 80 = Internal Web Server, dns1, 53 = Internal DNS 1, dns2, 53 = Internal DNS 2, dhcp-for-wifi, 443 = DHCP Management interface for WiFi)) +error in <...>/table-list-assign-type-check.zeek, lines 23-28: type clash in assignment (service_table_bad_index = table(www, 80 = Internal Web Server, dns1, 53 = Internal DNS 1, dns2, 53 = Internal DNS 2, dhcp-for-wifi, 443 = DHCP Management interface for WiFi)) error in string and <...>/table-list-assign-type-check.zeek, line 31: arithmetic mixed with non-arithmetic (string and 1) error in <...>/table-list-assign-type-check.zeek, line 31 and string: type mismatch (1 and string) error in <...>/table-list-assign-type-check.zeek, line 31: inconsistent type in set constructor (set(1, 2, 3)) +error in <...>/table-list-assign-type-check.zeek, line 31: type clash in assignment (test_set_bad = set(1, 2, 3)) diff --git a/testing/btest/Baseline/language.table-type-checking/out b/testing/btest/Baseline/language.table-type-checking/out index 82a60de830..0e97e6c56e 100644 --- a/testing/btest/Baseline/language.table-type-checking/out +++ 
b/testing/btest/Baseline/language.table-type-checking/out @@ -1,15 +1,17 @@ ### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. -error in port and <...>/table-type-checking.zeek, line 7: type clash (port and zero) -error in <...>/table-type-checking.zeek, line 7: inconsistent types in table constructor (table(zero = 0)) -error in port and <...>/table-type-checking.zeek, line 10: type clash (port and one) -error in <...>/table-type-checking.zeek, line 10: inconsistent types in table constructor (table(one = 1)) -error in <...>/table-type-checking.zeek, line 17: type clash in assignment (gda = gda2) -error in <...>/table-type-checking.zeek, line 21 and <...>/table-type-checking.zeek, line 4: index type doesn't match table (three and list of port) -expression error in <...>/table-type-checking.zeek, line 21: type clash in table assignment (three = 3) -error in port and <...>/table-type-checking.zeek, line 26: type clash (port and thousand) -error in <...>/table-type-checking.zeek, line 26: inconsistent types in table constructor (table(thousand = 1000)) -error in port and <...>/table-type-checking.zeek, line 32: type clash (port and thousand-one) -error in <...>/table-type-checking.zeek, line 32: inconsistent types in table constructor (table(thousand-one = 1001)) -error in port and <...>/table-type-checking.zeek, line 39: type clash (port and thousand-two) -error in <...>/table-type-checking.zeek, line 39: inconsistent types in table constructor (table(thousand-two = 1002)) -error in <...>/table-type-checking.zeek, line 45: type clash in assignment (lea = table(thousand-three = 1003)) +error in <...>/table-type-checking.zeek, line 7: empty constructor in untyped initialization (table()) +error in port and <...>/table-type-checking.zeek, line 10: type clash (port and zero) +error in <...>/table-type-checking.zeek, line 10: inconsistent types in table constructor (table(zero = 0)) +error in port and 
<...>/table-type-checking.zeek, line 13: type clash (port and one) +error in <...>/table-type-checking.zeek, line 13: inconsistent types in table constructor (table(one = 1)) +error in <...>/table-type-checking.zeek, line 20: type clash in assignment (gda = gda2) +error in <...>/table-type-checking.zeek, line 24: type clash in assignment (gea = table(three = 3)) +error in port and <...>/table-type-checking.zeek, line 29: type clash (port and thousand) +error in <...>/table-type-checking.zeek, line 29: inconsistent types in table constructor (table(thousand = 1000)) +error in port and <...>/table-type-checking.zeek, line 35: type clash (port and thousand-one) +error in <...>/table-type-checking.zeek, line 35: inconsistent types in table constructor (table(thousand-one = 1001)) +error in port and <...>/table-type-checking.zeek, line 42: type clash (port and thousand-two) +error in <...>/table-type-checking.zeek, line 42: inconsistent types in table constructor (table(thousand-two = 1002)) +error in <...>/table-type-checking.zeek, line 48: type clash in assignment (lea = table(thousand-three = 1003)) +error in count and <...>/table-type-checking.zeek, line 54: arithmetic mixed with non-arithmetic (count and foo) +error in <...>/table-type-checking.zeek, line 4 and <...>/table-type-checking.zeek, line 54: &default value has inconsistent type (table[port] of count and table()) diff --git a/testing/btest/Baseline/language.vector-slice-assign/out b/testing/btest/Baseline/language.vector-slice-assign/out new file mode 100644 index 0000000000..d494ea9828 --- /dev/null +++ b/testing/btest/Baseline/language.vector-slice-assign/out @@ -0,0 +1,2 @@ +### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. 
+[[seq=4], [seq=5], [seq=3], [seq=1], [seq=2], [seq=6], [seq=7]] diff --git a/testing/btest/Baseline/language.vector-type-checking/out b/testing/btest/Baseline/language.vector-type-checking/out index 588779a80e..71eb85a096 100644 --- a/testing/btest/Baseline/language.vector-type-checking/out +++ b/testing/btest/Baseline/language.vector-type-checking/out @@ -6,8 +6,7 @@ error in count and <...>/vector-type-checking.zeek, line 10: arithmetic mixed wi error in <...>/vector-type-checking.zeek, line 10 and count: type mismatch (one and count) error in <...>/vector-type-checking.zeek, line 10: inconsistent types in vector constructor (vector(one)) error in <...>/vector-type-checking.zeek, line 17: type clash in assignment (gda = gda2) -error in count and <...>/vector-type-checking.zeek, line 21: arithmetic mixed with non-arithmetic (count and three) -error in <...>/vector-type-checking.zeek, line 21: initialization type mismatch at index 0 (vector(three) and three) +error in <...>/vector-type-checking.zeek, line 21: type clash in assignment (gea = vector(three)) error in count and <...>/vector-type-checking.zeek, line 26: arithmetic mixed with non-arithmetic (count and thousand) error in <...>/vector-type-checking.zeek, line 26 and count: type mismatch (thousand and count) error in <...>/vector-type-checking.zeek, line 26: inconsistent types in vector constructor (vector(thousand)) diff --git a/testing/btest/Baseline/language.vector-unspecified/output b/testing/btest/Baseline/language.vector-unspecified/output index afe628909c..7d88d94649 100644 --- a/testing/btest/Baseline/language.vector-unspecified/output +++ b/testing/btest/Baseline/language.vector-unspecified/output @@ -1,2 +1,2 @@ ### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. 
-[5, Hi, 127.0.0.1] +error in <...>/vector-unspecified.zeek, line 7: empty constructor in untyped initialization (vector()) diff --git a/testing/btest/Baseline/plugins.hooks/output b/testing/btest/Baseline/plugins.hooks/output index 6ce199cb81..576ba4fb24 100644 --- a/testing/btest/Baseline/plugins.hooks/output +++ b/testing/btest/Baseline/plugins.hooks/output @@ -668,7 +668,7 @@ 0.000000 MetaHookPost CallFunction(SumStats::register_observe_plugin, , (SumStats::STD_DEV, lambda_<5704045257244168718>{ SumStats::calc_std_dev(SumStats::rv)})) -> 0.000000 MetaHookPost CallFunction(SumStats::register_observe_plugin, , (SumStats::SUM, lambda_<6958532551242393774>{ SumStats::rv$sum += SumStats::val})) -> 0.000000 MetaHookPost CallFunction(SumStats::register_observe_plugin, , (SumStats::TOPK, lambda_<6366101205573988923>{ topk_add(SumStats::rv$topk, to_any_coerceSumStats::obs)})) -> -0.000000 MetaHookPost CallFunction(SumStats::register_observe_plugin, , (SumStats::UNIQUE, lambda_<14393221830775341876>{ if (!SumStats::rv?$unique_vals) SumStats::rv$unique_vals = (coerce set() to set[SumStats::Observation])if (SumStats::r?$unique_max) SumStats::rv$unique_max = SumStats::r$unique_maxif (!SumStats::r?$unique_max || sizeofSumStats::rv$unique_vals <= SumStats::r$unique_max) add SumStats::rv$unique_vals[SumStats::obs]SumStats::rv$unique = sizeofSumStats::rv$unique_vals})) -> +0.000000 MetaHookPost CallFunction(SumStats::register_observe_plugin, , (SumStats::UNIQUE, lambda_<5561482868084494682>{ if (!SumStats::rv?$unique_vals) SumStats::rv$unique_vals = (coerce (coerce set() to set[SumStats::Observation]) to set[SumStats::Observation])if (SumStats::r?$unique_max) SumStats::rv$unique_max = SumStats::r$unique_maxif (!SumStats::r?$unique_max || sizeofSumStats::rv$unique_vals <= SumStats::r$unique_max) add SumStats::rv$unique_vals[SumStats::obs]SumStats::rv$unique = sizeofSumStats::rv$unique_vals})) -> 0.000000 MetaHookPost CallFunction(SumStats::register_observe_plugin, , 
(SumStats::VARIANCE, lambda_<6557258612059469785>{ if (1 < SumStats::rv$num) SumStats::rv$var_s += ((SumStats::val - SumStats::rv$prev_avg) * (SumStats::val - SumStats::rv$average))SumStats::calc_variance(SumStats::rv)SumStats::rv$prev_avg = SumStats::rv$average})) -> 0.000000 MetaHookPost CallFunction(SumStats::register_observe_plugins, , ()) -> 0.000000 MetaHookPost CallFunction(Supervisor::__is_supervisor, , ()) -> @@ -2125,7 +2125,7 @@ 0.000000 MetaHookPre CallFunction(SumStats::register_observe_plugin, , (SumStats::STD_DEV, lambda_<5704045257244168718>{ SumStats::calc_std_dev(SumStats::rv)})) 0.000000 MetaHookPre CallFunction(SumStats::register_observe_plugin, , (SumStats::SUM, lambda_<6958532551242393774>{ SumStats::rv$sum += SumStats::val})) 0.000000 MetaHookPre CallFunction(SumStats::register_observe_plugin, , (SumStats::TOPK, lambda_<6366101205573988923>{ topk_add(SumStats::rv$topk, to_any_coerceSumStats::obs)})) -0.000000 MetaHookPre CallFunction(SumStats::register_observe_plugin, , (SumStats::UNIQUE, lambda_<14393221830775341876>{ if (!SumStats::rv?$unique_vals) SumStats::rv$unique_vals = (coerce set() to set[SumStats::Observation])if (SumStats::r?$unique_max) SumStats::rv$unique_max = SumStats::r$unique_maxif (!SumStats::r?$unique_max || sizeofSumStats::rv$unique_vals <= SumStats::r$unique_max) add SumStats::rv$unique_vals[SumStats::obs]SumStats::rv$unique = sizeofSumStats::rv$unique_vals})) +0.000000 MetaHookPre CallFunction(SumStats::register_observe_plugin, , (SumStats::UNIQUE, lambda_<5561482868084494682>{ if (!SumStats::rv?$unique_vals) SumStats::rv$unique_vals = (coerce (coerce set() to set[SumStats::Observation]) to set[SumStats::Observation])if (SumStats::r?$unique_max) SumStats::rv$unique_max = SumStats::r$unique_maxif (!SumStats::r?$unique_max || sizeofSumStats::rv$unique_vals <= SumStats::r$unique_max) add SumStats::rv$unique_vals[SumStats::obs]SumStats::rv$unique = sizeofSumStats::rv$unique_vals})) 0.000000 MetaHookPre 
CallFunction(SumStats::register_observe_plugin, , (SumStats::VARIANCE, lambda_<6557258612059469785>{ if (1 < SumStats::rv$num) SumStats::rv$var_s += ((SumStats::val - SumStats::rv$prev_avg) * (SumStats::val - SumStats::rv$average))SumStats::calc_variance(SumStats::rv)SumStats::rv$prev_avg = SumStats::rv$average})) 0.000000 MetaHookPre CallFunction(SumStats::register_observe_plugins, , ()) 0.000000 MetaHookPre CallFunction(Supervisor::__is_supervisor, , ()) @@ -3581,7 +3581,7 @@ 0.000000 | HookCallFunction SumStats::register_observe_plugin(SumStats::STD_DEV, lambda_<5704045257244168718>{ SumStats::calc_std_dev(SumStats::rv)}) 0.000000 | HookCallFunction SumStats::register_observe_plugin(SumStats::SUM, lambda_<6958532551242393774>{ SumStats::rv$sum += SumStats::val}) 0.000000 | HookCallFunction SumStats::register_observe_plugin(SumStats::TOPK, lambda_<6366101205573988923>{ topk_add(SumStats::rv$topk, to_any_coerceSumStats::obs)}) -0.000000 | HookCallFunction SumStats::register_observe_plugin(SumStats::UNIQUE, lambda_<14393221830775341876>{ if (!SumStats::rv?$unique_vals) SumStats::rv$unique_vals = (coerce set() to set[SumStats::Observation])if (SumStats::r?$unique_max) SumStats::rv$unique_max = SumStats::r$unique_maxif (!SumStats::r?$unique_max || sizeofSumStats::rv$unique_vals <= SumStats::r$unique_max) add SumStats::rv$unique_vals[SumStats::obs]SumStats::rv$unique = sizeofSumStats::rv$unique_vals}) +0.000000 | HookCallFunction SumStats::register_observe_plugin(SumStats::UNIQUE, lambda_<5561482868084494682>{ if (!SumStats::rv?$unique_vals) SumStats::rv$unique_vals = (coerce (coerce set() to set[SumStats::Observation]) to set[SumStats::Observation])if (SumStats::r?$unique_max) SumStats::rv$unique_max = SumStats::r$unique_maxif (!SumStats::r?$unique_max || sizeofSumStats::rv$unique_vals <= SumStats::r$unique_max) add SumStats::rv$unique_vals[SumStats::obs]SumStats::rv$unique = sizeofSumStats::rv$unique_vals}) 0.000000 | HookCallFunction 
SumStats::register_observe_plugin(SumStats::VARIANCE, lambda_<6557258612059469785>{ if (1 < SumStats::rv$num) SumStats::rv$var_s += ((SumStats::val - SumStats::rv$prev_avg) * (SumStats::val - SumStats::rv$average))SumStats::calc_variance(SumStats::rv)SumStats::rv$prev_avg = SumStats::rv$average}) 0.000000 | HookCallFunction SumStats::register_observe_plugins() 0.000000 | HookCallFunction Supervisor::__is_supervisor() diff --git a/testing/btest/Baseline/plugins.plugin-load-file-extended/output b/testing/btest/Baseline/plugins.plugin-load-file-extended/output index d32f2141d1..3d5b86d6dd 100644 --- a/testing/btest/Baseline/plugins.plugin-load-file-extended/output +++ b/testing/btest/Baseline/plugins.plugin-load-file-extended/output @@ -1,7 +1,9 @@ ### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. -HookLoadExtended/script: file=|xxx| resolved=|./xxx.zeek| -HookLoadExtended/script: file=|yyy| resolved=|| -HookLoadExtended/signature: file=|abc.sig| resolved=|./abc.sig| +HookLoadExtended/script: file=|xxx| resolved=|./xxx.zeek| srcloc=|n/a| +HookLoadExtended/script: file=|xxx3| resolved=|./xxx3.zeek| srcloc=|./xxx2.zeek| +HookLoadExtended/script: file=|yyy| resolved=|| srcloc=|n/a| +HookLoadExtended/signature: file=|abc.sig| resolved=|./abc.sig| srcloc=|n/a| +HookLoadExtended/signature: file=|def.sig| resolved=|./def.sig| srcloc=|./xxx2.zeek| new zeek_init(): script has been replaced new zeek_init(): script has been added signature works! diff --git a/testing/btest/Baseline/plugins.reporter-hook/output b/testing/btest/Baseline/plugins.reporter-hook/output index bcc9b6a25a..57b265589c 100644 --- a/testing/btest/Baseline/plugins.reporter-hook/output +++ b/testing/btest/Baseline/plugins.reporter-hook/output @@ -1,11 +1,14 @@ ### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. 
- | Hook Some Info <...>/reporter-hook.zeek, line 16 - | Hook error An Error <...>/reporter-hook.zeek, line 18 - | Hook error An Error that does not show up in the log <...>/reporter-hook.zeek, line 19 - | Hook expression error field value missing (b$a) <...>/reporter-hook.zeek, line 23 - | Hook warning A warning <...>/reporter-hook.zeek, line 17 -<...>/reporter-hook.zeek, line 16: Some Info -error in <...>/reporter-hook.zeek, line 18: An Error -error in <...>/reporter-hook.zeek, line 19: An Error that does not show up in the log -expression error in <...>/reporter-hook.zeek, line 23: field value missing (b$a) -warning in <...>/reporter-hook.zeek, line 17: A warning +Reporter::Hook - Exercise Reporter Hook (dynamic, version 1.0.0) + Implements Reporter (priority 0) + + | Hook Some Info <...>/reporter-hook.zeek, line 17 + | Hook error An Error <...>/reporter-hook.zeek, line 19 + | Hook error An Error that does not show up in the log <...>/reporter-hook.zeek, line 20 + | Hook expression error field value missing (b$a) <...>/reporter-hook.zeek, line 24 + | Hook warning A warning <...>/reporter-hook.zeek, line 18 +<...>/reporter-hook.zeek, line 17: Some Info +error in <...>/reporter-hook.zeek, line 19: An Error +error in <...>/reporter-hook.zeek, line 20: An Error that does not show up in the log +expression error in <...>/reporter-hook.zeek, line 24: field value missing (b$a) +warning in <...>/reporter-hook.zeek, line 18: A warning diff --git a/testing/btest/Baseline/plugins.reporter-hook/reporter.log b/testing/btest/Baseline/plugins.reporter-hook/reporter.log index 2b9dfb451d..1225fa1cfa 100644 --- a/testing/btest/Baseline/plugins.reporter-hook/reporter.log +++ b/testing/btest/Baseline/plugins.reporter-hook/reporter.log @@ -7,8 +7,8 @@ #open XXXX-XX-XX-XX-XX-XX #fields ts level message location #types time enum string string -XXXXXXXXXX.XXXXXX Reporter::INFO Some Info <...>/reporter-hook.zeek, line 16 -XXXXXXXXXX.XXXXXX Reporter::WARNING A warning 
<...>/reporter-hook.zeek, line 17 -XXXXXXXXXX.XXXXXX Reporter::ERROR An Error <...>/reporter-hook.zeek, line 18 -XXXXXXXXXX.XXXXXX Reporter::ERROR field value missing (b$a) <...>/reporter-hook.zeek, line 23 +XXXXXXXXXX.XXXXXX Reporter::INFO Some Info <...>/reporter-hook.zeek, line 17 +XXXXXXXXXX.XXXXXX Reporter::WARNING A warning <...>/reporter-hook.zeek, line 18 +XXXXXXXXXX.XXXXXX Reporter::ERROR An Error <...>/reporter-hook.zeek, line 19 +XXXXXXXXXX.XXXXXX Reporter::ERROR field value missing (b$a) <...>/reporter-hook.zeek, line 24 #close XXXX-XX-XX-XX-XX-XX diff --git a/testing/btest/Baseline/scripts.base.frameworks.intel.read-file-dist-cluster/manager-1.intel.log b/testing/btest/Baseline/scripts.base.frameworks.intel.read-file-dist-cluster/manager-1.intel.log index 38bd8140a6..2244196074 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.intel.read-file-dist-cluster/manager-1.intel.log +++ b/testing/btest/Baseline/scripts.base.frameworks.intel.read-file-dist-cluster/manager-1.intel.log @@ -1,4 +1,5 @@ ### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. +### NOTE: This file has been sorted with diff-sort. 
#separator \x09 #set_separator , #empty_field (empty) @@ -7,8 +8,8 @@ #open XXXX-XX-XX-XX-XX-XX #fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p seen.indicator seen.indicator_type seen.where seen.node matched sources fuid file_mime_type file_desc #types time string addr port addr port string enum enum string set[enum] set[string] string string string -XXXXXXXXXX.XXXXXX - - - - - 1.2.3.4 Intel::ADDR Intel::IN_A_TEST worker-1 Intel::ADDR source1 - - - -XXXXXXXXXX.XXXXXX - - - - - e@mail.com Intel::EMAIL Intel::IN_A_TEST worker-1 Intel::EMAIL source1 - - - -XXXXXXXXXX.XXXXXX - - - - - 1.2.3.4 Intel::ADDR Intel::IN_A_TEST worker-2 Intel::ADDR source1 - - - -XXXXXXXXXX.XXXXXX - - - - - e@mail.com Intel::EMAIL Intel::IN_A_TEST worker-2 Intel::EMAIL source1 - - - #close XXXX-XX-XX-XX-XX-XX +XXXXXXXXXX.XXXXXX - - - - - 1.2.3.4 Intel::ADDR Intel::IN_A_TEST worker-1 Intel::ADDR source1 - - - +XXXXXXXXXX.XXXXXX - - - - - 1.2.3.4 Intel::ADDR Intel::IN_A_TEST worker-2 Intel::ADDR source1 - - - +XXXXXXXXXX.XXXXXX - - - - - e@mail.com Intel::EMAIL Intel::IN_A_TEST worker-1 Intel::EMAIL source1 - - - +XXXXXXXXXX.XXXXXX - - - - - e@mail.com Intel::EMAIL Intel::IN_A_TEST worker-2 Intel::EMAIL source1 - - - diff --git a/testing/btest/Baseline/scripts.policy.frameworks.cluster.controller.agent-checkin/zeek.controller.stdout b/testing/btest/Baseline/scripts.policy.frameworks.management.controller.agent-checkin/zeek.controller.stdout similarity index 100% rename from testing/btest/Baseline/scripts.policy.frameworks.cluster.controller.agent-checkin/zeek.controller.stdout rename to testing/btest/Baseline/scripts.policy.frameworks.management.controller.agent-checkin/zeek.controller.stdout diff --git a/testing/btest/Baseline/scripts.policy.protocols.ssl.decryption/http.log b/testing/btest/Baseline/scripts.policy.protocols.ssl.decryption/http.log new file mode 100644 index 0000000000..a01d49a497 --- /dev/null +++ b/testing/btest/Baseline/scripts.policy.protocols.ssl.decryption/http.log 
@@ -0,0 +1,11 @@ +### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. +#separator \x09 +#set_separator , +#empty_field (empty) +#unset_field - +#path http +#open XXXX-XX-XX-XX-XX-XX +#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth method host uri referrer version user_agent origin request_body_len response_body_len status_code status_msg info_code info_msg tags username password proxied orig_fuids orig_filenames orig_mime_types resp_fuids resp_filenames resp_mime_types +#types time string addr port addr port count string string string string string string string count count count string count string set[enum] string string set[string] vector[string] vector[string] vector[string] vector[string] vector[string] vector[string] +XXXXXXXXXX.XXXXXX CHhAvVGS1DHFjwGM9 192.168.100.70 48216 193.99.144.80 443 1 GET heise.de / - 1.1 Mozilla/5.0 (X11; Linux x86_64; rv:78.0) Gecko/20100101 Firefox/78.0 - 0 229 301 Moved Permanently - - (empty) - - - - - - FaOfPL638bbaQ1KMh - text/html +#close XXXX-XX-XX-XX-XX-XX diff --git a/testing/btest/Traces/echo-connections.pcap.gz b/testing/btest/Traces/echo-connections.pcap.gz new file mode 100644 index 0000000000..27b8f0a296 Binary files /dev/null and b/testing/btest/Traces/echo-connections.pcap.gz differ diff --git a/testing/btest/Traces/tls/tls-1.2-stream-keylog.pcap b/testing/btest/Traces/tls/tls-1.2-stream-keylog.pcap new file mode 100644 index 0000000000..bbebc0a3e8 Binary files /dev/null and b/testing/btest/Traces/tls/tls-1.2-stream-keylog.pcap differ diff --git a/testing/btest/Traces/tls/tls12-decryption.pcap b/testing/btest/Traces/tls/tls12-decryption.pcap new file mode 100644 index 0000000000..73ffaa14bd Binary files /dev/null and b/testing/btest/Traces/tls/tls12-decryption.pcap differ diff --git a/testing/btest/bifs/enum_to_int.zeek b/testing/btest/bifs/enum_to_int.zeek index 17fd1ff8a9..ccfea91b54 100644 --- a/testing/btest/bifs/enum_to_int.zeek 
+++ b/testing/btest/bifs/enum_to_int.zeek @@ -18,12 +18,24 @@ export { event zeek_init() { - - print A, enum_to_int(A); print B, enum_to_int(B); print C, enum_to_int(C); print AV, enum_to_int(AV); print BV, enum_to_int(BV); print CV, enum_to_int(CV); + + print enum_to_int(A) != enum_to_int(B); + print enum_to_int(A) != enum_to_int(C); + print enum_to_int(B) != enum_to_int(C); + print enum_to_int(A) < enum_to_int(B); + print enum_to_int(A) < enum_to_int(C); + print enum_to_int(B) < enum_to_int(C); + + print enum_to_int(AV) != enum_to_int(BV); + print enum_to_int(AV) != enum_to_int(CV); + print enum_to_int(BV) != enum_to_int(CV); + print enum_to_int(AV) < enum_to_int(BV); + print enum_to_int(AV) < enum_to_int(CV); + print enum_to_int(BV) < enum_to_int(CV); } diff --git a/testing/btest/bifs/is_ascii.zeek b/testing/btest/bifs/is_ascii.zeek index 505e21e715..da1b1cfa7e 100644 --- a/testing/btest/bifs/is_ascii.zeek +++ b/testing/btest/bifs/is_ascii.zeek @@ -9,4 +9,5 @@ event zeek_init() print is_ascii(a); print is_ascii(b); + print is_ascii(""); } diff --git a/testing/btest/bifs/raw_bytes_to_v6_addr.zeek b/testing/btest/bifs/raw_bytes_to_v6_addr.zeek new file mode 100644 index 0000000000..38e0db9ee2 --- /dev/null +++ b/testing/btest/bifs/raw_bytes_to_v6_addr.zeek @@ -0,0 +1,10 @@ +# +# @TEST-EXEC: zeek -b %INPUT >out +# @TEST-EXEC: btest-diff out + +event zeek_init() + { + print raw_bytes_to_v6_addr("ABCDEFGHIKLMNOPQ"); + print raw_bytes_to_v6_addr("ABCDEFGHIKLMNOP"); + print raw_bytes_to_v6_addr("\xda\xda\xbe\xef\x00\x00\x00\x00N^\x0c\xff\xfej\x86q"); + } diff --git a/testing/btest/bifs/string_utils.zeek b/testing/btest/bifs/string_utils.zeek index 5b5f70983e..021d7f68d0 100644 --- a/testing/btest/bifs/string_utils.zeek +++ b/testing/btest/bifs/string_utils.zeek @@ -23,14 +23,17 @@ event zeek_init() print "----------------"; print fmt("is_num abc : %d", is_num("abc")); print fmt("is_num 123 : %d", is_num("123")); + print fmt("is_num '' : %d", is_num("")); print 
fmt("is_alpha ab : %d", is_alpha("ab")); print fmt("is_alpha 1a : %d", is_alpha("1a")); print fmt("is_alpha a1 : %d", is_alpha("a1")); + print fmt("is_alpha '' : %d", is_alpha("")); print fmt("is_alnum ab : %d", is_alnum("ab")); print fmt("is_alnum 1a : %d", is_alnum("1a")); print fmt("is_alnum a1 : %d", is_alnum("a1")); print fmt("is_alnum 12 : %d", is_alnum("12")); print fmt("is_alnum ##12: %d", is_alnum("##12")); + print fmt("is_alnum '' : %d", is_alnum("")); print ""; print "String counting (input str 'aabbaa')"; diff --git a/testing/btest/broker/connect-on-retry.zeek b/testing/btest/broker/connect-on-retry.zeek index 42ecd21c4d..7fe700a2cb 100644 --- a/testing/btest/broker/connect-on-retry.zeek +++ b/testing/btest/broker/connect-on-retry.zeek @@ -1,3 +1,5 @@ +# @TEST-GROUP: broker +# # @TEST-PORT: BROKER_PORT # # @TEST-EXEC: btest-bg-run recv "zeek -b ../recv.zeek >recv.out" diff --git a/testing/btest/broker/disconnect.zeek b/testing/btest/broker/disconnect.zeek index f44fad143b..701172eeb6 100644 --- a/testing/btest/broker/disconnect.zeek +++ b/testing/btest/broker/disconnect.zeek @@ -1,3 +1,5 @@ +# @TEST-GROUP: broker +# # @TEST-PORT: BROKER_PORT # @TEST-EXEC: btest-bg-run recv "zeek -b ../recv.zeek >recv.out" @@ -7,7 +9,7 @@ # @TEST-EXEC: btest-bg-run recv2 "zeek -b ../recv.zeek >recv2.out" # @TEST-EXEC: btest-bg-wait 45 - +# # @TEST-EXEC: btest-diff send/send.out # @TEST-EXEC: btest-diff recv/recv.out # @TEST-EXEC: btest-diff recv2/recv2.out @@ -33,10 +35,6 @@ event zeek_init() event Broker::peer_lost(endpoint: Broker::EndpointInfo, msg: string) { print "peer lost", msg; - system("touch lost"); - - if ( peers == 2 ) - terminate(); } event Broker::peer_added(endpoint: Broker::EndpointInfo, msg: string) @@ -46,6 +44,20 @@ event Broker::peer_added(endpoint: Broker::EndpointInfo, msg: string) Broker::publish(test_topic, my_event, peers); } +event Broker::endpoint_discovered(endpoint: Broker::EndpointInfo, msg: string) + { + print "endpoint discovered", msg; + 
} + +event Broker::endpoint_unreachable(endpoint: Broker::EndpointInfo, msg: string) + { + print "endpoint unreachable", msg; + system("touch lost"); + + if ( peers == 2 ) + terminate(); + } + @TEST-END-FILE @@ -77,4 +89,14 @@ event Broker::peer_added(endpoint: Broker::EndpointInfo, msg: string) print "peer added", msg; } +event Broker::endpoint_discovered(endpoint: Broker::EndpointInfo, msg: string) + { + print "endpoint discovered", msg; + } + +event Broker::endpoint_unreachable(endpoint: Broker::EndpointInfo, msg: string) + { + print "endpoint unreachable", msg; + } + @TEST-END-FILE diff --git a/testing/btest/broker/error.zeek b/testing/btest/broker/error.zeek index 4a7887707e..c2686274b7 100644 --- a/testing/btest/broker/error.zeek +++ b/testing/btest/broker/error.zeek @@ -1,6 +1,8 @@ +# @TEST-GROUP: broker +# # @TEST-EXEC: zeek -b send.zeek >send.out # @TEST-EXEC: btest-diff send.out -# +# @TEST-START-FILE send.zeek diff --git a/testing/btest/broker/opaque.zeek b/testing/btest/broker/opaque.zeek index 41823a59ed..374ab7dce5 100644 --- a/testing/btest/broker/opaque.zeek +++ b/testing/btest/broker/opaque.zeek @@ -1,3 +1,5 @@ +# @TEST-GROUP: broker +# # @TEST-EXEC: zeek -b %INPUT >out # @TEST-EXEC: btest-diff out # @TEST-EXEC: btest-diff .stderr diff --git a/testing/btest/broker/remote_event.zeek b/testing/btest/broker/remote_event.zeek index e863af5523..ddc4092841 100644 --- a/testing/btest/broker/remote_event.zeek +++ b/testing/btest/broker/remote_event.zeek @@ -1,3 +1,5 @@ +# @TEST-GROUP: broker +# # @TEST-PORT: BROKER_PORT # # @TEST-EXEC: btest-bg-run recv "zeek -b ../recv.zeek >recv.out" diff --git a/testing/btest/broker/remote_event_any.zeek b/testing/btest/broker/remote_event_any.zeek index 28a5a1abbe..2a290376ca 100644 --- a/testing/btest/broker/remote_event_any.zeek +++ b/testing/btest/broker/remote_event_any.zeek @@ -1,3 +1,5 @@ +# @TEST-GROUP: broker +# # @TEST-PORT: BROKER_PORT # # @TEST-EXEC: btest-bg-run recv "zeek -b ../recv.zeek >recv.out" diff 
--git a/testing/btest/broker/remote_event_auto.zeek b/testing/btest/broker/remote_event_auto.zeek index c5497997ac..264f131708 100644 --- a/testing/btest/broker/remote_event_auto.zeek +++ b/testing/btest/broker/remote_event_auto.zeek @@ -1,3 +1,5 @@ +# @TEST-GROUP: broker +# # @TEST-PORT: BROKER_PORT # # @TEST-EXEC: btest-bg-run recv "zeek -b ../recv.zeek >recv.out" diff --git a/testing/btest/broker/remote_event_ssl_auth.zeek b/testing/btest/broker/remote_event_ssl_auth.zeek index 95ce393e0a..d3882ab2b1 100644 --- a/testing/btest/broker/remote_event_ssl_auth.zeek +++ b/testing/btest/broker/remote_event_ssl_auth.zeek @@ -1,3 +1,5 @@ +# @TEST-GROUP: broker +# # @TEST-PORT: BROKER_PORT # # @TEST-EXEC: btest-bg-run recv "zeek -b ../recv.zeek >recv.out" diff --git a/testing/btest/broker/remote_event_vector_any.zeek b/testing/btest/broker/remote_event_vector_any.zeek index 628180331d..36bc896a59 100644 --- a/testing/btest/broker/remote_event_vector_any.zeek +++ b/testing/btest/broker/remote_event_vector_any.zeek @@ -1,3 +1,5 @@ +# @TEST-GROUP: broker +# # @TEST-PORT: BROKER_PORT # # @TEST-EXEC: btest-bg-run recv "zeek -b ../recv.zeek >recv.out" diff --git a/testing/btest/broker/remote_id.zeek b/testing/btest/broker/remote_id.zeek index 35e2416912..c88fa80ac0 100644 --- a/testing/btest/broker/remote_id.zeek +++ b/testing/btest/broker/remote_id.zeek @@ -1,3 +1,5 @@ +# @TEST-GROUP: broker +# # @TEST-PORT: BROKER_PORT # # @TEST-EXEC: btest-bg-run recv "zeek -b ../recv.zeek >recv.out" diff --git a/testing/btest/broker/remote_log.zeek b/testing/btest/broker/remote_log.zeek index 2f50b0a766..6158e0572a 100644 --- a/testing/btest/broker/remote_log.zeek +++ b/testing/btest/broker/remote_log.zeek @@ -1,3 +1,5 @@ +# @TEST-GROUP: broker +# # @TEST-PORT: BROKER_PORT # @TEST-EXEC: btest-bg-run recv "zeek -b ../recv.zeek >recv.out" diff --git a/testing/btest/broker/remote_log_batch.zeek b/testing/btest/broker/remote_log_batch.zeek index b378deb56b..898968bceb 100644 --- 
a/testing/btest/broker/remote_log_batch.zeek +++ b/testing/btest/broker/remote_log_batch.zeek @@ -1,3 +1,5 @@ +# @TEST-GROUP: broker +# # @TEST-PORT: BROKER_PORT # @TEST-EXEC: btest-bg-run recv "zeek -b ../recv.zeek >recv.out" diff --git a/testing/btest/broker/remote_log_late_join.zeek b/testing/btest/broker/remote_log_late_join.zeek index 7e69bdd496..271c77f71a 100644 --- a/testing/btest/broker/remote_log_late_join.zeek +++ b/testing/btest/broker/remote_log_late_join.zeek @@ -1,3 +1,5 @@ +# @TEST-GROUP: broker +# # @TEST-PORT: BROKER_PORT # @TEST-EXEC: btest-bg-run recv "zeek -b ../recv.zeek >recv.out" diff --git a/testing/btest/broker/remote_log_types.zeek b/testing/btest/broker/remote_log_types.zeek index a14a12586d..e1ff5f304c 100644 --- a/testing/btest/broker/remote_log_types.zeek +++ b/testing/btest/broker/remote_log_types.zeek @@ -1,3 +1,5 @@ +# @TEST-GROUP: broker +# # @TEST-PORT: BROKER_PORT # @TEST-EXEC: btest-bg-run recv "zeek -b ../recv.zeek >recv.out" diff --git a/testing/btest/broker/ssl-mismatch.zeek b/testing/btest/broker/ssl-mismatch.zeek index 60ab15462a..d013b2b978 100644 --- a/testing/btest/broker/ssl-mismatch.zeek +++ b/testing/btest/broker/ssl-mismatch.zeek @@ -1,3 +1,5 @@ +# @TEST-GROUP: broker +# # @TEST-PORT: BROKER_PORT # # @TEST-EXEC: btest-bg-run listen "zeek -b %INPUT connect=F Broker::disable_ssl=T" diff --git a/testing/btest/broker/ssl_auth_failure.zeek b/testing/btest/broker/ssl_auth_failure.zeek index 1dffd27618..86275f0e96 100644 --- a/testing/btest/broker/ssl_auth_failure.zeek +++ b/testing/btest/broker/ssl_auth_failure.zeek @@ -1,3 +1,5 @@ +# @TEST-GROUP: broker +# # @TEST-PORT: BROKER_PORT # # @TEST-EXEC: btest-bg-run recv "zeek -b ../recv.zeek >recv.out" diff --git a/testing/btest/broker/store/brokerstore-backend-simple-reverse.zeek b/testing/btest/broker/store/brokerstore-backend-simple-reverse.zeek index 611e8f19cd..e435bc6f85 100644 --- a/testing/btest/broker/store/brokerstore-backend-simple-reverse.zeek +++ 
b/testing/btest/broker/store/brokerstore-backend-simple-reverse.zeek @@ -70,6 +70,9 @@ event Broker::peer_lost(endpoint: Broker::EndpointInfo, msg: string) @TEST-START-FILE clone.zeek +global has_node_up: bool = F; +global has_announce_masters: bool = F; + event dump_tables() { t["a"] = 5; @@ -95,7 +98,9 @@ event dump_tables() event Cluster::node_up(name: string, id: string) { Reporter::info(fmt("Node Up: %s", name)); - event dump_tables(); + has_node_up = T; + if ( has_announce_masters ) + event dump_tables(); } event Broker::peer_lost(endpoint: Broker::EndpointInfo, msg: string) @@ -106,6 +111,9 @@ event Broker::peer_lost(endpoint: Broker::EndpointInfo, msg: string) event Broker::announce_masters(masters: set[string]) { Reporter::info(fmt("Received announce_masters: %s", cat(masters))); + has_announce_masters = T; + if ( has_node_up ) + event dump_tables(); } @TEST-END-FILE diff --git a/testing/btest/broker/store/brokerstore-backend-sqlite.zeek b/testing/btest/broker/store/brokerstore-backend-sqlite.zeek index 39c781a347..042c5bec55 100644 --- a/testing/btest/broker/store/brokerstore-backend-sqlite.zeek +++ b/testing/btest/broker/store/brokerstore-backend-sqlite.zeek @@ -125,7 +125,12 @@ event dump_tables() event check_all_set() { - if ( "whatever" in t && "hi" in s && "b" in r ) + # Note: 'a' gets inserted first into 'r'. However, we may still observe 'r' + # with 'b' but without 'a'. This may happen if the clone completes + # its handshake with the server after 'a' and 'b' are already in 'r'. + # In this case, the master sends a snapshot of its state and the + # insertion events for 'a' and 'b' they may trigger in any order. 
+ if ( "whatever" in t && "hi" in s && "a" in r && "b" in r ) event dump_tables(); else schedule 0.1sec { check_all_set() }; diff --git a/testing/btest/broker/unpeer.zeek b/testing/btest/broker/unpeer.zeek index fd057af241..6cb3cc9a8c 100644 --- a/testing/btest/broker/unpeer.zeek +++ b/testing/btest/broker/unpeer.zeek @@ -1,3 +1,5 @@ +# @TEST-GROUP: broker +# # @TEST-PORT: BROKER_PORT # # @TEST-EXEC: btest-bg-run recv "zeek -b ../recv.zeek >recv.out" @@ -6,7 +8,7 @@ # @TEST-EXEC: btest-bg-wait 45 # @TEST-EXEC: btest-diff recv/recv.out # @TEST-EXEC: btest-diff send/send.out -# +# # @TEST-EXEC: cat recv/broker.log | awk '/Broker::STATUS/ { $5="XXX"; print; }' >recv/broker.filtered.log # @TEST-EXEC: cat send/broker.log | awk '/Broker::STATUS/ { $5="XXX"; print; }' >send/broker.filtered.log # @TEST-EXEC: btest-diff recv/broker.filtered.log diff --git a/testing/btest/core/dict-iteration-expire1.zeek b/testing/btest/core/dict-iteration-expire1.zeek new file mode 100644 index 0000000000..44cd1de79c --- /dev/null +++ b/testing/btest/core/dict-iteration-expire1.zeek @@ -0,0 +1,52 @@ +# @TEST-EXEC: zeek %INPUT +# @TEST-DOC: Regression test #2017; no output check, just shouldn't crash + +redef table_expire_interval = 0.1sec; +redef table_incremental_step = 100; +redef table_expire_delay = 0.5sec; + +redef exit_only_after_terminate = T; + +global tbl: table[string] of vector of count &default = vector() &create_expire=1sec; + +const populates_per_second = 100; +const populates_num = 100; +global done = F; + +event do_terminate() { + terminate(); +} + +event cleanup(idx: string) { + delete tbl[idx]; + + # terminate a bit after all elements will finally have been expired + if ( done && |tbl| == 0 ) + schedule 1sec { do_terminate() }; +} + +event populate(round: count) { + + local i = 0; + while (++i < populates_num) { + local val = rand(1000000); + local val_str = cat(val); + # print(fmt("round %s %s val=%s", round, i, val)); + tbl[val_str] = vector(val); + + # Schedule an 
explicit delete at most a second away. + local random_cleanup_delay = double_to_interval(rand(100) / 100.0); + schedule random_cleanup_delay { cleanup(val_str) }; + } + + if ( round <= 200 ) { + print(fmt("round %s size=%s", round, |tbl|)); + schedule 1sec/populates_per_second { populate(++round) }; + } + else + done = T; +} + +event zeek_init() { + event populate(1); +} diff --git a/testing/btest/core/dict-iteration-expire4.zeek b/testing/btest/core/dict-iteration-expire4.zeek new file mode 100644 index 0000000000..e23d176e06 --- /dev/null +++ b/testing/btest/core/dict-iteration-expire4.zeek @@ -0,0 +1,108 @@ +# @TEST-EXEC: zcat <$TRACES/echo-connections.pcap.gz | zeek --load-seeds 1.seeds -Cr - %INPUT +# @TEST-EXEC: zcat <$TRACES/echo-connections.pcap.gz | zeek --load-seeds 2.seeds -Cr - %INPUT +# @TEST-DOC: Regression test #2032; no output check, just shouldn't crash + +redef table_expire_delay = 0.1sec; +redef table_incremental_step = 10; +redef table_expire_interval = 0.01sec; + +# redef exit_only_after_terminate = T; + +type Key: record { + c: count; + s1: string; + s2: string; + a1: addr; + a2: addr; +}; + +global insert_many: event(n: count); +global insert_many_f: function(n: count); + +function expire(t: table[Key] of Key, k: Key): interval { + print(fmt("Expiring %s sz=%s", k, |t|)); + schedule 0.2sec { insert_many(2 * |t| + 8) }; + #insert_many_f(2 * |t| + 8); + return 0sec; +} + +global tbl: table[Key] of Key &create_expire=0.1sec &expire_func=expire; + +function make_key(i: count): Key { + return Key( + $c=i, + $s1=cat(i), + $s2=cat(2 * i), + $a1=count_to_v4_addr(1000000 + i), + $a2=count_to_v4_addr(2000000 + i) + ); +} + +event insert_many(n: count) { + local i = n; + while (++i < n + 37) { + local k = make_key(i); + tbl[k] = k; + } +} + +function insert_many_f(n: count) { + local i = n; + while (++i < n + 37) { + local k = make_key(i); + tbl[k] = k; + } +} + +event zeek_init() { + local k = make_key(1); + tbl[k] = k; +} + +@TEST-START-FILE 1.seeds 
+3569182667 +3864322632 +2737717875 +4292737228 +959594593 +3440781012 +1483058089 +950202215 +611472157 +2218394723 +3885890563 +1396441520 +1851988456 +3540954895 +2626085489 +3793122452 +3535210719 +936980445 +3834222442 +2355333979 +113403102 +@TEST-END-FILE + +@TEST-START-FILE 2.seeds +4013930712 +1835775324 +3393047106 +3151534432 +2727962940 +3990820447 +792628001 +3844857817 +2661636943 +2621115293 +2909873159 +3909343487 +1003041063 +1365337823 +2042927118 +3623503659 +394335333 +302877509 +348858887 +14638654 +4267481449 +@TEST-END-FILE diff --git a/testing/btest/core/dict-iteration-expire5.zeek b/testing/btest/core/dict-iteration-expire5.zeek new file mode 100644 index 0000000000..7ccbda9d32 --- /dev/null +++ b/testing/btest/core/dict-iteration-expire5.zeek @@ -0,0 +1,68 @@ +# @TEST-EXEC: zcat <$TRACES/echo-connections.pcap.gz | zeek --load-seeds 1.seeds -Cr - %INPUT +# @TEST-EXEC: zcat <$TRACES/echo-connections.pcap.gz | zeek --load-seeds 2.seeds -Cr - %INPUT +# @TEST-DOC: Regression test #2032; no output check, just shouldn't crash + +redef table_expire_delay = 0.0001sec; +redef table_incremental_step = 1; +redef table_expire_interval = 0.001sec; + + +function expire(t: table[conn_id] of string, k: conn_id): interval { + # print(fmt("Expiring %s sz=%s", k, |t|)); + return 0sec; +} + +global recent_conns: table[conn_id] of string &create_expire=0.05sec &expire_func=expire; + +event new_connection(c: connection) { + # print(fmt("%s %s", c$id, network_time())); + recent_conns[c$id] = c$uid; +} + +@TEST-START-FILE 1.seeds +3569182667 +3864322632 +2737717875 +4292737228 +959594593 +3440781012 +1483058089 +950202215 +611472157 +2218394723 +3885890563 +1396441520 +1851988456 +3540954895 +2626085489 +3793122452 +3535210719 +936980445 +3834222442 +2355333979 +113403102 +@TEST-END-FILE + +@TEST-START-FILE 2.seeds +4013930712 +1835775324 +3393047106 +3151534432 +2727962940 +3990820447 +792628001 +3844857817 +2661636943 +2621115293 +2909873159 +3909343487 
+1003041063 +1365337823 +2042927118 +3623503659 +394335333 +302877509 +348858887 +14638654 +4267481449 +@TEST-END-FILE diff --git a/testing/btest/core/fake_dns.zeek b/testing/btest/core/fake_dns.zeek index 46dd50c5ee..dd8a88ae40 100644 --- a/testing/btest/core/fake_dns.zeek +++ b/testing/btest/core/fake_dns.zeek @@ -1,4 +1,4 @@ -# @TEST-EXEC: ZEEK_DNS_FAKE=1 zeek -b %INPUT >out +# @TEST-EXEC: ZEEK_DNS_FAKE=1 zeek -D -b %INPUT >out # @TEST-EXEC: btest-diff out redef exit_only_after_terminate = T; diff --git a/testing/btest/core/scalar-vector.zeek b/testing/btest/core/scalar-vector.zeek new file mode 100644 index 0000000000..004ef5eec1 --- /dev/null +++ b/testing/btest/core/scalar-vector.zeek @@ -0,0 +1,24 @@ +# Skip this test when using ZAM, as it will generate a hard error in addition +# to the warning. +# @TEST-REQUIRES: test "${ZEEK_ZAM}" != "1" +# +# @TEST-EXEC: zeek -b %INPUT > out 2>&1 +# @TEST-EXEC: TEST_DIFF_CANONIFIER="$SCRIPTS/diff-remove-abspath" btest-diff out + +event zeek_init() + { + const sv = vector("a", "b", "c"); + print sv == "b"; + print sv + "a"; + print "a" + sv; + + + const nv = vector(1, 2, 3); + print nv == 2; + print nv * 2; + print nv % 2; + print nv / 2; + + const also_nv = nv += 1; + print nv; + } diff --git a/testing/btest/language/cross-product-init.zeek b/testing/btest/language/cross-product-init.zeek index f5027cfd3c..ec029e9f0c 100644 --- a/testing/btest/language/cross-product-init.zeek +++ b/testing/btest/language/cross-product-init.zeek @@ -1,4 +1,4 @@ -# @TEST-EXEC: zeek -b %INPUT >output 2>&1 +# @TEST-EXEC: zeek -b -D %INPUT >output 2>&1 # @TEST-EXEC: btest-diff output global my_subs = { 1.2.3.4/19, 5.6.7.8/21 }; diff --git a/testing/btest/language/init-integration.zeek b/testing/btest/language/init-integration.zeek new file mode 100644 index 0000000000..62a9846fc3 --- /dev/null +++ b/testing/btest/language/init-integration.zeek @@ -0,0 +1,115 @@ +# @TEST-EXEC: zeek -b %INPUT >out +# @TEST-EXEC: btest-diff out +# @TEST-EXEC: 
btest-diff .stderr + +# A bunch of tests for the unification of global initializations and +# =/+=/-= expressions. + +# This is used just to pull in an example that works for globals, to make +# sure it works for locals. +@load base/frameworks/cluster + +# This first covers the bug that motivated the unification. + +type Key: record { + k0: string; + k1: string &optional; +}; + +global init_key = [$k0="x"]; + +# This used to crash or produce an ASAN error. +global state: table[Key] of count = { + [init_key] = 5, +}; + +global my_subnets = { 1.2.3.4/19, 5.6.7.8/21 }; + +event zeek_init() + { + print(fmt("init_key in state: %d", init_key in state)); + + # Check that the local version works. + local init_key2 = [$k0="y"]; + local state2: table[Key] of count = { [init_key2] = 6 }; + print(fmt("init_key2 in state2: %d", init_key2 in state2)); + + # Now checking that a complex initialization that works for + # globals also works for locals. + local cluster_nodes = { + ["manager-1"] = [$node_type=Cluster::MANAGER, $ip=127.0.0.1, $p=3/tcp], + ["worker-1"] = [$node_type=Cluster::WORKER, $ip=127.0.0.1, $p=5/udp, $manager="manager-1"], + ["worker-2"] = [$node_type=Cluster::WORKER, $ip=127.0.0.1, $p=7/tcp, $manager="manager-1"], +}; + + cluster_nodes += { ["worker-3"] = [$node_type=Cluster::WORKER, $ip=1.2.3.4, $p=9/udp] }; + print cluster_nodes; + + cluster_nodes -= { ["worker-2"] = [$node_type=Cluster::MANAGER, $ip=0.0.0.0, $p=11/tcp] }; + print cluster_nodes; + + # Similar, but without type inference. 
+ local cluster_nodes2: table[string] of Cluster::Node; + cluster_nodes2 = { ["worker-4"] = [$node_type=Cluster::WORKER, $ip=2.3.4.5, $p=13/udp] }; + + local cluster_nodes3: table[string] of Cluster::Node = { + ["worker-5"] = [$node_type=Cluster::WORKER, $ip=3.4.5.6, $p=15/tcp] + }; + + print cluster_nodes2; + cluster_nodes2 += cluster_nodes3; + print cluster_nodes2; + cluster_nodes2 -= cluster_nodes3; + cluster_nodes2 += table(["worker-6"] = Cluster::Node($node_type=Cluster::WORKER, $ip=4.5.6.7, $p=17/udp)); + print cluster_nodes2; + + # Test automatic type conversions. + local s: set[double, int]; + s += { [3, 4] }; + print s; + s -= { [3, 3] }; + print s; + s -= { [3, 4] }; + print s; + # Note, the following correctly generates a type-mismatch error + # if we use set([9, 4]) since that's a set[count, count], not + # a set[double, int]. + s += set([9.0, +4]); + print s; + + # Similar, for tables. + local t: table[double, double] of double; + t += { [3, 4] = 5 }; + print t; + t -= { [3, 3] = 9 }; + print t; + t -= { [3, 4] = 7 }; + print t; + + # Test use of sets for expansion. my_subnets needs to be a global, + # because expansion happens at compile-time. + local x: set[string, subnet]; + x += { [["foo", "bar"], my_subnets] }; + print x; + + # Test adding to patterns dynamically. + local p = /foo/; + p += /bar/; + print p; + + # Tests for vectors. 
+ local v: vector of count; + local v2 = vector(20, 21, 22, 23); + v = { 1, 3, 5 }; + v += 9; + v += { 2, 4, 6 }; + v += v2; + print v; + + local v3: vector of vector of count; + local v4 = vector(vector(80, 81), vector(90, 91, 92)); + v3 += { vector(3,2,1), vector(1,2,3) }; + v3 += v2; + v3 += v4; + print v3; + } diff --git a/testing/btest/language/init-mismatch.zeek b/testing/btest/language/init-mismatch.zeek new file mode 100644 index 0000000000..2153ca6efd --- /dev/null +++ b/testing/btest/language/init-mismatch.zeek @@ -0,0 +1,40 @@ +# @TEST-EXEC-FAIL: zeek -b %INPUT +# @TEST-EXEC: TEST_DIFF_CANONIFIER=$SCRIPTS/diff-remove-abspath btest-diff .stderr + +# Tests for various mismatches in initializations. + +global a: count = [3, 5]; +global b [4, 6]; +global c = 9; +global s1: set[double]; +global s2: set[int]; +global s3: set[count, count]; +global t: table[addr] of bool; +global t2 = { [1, 3] = F, [2, 4, 6] = T }; +global t3 = table( ["foo"] = 3, "bar" ); +global v: vector of count; +global p: pattern; +global x = { }; + +function foo() + { + local subnets = { 1.2.3.4/24, 2.3.4.5/5 }; + local my_subnets: set[string, subnet]; + my_subnets = { ["foo", subnets] }; + } + +c += { 2, 4 }; +v -= { 3, 5 }; + +s1 += s2; +s1 -= s2; + +s1 += { [3] = F }; + +s3 = { s2 }; + +p += 3; + +t += { 1.2.3.4, F }; + +print a, b; diff --git a/testing/btest/language/inlined-nested-loop.zeek b/testing/btest/language/inlined-nested-loop.zeek new file mode 100644 index 0000000000..2e116afe21 --- /dev/null +++ b/testing/btest/language/inlined-nested-loop.zeek @@ -0,0 +1,24 @@ +# @TEST-EXEC: zeek -b %INPUT >out +# @TEST-EXEC: btest-diff out + +# This used to lead to an assertion failure in the ZAM compiler due to +# a bug in how it computed the lifetime of loops nested via inlining. 
+ +function is_local(host: addr): bool + { + for ( local_net in set(10.0.0.0/8) ) + if ( host in local_net ) + return T; + return F; + } + +event zeek_init() + { + for ( host_addr in set(127.0.0.1) ) + { + if ( is_local(host_addr) ) + next; + } + + print "I compiled and ran!"; + } diff --git a/testing/btest/language/record-bad-ctor.zeek b/testing/btest/language/record-bad-ctor.zeek index 40bafa47de..49de10ebb7 100644 --- a/testing/btest/language/record-bad-ctor.zeek +++ b/testing/btest/language/record-bad-ctor.zeek @@ -5,4 +5,9 @@ global asdfasdf; const blah = [$ports=asdfasdf]; -print blah; +const x = blah; + +global asdfasdf2: port; +const blah2 = [$ports=asdfasdf2]; + +print blah, blah2; diff --git a/testing/btest/language/record-bad-ctor4.zeek b/testing/btest/language/record-bad-ctor4.zeek new file mode 100644 index 0000000000..2e944c29d2 --- /dev/null +++ b/testing/btest/language/record-bad-ctor4.zeek @@ -0,0 +1,31 @@ +# @TEST-EXEC-FAIL: zeek -b %INPUT >out 2>&1 +# @TEST-EXEC: TEST_DIFF_CANONIFIER=$SCRIPTS/diff-remove-abspath btest-diff out + +# Named record constructors should include values for every non-optional, +# non-aggregate field. 
+ +type info1 : record { + str: string; + cnt: count &optional; + a: addr &optional; + + v: vector of bool; + r: record { x: count; }; + s: set[bool]; + t: table[bool] of string; +}; + +type info2 : record { + str: string; + cnt: count; + a: addr; +}; + +event zeek_init() + { + local resp1 = info1($str="hello"); + print resp1; + + local resp2 = info2($str="hello"); + print resp2; + } diff --git a/testing/btest/language/redef-same-prefixtable-idx.zeek b/testing/btest/language/redef-same-prefixtable-idx.zeek index c96af48f3e..838f3fc0ba 100644 --- a/testing/btest/language/redef-same-prefixtable-idx.zeek +++ b/testing/btest/language/redef-same-prefixtable-idx.zeek @@ -1,4 +1,5 @@ # @TEST-EXEC: zeek -b %INPUT >out +# @TEST-EXEC: TEST_DIFF_CANONIFIER=$SCRIPTS/diff-remove-abspath btest-diff .stderr # @TEST-EXEC: btest-diff out const my_table: table[subnet] of subnet &redef; diff --git a/testing/btest/language/table-init-attrs.zeek b/testing/btest/language/table-init-attrs.zeek index 5f1e742479..5f1ec64309 100644 --- a/testing/btest/language/table-init-attrs.zeek +++ b/testing/btest/language/table-init-attrs.zeek @@ -11,7 +11,7 @@ redef my_set_ctor_init += { "test3", }; -redef my_set_ctor_init += set("test4"); +redef my_set_ctor_init += { "test4" }; const my_table_ctor_init: table[count] of string = table([1] = "test1") &redef &default="nope"; @@ -29,7 +29,7 @@ redef my_set_init += { "test3", }; -redef my_set_init += set("test4"); +redef my_set_init += { "test4" }; const my_table_init: table[count] of string = { [1] = "test1" } &redef &default="nope"; @@ -38,7 +38,7 @@ redef my_table_init += { [3] = "test3", }; -redef my_table_init += table([4] = "test4"); +redef my_table_init += { [4] = "test4" }; # For tables that yield tables, we can apply attributes to the both other and # inner tables... 
diff --git a/testing/btest/language/table-init.zeek b/testing/btest/language/table-init.zeek index 0a2514e0b9..7c0a51d861 100644 --- a/testing/btest/language/table-init.zeek +++ b/testing/btest/language/table-init.zeek @@ -17,4 +17,7 @@ event zeek_init() print global_table[0]; print local_table; print local_table[0]; + + # Catch regression where this used to crash. + print table() &default=record($crash=F); } diff --git a/testing/btest/language/table-type-checking.zeek b/testing/btest/language/table-type-checking.zeek index faefaf3a60..7df9c63082 100644 --- a/testing/btest/language/table-type-checking.zeek +++ b/testing/btest/language/table-type-checking.zeek @@ -3,6 +3,9 @@ type MyTable: table[port] of count; +# global, type deduction, empty ctor. +global gempty = table(); + # global, type deduction, named ctor global gdn = MyTable(["zero"] = 0); # type clash in init @@ -44,3 +47,9 @@ event zeek_init() { local lea: MyTable = table(["thousand-three"] = 1003); # type clash } + +# local, type explicit, empty ctor has incompatible &default +event zeek_init() + { + local lei: MyTable = table() &default="foo"; # type clash + } diff --git a/testing/btest/language/type-check-operator-in.zeek b/testing/btest/language/type-check-operator-in.zeek index b0f0430eb6..c7121b3725 100644 --- a/testing/btest/language/type-check-operator-in.zeek +++ b/testing/btest/language/type-check-operator-in.zeek @@ -15,26 +15,26 @@ local string_records: set[string, MyRec] = set(); local record_strings: set[MyRec, string] = set(); # These are all valid. 
-["asdf"] in strings; -["hi", 0] in string_counts; -myrec in records; -[myrec] in records; -MyRec() in records; -[$a = 2] in records; -[MyRec()] in records; -[[$a = 2]] in records; -["hi", myrec] in string_records; +print ["asdf"] in strings; +print ["hi", 0] in string_counts; +print myrec in records; +print [myrec] in records; +print MyRec() in records; +print [$a = 2] in records; +print [MyRec()] in records; +print [[$a = 2]] in records; +print ["hi", myrec] in string_records; # All below should fail type-checking. -myrec in "asdf"; -myrec in string_records; -myrec in record_strings; +print myrec in "asdf"; +print myrec in string_records; +print myrec in record_strings; # Patterns do not apply transparently to collections of strings, so fail # to type-check too: -/foo/ in strings; +print /foo/ in strings; # Complex index types need to match, too. (For tests with matching types, # see set.zeek / table.zeek.) @@ -43,5 +43,5 @@ local table_set: set[table[string] of string] = set(); local stringvec_set: set[vector of string] = set(); local string_count_map: table[string] of count = table(); -string_count_map in table_set; -vector(1, 2, 3) in stringvec_set; +print string_count_map in table_set; +print vector(1, 2, 3) in stringvec_set; diff --git a/testing/btest/language/vector-slice-assign.zeek b/testing/btest/language/vector-slice-assign.zeek new file mode 100644 index 0000000000..dcf4b5537f --- /dev/null +++ b/testing/btest/language/vector-slice-assign.zeek @@ -0,0 +1,24 @@ +# @TEST-EXEC: zeek -b %INPUT >out +# @TEST-EXEC: btest-diff out + +type testrec: record { + seq: count; +}; + +function make_recs(seqs: vector of count): vector of testrec +{ + local r: vector of testrec; + for (i in seqs) + r += testrec($seq=seqs[i]); + return r; +} + +event zeek_init() +{ + local seqs: vector of count = {1, 2, 3, 4, 5, 6, 7}; + local v = make_recs(seqs); + local tmp = v[0:2]; + v[0:2] = v[3:5]; + v[3:5] = tmp; + print v; +} diff --git 
a/testing/btest/language/vector-unspecified.zeek b/testing/btest/language/vector-unspecified.zeek index d0898b5d42..e28278ca48 100644 --- a/testing/btest/language/vector-unspecified.zeek +++ b/testing/btest/language/vector-unspecified.zeek @@ -1,7 +1,9 @@ -# @TEST-EXEC: zeek -b %INPUT >output 2>&1 -# @TEST-EXEC: btest-diff output +# @TEST-EXEC-FAIL: zeek -b %INPUT >output 2>&1 +# @TEST-EXEC: TEST_DIFF_CANONIFIER=$SCRIPTS/diff-remove-abspath btest-diff output -# Test assignment behavior of unspecified vectors +# Test assignment behavior of unspecified vectors. This used to treat +# "a" as a vector-of-any, but that seems dangerous - if the user really +# wants that behavior, they can explicitly type it as such. local a = vector(); a[0] = 5; diff --git a/testing/btest/plugins/binpac-flowbuffer-frame-length-plugin/src/FOO.cc b/testing/btest/plugins/binpac-flowbuffer-frame-length-plugin/src/FOO.cc index 3eea2c8121..743f530665 100644 --- a/testing/btest/plugins/binpac-flowbuffer-frame-length-plugin/src/FOO.cc +++ b/testing/btest/plugins/binpac-flowbuffer-frame-length-plugin/src/FOO.cc @@ -38,8 +38,7 @@ void FOO_Analyzer::DeliverStream(int len, const u_char* data, bool orig) { tcp::TCP_ApplicationAnalyzer::DeliverStream(len, data, orig); - assert(TCP()); - if ( TCP()->IsPartial() ) + if ( TCP() && TCP()->IsPartial() ) return; if ( had_gap ) diff --git a/testing/btest/plugins/plugin-load-file-extended.zeek b/testing/btest/plugins/plugin-load-file-extended.zeek index 0d3d297c5a..2550363fd3 100644 --- a/testing/btest/plugins/plugin-load-file-extended.zeek +++ b/testing/btest/plugins/plugin-load-file-extended.zeek @@ -1,7 +1,7 @@ # @TEST-EXEC: ${DIST}/auxil/zeek-aux/plugin-support/init-plugin -u . Testing LoadFileExtended # @TEST-EXEC: cp -r %DIR/plugin-load-file-extended/* . 
# @TEST-EXEC: ./configure --zeek-dist=${DIST} && make -# @TEST-EXEC: ZEEK_PLUGIN_PATH=$(pwd) zeek -r $TRACES/wikipedia.trace -b Testing::LoadFileExtended xxx yyy -s abc.sig >> output +# @TEST-EXEC: ZEEK_PLUGIN_PATH=$(pwd) zeek -r $TRACES/wikipedia.trace -b Testing::LoadFileExtended xxx xxx2 yyy -s abc.sig >> output # @TEST-EXEC: btest-diff output # @TEST-START-FILE xxx.zeek @@ -12,6 +12,20 @@ event zeek_init() { # @TEST-END-FILE +# @TEST-START-FILE xxx2.zeek +# Test loading from script land. +@load xxx3 +@load-sigs def.sig +# @TEST-END-FILE + +# @TEST-START-FILE xxx3.zeek +# empty +# @TEST-END-FILE + # @TEST-START-FILE abc.sig # empty # @TEST-END-FILE + +# @TEST-START-FILE def.sig +# empty +# @TEST-END-FILE diff --git a/testing/btest/plugins/plugin-load-file-extended/src/Plugin.cc b/testing/btest/plugins/plugin-load-file-extended/src/Plugin.cc index a86d4f50f2..2785b30c9e 100644 --- a/testing/btest/plugins/plugin-load-file-extended/src/Plugin.cc +++ b/testing/btest/plugins/plugin-load-file-extended/src/Plugin.cc @@ -26,9 +26,15 @@ std::pair> Plugin::HookLoadFileExtended(const Lo const std::string& file, const std::string& resolved) { + // Zeek implicitly provides the location where the current '@load' + // originated. If no location is available, filename will be a nullptr. + auto src = zeek::detail::GetCurrentLocation().filename; + if ( ! src ) + src = "n/a"; + if ( type == LoadType::SCRIPT && file == "xxx" ) { - printf("HookLoadExtended/script: file=|%s| resolved=|%s|\n", file.c_str(), resolved.c_str()); + printf("HookLoadExtended/script: file=|%s| resolved=|%s| srcloc=|%s|\n", file.c_str(), resolved.c_str(), src); return std::make_pair(1, R"( event zeek_init() { @@ -41,9 +47,16 @@ std::pair> Plugin::HookLoadFileExtended(const Lo )"); } + if ( type == LoadType::SCRIPT && file == "xxx3" ) + { + printf("HookLoadExtended/script: file=|%s| resolved=|%s| srcloc=|%s|\n", file.c_str(), resolved.c_str(), src); + // We don't replace this one. 
+ return std::make_pair(-1, std::nullopt); + } + if ( type == LoadType::SCRIPT && file == "yyy" ) { - printf("HookLoadExtended/script: file=|%s| resolved=|%s|\n", file.c_str(), resolved.c_str()); + printf("HookLoadExtended/script: file=|%s| resolved=|%s| srcloc=|%s|\n", file.c_str(), resolved.c_str(), src); return std::make_pair(1, R"( event zeek_init() { @@ -54,7 +67,7 @@ std::pair> Plugin::HookLoadFileExtended(const Lo if ( type == LoadType::SIGNATURES && file == "abc.sig" ) { - printf("HookLoadExtended/signature: file=|%s| resolved=|%s|\n", file.c_str(), resolved.c_str()); + printf("HookLoadExtended/signature: file=|%s| resolved=|%s| srcloc=|%s|\n", file.c_str(), resolved.c_str(), src); return std::make_pair(1, R"( signature my-sig { @@ -65,6 +78,13 @@ std::pair> Plugin::HookLoadFileExtended(const Lo )"); } + if ( type == LoadType::SIGNATURES && file == "def.sig" ) + { + printf("HookLoadExtended/signature: file=|%s| resolved=|%s| srcloc=|%s|\n", file.c_str(), resolved.c_str(), src); + // We don't replace this one. + return std::make_pair(-1, std::nullopt); + } + return std::make_pair(-1, std::nullopt); } diff --git a/testing/btest/plugins/protocol-plugin/src/Foo.cc b/testing/btest/plugins/protocol-plugin/src/Foo.cc index d1cf72196e..aef658e7ec 100644 --- a/testing/btest/plugins/protocol-plugin/src/Foo.cc +++ b/testing/btest/plugins/protocol-plugin/src/Foo.cc @@ -36,10 +36,7 @@ void Foo::DeliverStream(int len, const u_char* data, bool orig) { zeek::analyzer::tcp::TCP_ApplicationAnalyzer::DeliverStream(len, data, orig); - assert(TCP()); - - if ( TCP()->IsPartial() ) - // punt on partial. + if ( TCP() && TCP()->IsPartial() ) return; try diff --git a/testing/btest/plugins/reporter-hook.zeek b/testing/btest/plugins/reporter-hook.zeek index 92a971eca8..4407a1013f 100644 --- a/testing/btest/plugins/reporter-hook.zeek +++ b/testing/btest/plugins/reporter-hook.zeek @@ -1,7 +1,8 @@ # @TEST-EXEC: ${DIST}/auxil/zeek-aux/plugin-support/init-plugin -u . 
Reporter Hook # @TEST-EXEC: cp -r %DIR/reporter-hook-plugin/* . # @TEST-EXEC: ./configure --zeek-dist=${DIST} && make -# @TEST-EXEC: ZEEK_PLUGIN_ACTIVATE="Reporter::Hook" ZEEK_PLUGIN_PATH=`pwd` zeek -b %INPUT 2>&1 | $SCRIPTS/diff-remove-abspath | sort | uniq >output +# @TEST-EXEC: ZEEK_PLUGIN_ACTIVATE="Reporter::Hook" ZEEK_PLUGIN_PATH=`pwd` zeek -NN Reporter::Hook >output +# @TEST-EXEC: ZEEK_PLUGIN_ACTIVATE="Reporter::Hook" ZEEK_PLUGIN_PATH=`pwd` zeek -b %INPUT 2>&1 | $SCRIPTS/diff-remove-abspath | sort | uniq >>output # @TEST-EXEC: btest-diff output # @TEST-EXEC: TEST_DIFF_CANONIFIER="$SCRIPTS/diff-remove-abspath | $SCRIPTS/diff-remove-timestamps" btest-diff reporter.log diff --git a/testing/btest/scripts/base/frameworks/intel/read-file-dist-cluster.zeek b/testing/btest/scripts/base/frameworks/intel/read-file-dist-cluster.zeek index 1346961395..3965214db4 100644 --- a/testing/btest/scripts/base/frameworks/intel/read-file-dist-cluster.zeek +++ b/testing/btest/scripts/base/frameworks/intel/read-file-dist-cluster.zeek @@ -7,7 +7,7 @@ # @TEST-EXEC: btest-bg-run worker-2 ZEEKPATH=$ZEEKPATH:.. 
CLUSTER_NODE=worker-2 zeek -b %INPUT # @TEST-EXEC: btest-bg-wait 40 # @TEST-EXEC: btest-diff manager-1/.stdout -# @TEST-EXEC: btest-diff manager-1/intel.log +# @TEST-EXEC: TEST_DIFF_CANONIFIER=$SCRIPTS/diff-remove-timestamps-and-sort btest-diff manager-1/intel.log # @TEST-EXEC: btest-diff worker-1/.stdout # @TEST-EXEC: btest-diff worker-2/.stdout diff --git a/testing/btest/scripts/base/frameworks/software/version-parsing.zeek b/testing/btest/scripts/base/frameworks/software/version-parsing.zeek index 5730348c3d..852ae980eb 100644 --- a/testing/btest/scripts/base/frameworks/software/version-parsing.zeek +++ b/testing/btest/scripts/base/frameworks/software/version-parsing.zeek @@ -35,7 +35,7 @@ global matched_software: table[string] of Software::Description = { ["curl/7.15.1 (i486-pc-linux-gnu) libcurl/7.15.1 OpenSSL/0.9.8a zlib/1.2.3 libidn/0.5.18"] = [$name="curl", $version=[$major=7,$minor=15,$minor2=1,$addl="i486-pc-linux-gnu"], $unparsed_version=""], ["Apache"] = - [$name="Apache", $unparsed_version=""], + [$name="Apache", $version=[], $unparsed_version=""], ["Zope/(Zope 2.7.8-final, python 2.3.5, darwin) ZServer/1.1 Plone/Unknown"] = [$name="Zope/(Zope", $version=[$major=2,$minor=7,$minor2=8,$addl="final"], $unparsed_version=""], ["The Bat! 
(v2.00.9) Personal"] = diff --git a/testing/btest/scripts/base/protocols/ssl/dpd.test b/testing/btest/scripts/base/protocols/ssl/dpd.test index f7f76a6e1a..4a23aeb0d2 100644 --- a/testing/btest/scripts/base/protocols/ssl/dpd.test +++ b/testing/btest/scripts/base/protocols/ssl/dpd.test @@ -8,6 +8,7 @@ @load base/frameworks/dpd @load base/frameworks/signatures @load-sigs base/protocols/ssl/dpd.sig +@load-sigs policy/protocols/ssl/dpd-v2.sig event zeek_init() { diff --git a/testing/btest/scripts/policy/frameworks/cluster/controller/agent-checkin.zeek b/testing/btest/scripts/policy/frameworks/management/controller/agent-checkin.zeek similarity index 64% rename from testing/btest/scripts/policy/frameworks/cluster/controller/agent-checkin.zeek rename to testing/btest/scripts/policy/frameworks/management/controller/agent-checkin.zeek index cf1304e3f3..ecdc3b17ed 100644 --- a/testing/btest/scripts/policy/frameworks/cluster/controller/agent-checkin.zeek +++ b/testing/btest/scripts/policy/frameworks/management/controller/agent-checkin.zeek @@ -1,7 +1,7 @@ -# This test verifies basic agent-controller communication. We launch agent and -# controller via the supervisor, add an extra handler for the notify_agent_hello -# event that travels agent -> controller, and verify its print output in the -# controller's stdout log. +# This test verifies basic agent-controller communication in the Management +# framework. We launch agent and controller via the supervisor, add an extra +# handler for the notify_agent_hello event that travels agent -> controller, and +# verify its print output in the controller's stdout log. 
# The following env vars is known to the controller framework # @TEST-PORT: ZEEK_CONTROLLER_PORT @@ -12,20 +12,20 @@ # @TEST-EXEC: btest-bg-wait 10 # @TEST-EXEC: btest-diff zeek/controller.stdout -@load policy/frameworks/cluster/agent -@load policy/frameworks/cluster/controller +@load policy/frameworks/management/agent +@load policy/frameworks/management/controller redef Broker::default_port = to_port(getenv("BROKER_PORT")); -redef ClusterController::name = "controller"; -redef ClusterAgent::name = "agent"; +redef Management::Controller::name = "controller"; +redef Management::Agent::name = "agent"; # Tell the agent where to locate the controller. -redef ClusterAgent::controller = [$address="127.0.0.1", $bound_port=to_port(getenv("ZEEK_CONTROLLER_PORT"))]; +redef Management::Agent::controller = [$address="127.0.0.1", $bound_port=to_port(getenv("ZEEK_CONTROLLER_PORT"))]; @if ( Supervisor::is_supervised() ) -@load policy/frameworks/cluster/agent/api +@load policy/frameworks/management/agent/api global logged = F; @@ -41,7 +41,7 @@ event zeek_init() } } -event ClusterAgent::API::notify_agent_hello(instance: string, host: addr, api_version: count) +event Management::Agent::API::notify_agent_hello(instance: string, host: addr, api_version: count) { if ( Supervisor::node()$name == "controller" ) { diff --git a/testing/btest/scripts/policy/protocols/ssl/decryption-keylog.zeek b/testing/btest/scripts/policy/protocols/ssl/decryption-keylog.zeek new file mode 100644 index 0000000000..70e2baf5c4 --- /dev/null +++ b/testing/btest/scripts/policy/protocols/ssl/decryption-keylog.zeek @@ -0,0 +1,42 @@ +# @TEST-REQUIRES: grep -q "#define OPENSSL_HAVE_KDF_H" $BUILD/zeek-config.h + +# @TEST-EXEC: ZEEK_TLS_KEYLOG_FILE=keylogfile.log zeek -B dpd -C -r $TRACES/tls/tls-1.2-stream-keylog.pcap %INPUT + +@TEST-START-FILE keylogfile.log +#fields client_random secret +\x0e\x78\x2d\x35\x63\x95\x5d\x8a\x30\xa9\xcf\xb6\x4f\x47\xf3\x96\x34\x8a\x1e\x79\x1a\xa2\x32\x55\xe2\x2f\xc5\x7a 
\x34\x4f\x12\x65\xbf\x43\x40\xb3\x61\x6b\xa0\x16\x5d\x2b\x4d\xb9\xb1\xe8\x4a\x3d\xa2\x42\x0e\x38\xab\x01\x50\x62\x84\xcc\x34\xcd\xe0\x34\x10\xfe\x1a\x02\x30\x49\x74\x6c\x46\x43\xa7\x0c\x67\x0d +\x24\x8c\x7e\x24\xee\xfb\x13\xcd\xee\xde\xb1\xf4\xb6\xd6\xd5\xee\x67\x8d\xd3\xff\xc7\xe7\x39\x23\x18\x3f\x99\xb4 \xe7\xed\x24\x26\x0d\x25\xd9\xfd\xf5\x0f\xc0\xf4\x56\x51\x0e\x4e\xec\x7f\x58\x9c\xaf\x39\x25\x14\x16\xa6\x71\xdd\xea\xfe\xe9\xc0\x93\xbe\x89\x4c\xab\xcc\xff\xb2\xf0\x9a\xea\x98\xf5\xb2\x53\x1e +\x57\xd7\xc7\x7a\x2d\x5e\x35\x29\x2c\xd7\xe7\x94\xee\xf8\x6f\x31\x45\xf6\xbe\x25\x08\xed\x1d\x92\xd2\x0b\x9b\x04 \xc1\x93\x17\x93\xd9\x7d\xd2\x98\xb3\xe0\xdb\x2c\x5d\xbe\x71\x31\xa7\x9a\xf5\x91\xf9\x87\x90\xee\xb7\x79\x9f\x6b\xb4\x1f\x47\xa7\x69\x62\x4b\xa3\x99\x0c\xa9\x43\xf9\xea\x3b\x4d\x5f\x2f\xfe\xfb +\x30\xd7\xb8\x92\xc1\xec\x17\x90\x5b\x0f\xcb\xda\xe6\x42\xb2\x09\x4c\xdd\x7d\x2e\xa1\x9f\x1a\x3b\x70\x23\x7d\xf2 \xc1\x93\x17\x93\xd9\x7d\xd2\x98\xb3\xe0\xdb\x2c\x5d\xbe\x71\x31\xa7\x9a\xf5\x91\xf9\x87\x90\xee\xb7\x79\x9f\x6b\xb4\x1f\x47\xa7\x69\x62\x4b\xa3\x99\x0c\xa9\x43\xf9\xea\x3b\x4d\x5f\x2f\xfe\xfb +\x49\xc7\x71\x25\xdc\xb0\xa7\xbc\xd6\xb6\x67\x5c\x30\x58\x8d\xad\x47\x5a\x93\x60\xac\xa5\x78\xf5\x62\x7e\xff\x62 \xc1\x93\x17\x93\xd9\x7d\xd2\x98\xb3\xe0\xdb\x2c\x5d\xbe\x71\x31\xa7\x9a\xf5\x91\xf9\x87\x90\xee\xb7\x79\x9f\x6b\xb4\x1f\x47\xa7\x69\x62\x4b\xa3\x99\x0c\xa9\x43\xf9\xea\x3b\x4d\x5f\x2f\xfe\xfb +\x38\x1c\x49\xcc\xf9\x62\xd0\x5c\xf0\xd4\xe2\xd5\xa1\x15\xc1\x5e\x8d\x02\xcc\x50\xed\x6c\x90\x63\x73\x9d\xfb\x96 \xdc\xf5\xfc\x10\xf2\xb3\x8b\xd8\x87\xae\xcf\xb5\xcd\x1a\xe3\xa8\x06\x8e\x85\xfc\xbb\xfc\x22\xec\x0f\x79\x99\x04\x13\x5b\x6b\x03\x52\x02\xee\xe9\x04\x59\x78\x44\xf1\xf3\xc8\xac\x22\x68\x6c\x7e +\x61\x9e\x08\x51\xee\x36\x3c\x2c\xf3\x71\x87\x22\x82\x27\xca\x4e\x68\x0f\x9a\x7c\x0b\xd1\x50\x69\xaa\x7a\x59\x70 
\xad\x03\xce\xda\x48\x90\xfa\x58\x1e\x98\x9f\x5e\x38\x62\x02\x3e\x2a\x4e\x3e\x8a\xd8\x13\x25\x23\x8d\x90\x80\x66\xe1\xd3\x5c\xc8\x75\x97\x9e\x34\xc0\x8e\x6f\xdf\xd9\xd8\xc6\xf3\x56\xe3\x85\xc1 +\xcb\x3f\x93\xd2\x55\xcb\xb6\x56\x25\x87\xf0\xdd\x01\x02\x12\xfd\xee\x9d\x23\x3a\xff\x64\xe6\xed\x36\xcd\x5c\x45 \x0d\x36\xfa\xaa\x2e\xad\xbd\xa2\xa8\x09\x5f\x95\x1d\xe1\xcb\xac\x46\xb8\x1b\x00\x8f\xbf\x39\x1d\x91\x95\x1b\x34\x85\x47\x6b\xab\x73\x28\x8a\x1e\x17\xcd\x0c\xe8\x0e\x0f\xc0\x40\x1d\xbe\x9e\x3f +\xf9\x7e\x7d\x38\x56\xe2\xfc\xcb\xbe\x80\x79\x8e\xc2\xe3\xf5\x15\x25\x10\x82\xad\x63\xbb\xc7\xc2\x31\xd8\xbe\xe0 \x9a\x7c\xf9\x46\xa0\x47\x18\xa1\x9f\x4d\x20\xc3\xf8\x0c\x1c\xf8\xc8\x23\xc3\xe2\xb1\xc3\x37\xef\x64\x32\x2d\x75\x1b\x41\x05\x43\x31\x5f\x6e\xcf\x7d\xbf\x45\xec\x9b\xe1\x94\xa3\xcc\x7c\x1a\x0f +\x57\x97\x63\x67\xf2\xea\x9c\x95\x46\x7a\x6c\xc5\x59\xda\x6f\xeb\xbc\x44\x2e\x11\x3a\xc5\xea\xa7\xed\x97\xad\x38 \x0e\x5e\xc0\x6c\xa5\x4e\xe3\x86\x05\x5a\xaa\x97\x1c\x7e\x09\x39\xba\x3e\x1f\xb1\x62\x4d\x0a\x5b\x9c\x0c\xae\x97\x5f\x0e\x25\xbc\x4c\x51\x21\xfa\x34\x5e\xa1\x26\x47\xc4\x7a\x5a\x1c\xe5\xbd\xce +\x70\x18\x17\x27\xd6\xe2\x04\xd1\xd8\xa5\xb8\x2a\x05\x01\xaf\x7b\x13\x6d\x3a\x9c\x56\x6c\x32\x5b\x3f\xef\xb5\x04 \x92\x3d\x8a\x93\xba\xc5\x54\xc1\x04\x9a\x8d\xeb\x63\x28\x8c\xd7\x4d\x60\x51\xb0\x7a\x10\x67\x84\x8d\xac\x15\xc8\x75\xf2\x5c\x2a\x60\xe3\x38\xde\xb3\x27\x37\x44\xb1\x53\xe6\x9d\x42\x06\x0e\x18 +\x4f\x12\x67\xb1\x13\xdc\x1a\x3e\x5d\xee\xbf\xff\xa7\x4d\xaa\xa1\x96\xff\x43\x0a\x30\xbe\x04\x07\x60\x29\x5f\x5e \x1d\x61\x52\xa6\x1e\x86\x75\x53\x04\xb8\x8e\x12\x6f\xdb\xa4\x49\x05\xeb\x5e\x4b\x33\xf6\xaf\xee\x67\x20\x37\xfd\x84\x48\x9a\xaa\x62\xa6\xb2\x64\x0f\x62\x87\x12\xe8\x05\x98\xae\x0c\xbf\xae\x5f +\xfe\x13\x61\x60\x80\x41\x0b\x9d\xc2\xcc\xc2\xc3\x00\xab\x20\x6b\xb8\x43\xc4\xc4\x22\x81\x1f\x15\xd4\xed\x34\xc3 
\x39\xfb\x4d\x9c\x1d\xff\x4d\xe4\x1c\x86\xf9\x67\x9b\x32\xca\xa3\x99\x9c\x91\xcd\x7a\xf5\x4d\xc5\x58\x98\x1c\xcf\xf6\xd9\xa7\x4c\x92\x6e\x93\x7f\x98\x02\x96\x22\x20\x52\x5e\x9d\xe0\xec\x4a\xc1 +\x92\xc2\x33\xdd\xf3\xf4\x31\xd6\x0c\x9b\x90\x86\x6a\xde\x5d\x80\x32\x22\xb8\x18\x45\xf5\x11\x72\xa0\x4f\xe9\x65 \xda\x22\x06\x86\xef\x25\x99\xb4\x65\x2c\x45\x94\x73\xcd\xe9\xc6\x64\x55\x84\x21\x42\x35\x86\x57\x9a\x60\xd4\xc7\x88\xd8\x1b\x3a\xbe\xdf\x53\x7b\xd7\x9c\xf9\x29\x47\x05\x07\x0f\x23\x3b\x22\xc4 +\x39\x8e\xeb\xdf\x69\xd9\xe3\xe2\xce\xd8\xe9\xb2\x93\xa6\xb7\x58\x30\x9b\xaf\x14\x98\xbd\x27\xa0\xe1\x12\x54\x3f \xa9\xcc\x51\xa6\x83\xf1\xbb\x6b\x37\xf0\xe2\x8b\xa5\xea\x31\xc8\xdc\x19\x5e\xb1\xaf\xa0\x5c\x51\xa1\x4a\x73\x22\xc0\x24\xf1\x41\x4a\xd9\x15\x16\xa8\x83\x38\x84\xe1\xca\x9d\xf0\xd5\x35\x40\x73 +\xdc\xf5\x87\xb0\x6d\x66\xd6\xab\x66\x34\xd7\x64\xc8\x51\xa1\x22\xe3\x97\x3d\x4b\x16\xee\x8e\x1e\x0b\xfb\xfc\x13 \xd5\xaf\x0d\xed\x74\x58\x8d\xe8\x97\x6d\xa0\xb2\x46\x83\x58\x0f\x52\xbc\xc7\x66\xb1\x19\x74\x70\x0d\xaa\xd1\x10\x9b\x71\x53\xe6\x80\x34\x5d\x81\xd2\x86\x8a\x33\xfc\x62\x88\xa7\x80\xac\x63\xb6 +\x51\xcb\xcc\x61\xae\xd0\xeb\x08\x75\x09\xde\x68\x3c\x36\x03\xf5\xa3\xd5\xa5\x15\xdc\x3e\x87\xdb\xcf\xc7\x7a\x1e \x25\x90\xa9\x7e\x5a\x93\xe9\xdd\x61\x6c\x46\xf2\xf6\x03\x7c\x19\xb1\xf5\x9a\x4a\x6c\x58\x71\x8e\xfe\xa4\xfe\xa6\x30\x70\x5f\xaf\xd4\xf9\xb9\x3a\x16\xa8\x0f\x69\x8d\x29\xfb\x1a\x34\x62\x87\x36 +\x01\x01\x12\xfb\x01\x61\xc6\xcd\xde\xdd\x2a\x9b\x2a\x2f\x02\x65\xa5\x0f\x62\xb1\x1b\x26\xd3\xa2\x69\x78\xe0\x17 \x8a\x67\x2f\xc6\xc1\x75\xed\xb9\x2f\x8c\xb5\x3d\xdc\x56\xb4\x3e\xab\x11\xa7\xb6\xff\x32\x47\x7b\x9c\x9c\x32\xe9\xbe\xa6\xb1\xed\xe1\x29\x7e\x4b\x89\xb7\xb0\xd6\x21\xc1\xda\x5c\x90\x70\x1b\xe4 +\x7a\xf0\xf4\x6e\x91\x8e\x38\x51\xfd\xd6\x42\xfb\x3e\x9b\x78\x29\x49\x3f\x78\x19\xd6\x2b\x61\xd5\x8b\xad\xfd\x70 
\x78\xd8\x68\x51\x05\xc5\x3c\xeb\xcd\x22\xe0\x2e\x4b\x6f\xae\x53\x3f\xe8\x23\x73\xeb\xeb\x1b\xb2\x9a\x76\xca\x65\x01\x16\xa2\x97\x93\x60\xd5\x5d\xd4\xac\x52\x22\x16\x40\x15\x03\xb6\x23\xc1\xac +\x31\x15\xDD\x9D\x68\x19\xB3\xBF\x45\x32\x99\x74\x0D\x04\xAE\x37\xAD\x69\xE5\x23\x4C\xD5\x40\xF8\xB5\x89\x4B\xA4 \x7C\x57\xC5\x98\xCD\x00\xE0\x0F\x55\x48\x6A\xF0\x02\x4E\x84\xB7\xAE\x07\xB5\xCD\xB1\x1E\x17\x2D\x24\xF0\xB3\xB3\xB8\x4B\x54\x4A\x82\x84\x15\xAD\x52\x24\x52\xBB\x34\x0D\x95\x30\x45\x3E\x15\x14 +\x07\xDF\x9C\xC1\x59\xB6\x42\x8E\x57\x84\xED\xB1\x60\x37\xF3\x24\x2F\x70\x27\x5D\x07\xC4\xA8\xB9\xF0\xA7\xA6\x7F \x13\x9C\x33\x7E\x5C\x4E\x23\x5F\xCB\xFF\xD0\xD0\x54\x38\x0E\x04\x46\x2E\x6C\x8D\x51\x52\xEE\xAD\x79\x3F\x07\xA8\xCD\x18\x7D\x99\x99\x82\x1F\xA1\x51\xE2\xF6\xD4\x3F\x7B\x5C\x8A\xFE\x83\x6F\x4F +@TEST-END-FILE + +@load protocols/ssl/decryption +@load base/protocols/http + +event zeek_init() + { + suspend_processing(); + } + +event Input::end_of_data(name: string, source: string) + { + if ( name == "tls-keylog-file" ) + continue_processing(); + } diff --git a/testing/btest/scripts/policy/protocols/ssl/decryption.zeek b/testing/btest/scripts/policy/protocols/ssl/decryption.zeek new file mode 100644 index 0000000000..71dea5e41d --- /dev/null +++ b/testing/btest/scripts/policy/protocols/ssl/decryption.zeek @@ -0,0 +1,13 @@ +# @TEST-REQUIRES: grep -q "#define OPENSSL_HAVE_KDF_H" $BUILD/zeek-config.h + +# @TEST-EXEC: zeek -B dpd -C -r $TRACES/tls/tls12-decryption.pcap %INPUT +# @TEST-EXEC: btest-diff http.log + +@load protocols/ssl/decryption +@load base/protocols/http + +module SSL; + +redef SSL::secrets += { +["\xb4\x0a\x24\x4b\x48\xe4\x2e\xac\x28\x71\x44\xb1\xb7\x39\x30\x57\xca\xa1\x31\xf9\x61\xa7\x8e\x38\xb0\xe7\x7c\x1e"] = "\xbd\x01\xe5\x89\xd1\x05\x19\x9e\x9a\xb5\xfc\x9b\xd7\x58\xb5\xf2\x88\xdb\x28\xfd\x80\xaa\x02\x26\x1e\x47\x65\xac\x13\x57\xd0\x07\xfd\x08\xc7\xbd\xab\x45\x45\x0e\x01\x5a\x01\xd0\x8e\x5e\x7c\xa6", +}; diff --git 
a/testing/external/commit-hash.zeek-testing-cluster b/testing/external/commit-hash.zeek-testing-cluster index 93a321a305..f8de34839f 100644 --- a/testing/external/commit-hash.zeek-testing-cluster +++ b/testing/external/commit-hash.zeek-testing-cluster @@ -1 +1 @@ -a2b3414ff6cf7cc3141c11849519c58fe15727ec +1b515f3f60abed5c505a970cae380560ce6304c1 diff --git a/testing/external/commit-hash.zeek-testing-private b/testing/external/commit-hash.zeek-testing-private index 78c1099b34..569fc23187 100644 --- a/testing/external/commit-hash.zeek-testing-private +++ b/testing/external/commit-hash.zeek-testing-private @@ -1 +1 @@ -7c40cc2c3709fc54e5c75c119d1d01ed8a3ceb93 +76a9ffd27c15ff1603216ee77f59cc9d515747c6 diff --git a/testing/scripts/diff-remove-timestamps-and-sort b/testing/scripts/diff-remove-timestamps-and-sort new file mode 100755 index 0000000000..9726a8e331 --- /dev/null +++ b/testing/scripts/diff-remove-timestamps-and-sort @@ -0,0 +1,5 @@ +#! /usr/bin/env bash + +scripts=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd) + +$scripts/diff-remove-timestamps | $scripts/diff-sort diff --git a/zeek-config.h.in b/zeek-config.h.in index ff5edc8c8b..dad7242488 100644 --- a/zeek-config.h.in +++ b/zeek-config.h.in @@ -83,6 +83,9 @@ /* Compatibility for Darwin */ #cmakedefine NEED_NAMESER_COMPAT_H +/* openssl/kdf.h for TLS PRF (key derivation) */ +#cmakedefine OPENSSL_HAVE_KDF_H + /* d2i_x509 uses const char** */ #cmakedefine OPENSSL_D2I_X509_USES_CONST_CHAR diff --git a/zeek-config.in b/zeek-config.in index 20c3e22475..a25449ff04 100755 --- a/zeek-config.in +++ b/zeek-config.in @@ -65,7 +65,7 @@ Toplevel installation directories for third-party components: --binpac_root BinPAC compiler --broker_root Broker communication framework - --caf_root C++ Actor Framework + --caf_root C++ Actor Framework (deprecated, will be removed in 5.1) " } @@ -100,7 +100,7 @@ while [ $# -ne 0 ]; do echo $build_type ;; --caf_root) - echo $caf_root + echo "The caf_root option is 
deprecated and will be removed in 5.1. The Broker API has been updated to no longer require access to CAF to build against Broker." ;; --cmake_dir) echo $cmake_dir