From 56b9a79a653c0e52c7734c6c5f53186930debb6a Mon Sep 17 00:00:00 2001 From: Robin Sommer Date: Thu, 25 Apr 2024 10:14:20 +0200 Subject: [PATCH] Spicy: Query Zeek scriptland for file handles. Like traditional file analyzers, we now query Zeek's `get_file_handle()` event for handles when a connection begins analyzing an embedded file. That means that Spicy-side protocol analyzers that are forwarding data into file analysis now need to call Zeek's `Files::register_protocol()` and provide a callback for computing file handles. If that's missing, Zeek will now issue a warning. This aligns with the requirements Zeek's traditional protocol analyzers. (If the EVT file defines a protocol analyzer to `replace` an existing one, that one's `register_protocol()` will be consulted.) Because Zeek's `get_file_handle()` event requires a current connection, if a Spicy file analyzer isn't directly part of a connection context (e.g., with nested files), we continue to use hardcoded, built-in file handle. Scriptland won't be consulted in that case, just like before. Closes #3440. --- NEWS | 7 +++++ auxil/binpac | 2 +- auxil/broker | 2 +- auxil/gen-zam | 2 +- auxil/spicy | 2 +- doc | 2 +- src/file_analysis/Manager.h | 26 +++++++++---------- src/spicy/runtime-support.cc | 18 +++++++++++-- .../spicy.file-analysis-data-in/output-1 | 6 +++-- .../spicy.file-data-in-at-offset/output | 4 +-- .../file-analysis-data-in-concurrent.zeek | 19 +++++++++++--- .../btest/spicy/file-analysis-data-in.zeek | 20 ++++++++++---- .../btest/spicy/file-data-in-at-offset.zeek | 18 ++++++++++--- 13 files changed, 91 insertions(+), 37 deletions(-) diff --git a/NEWS b/NEWS index e726382b58..4e3b545e5f 100644 --- a/NEWS +++ b/NEWS @@ -64,6 +64,13 @@ Changed Functionality but mainly for providing comparisons, which is why this is not a breaking change. +- If a Spicy protocol analyzers feeds data into file analysis, it now + needs to call Zeek's `Files::register_protocol()` and provide a + callback for computing file handles. If that's missing, Zeek will + issue a warning. While this was not necessary in previous versions, + it aligns with the same requirement for traditional analyzers and + enables customizing file handles for protocol-specific semantics. + Removed Functionality --------------------- diff --git a/auxil/binpac b/auxil/binpac index 822cdb551b..fafd02f560 160000 --- a/auxil/binpac +++ b/auxil/binpac @@ -1 +1 @@ -Subproject commit 822cdb551bf6c2e7c18f16c7f088c61675ae588b +Subproject commit fafd02f5606003838f046f2f30fc4ca9439b7373 diff --git a/auxil/broker b/auxil/broker index 80204cbda8..48660b74ae 160000 --- a/auxil/broker +++ b/auxil/broker @@ -1 +1 @@ -Subproject commit 80204cbda8a190f61fbc14057a4d3bc2e6cf5562 +Subproject commit 48660b74ae2e5b5babd6f0dc38f3c85e24434d77 diff --git a/auxil/gen-zam b/auxil/gen-zam index 5887f1f04f..c1eba47ad9 160000 --- a/auxil/gen-zam +++ b/auxil/gen-zam @@ -1 +1 @@ -Subproject commit 5887f1f04f525c4c682eb55aa0c7505574459c74 +Subproject commit c1eba47ad91762ba165189cd9c4cfddc2a8f0bd0 diff --git a/auxil/spicy b/auxil/spicy index b1f54e4d42..f4ff0d0f83 160000 --- a/auxil/spicy +++ b/auxil/spicy @@ -1 +1 @@ -Subproject commit b1f54e4d42fb950230f70633ad74d1aac9216e18 +Subproject commit f4ff0d0f83d736d7c7f2e31d89337b166102ee78 diff --git a/doc b/doc index 089d4deddf..0dad81c85f 160000 --- a/doc +++ b/doc @@ -1 +1 @@ -Subproject commit 089d4deddf5ab9a202d3a5c3eb0a32cf4e0805d2 +Subproject commit 0dad81c85fdc1140290af555c8cd400f5560059d diff --git a/src/file_analysis/Manager.h b/src/file_analysis/Manager.h index 327e5b2cd3..bdc850b171 100644 --- a/src/file_analysis/Manager.h +++ b/src/file_analysis/Manager.h @@ -351,6 +351,19 @@ public: */ std::string DetectMIME(const u_char* data, uint64_t len) const; + /** + * Sets #current_file_id to a hash of a unique file handle string based on + * what the \c get_file_handle event derives from the connection params. + * Event queue is flushed so that we can get the handle value immediately. + * @param tag network protocol over which the file is transferred. + * @param conn network connection over which the file is transferred. + * @param is_orig true if the file is being sent from connection originator + * or false if is being sent in the opposite direction. + * @return #current_file_id, which is a hash of a unique file handle string + * set by a \c get_file_handle event handler. + */ + std::string GetFileID(const zeek::Tag& tag, Connection* c, bool is_orig); + uint64_t CurrentFiles() { return id_map.size(); } uint64_t MaxFiles() { return max_files; } @@ -399,19 +412,6 @@ protected: */ bool RemoveFile(const std::string& file_id); - /** - * Sets #current_file_id to a hash of a unique file handle string based on - * what the \c get_file_handle event derives from the connection params. - * Event queue is flushed so that we can get the handle value immediately. - * @param tag network protocol over which the file is transferred. - * @param conn network connection over which the file is transferred. - * @param is_orig true if the file is being sent from connection originator - * or false if is being sent in the opposite direction. - * @return #current_file_id, which is a hash of a unique file handle string - * set by a \c get_file_handle event handler. - */ - std::string GetFileID(const zeek::Tag& tag, Connection* c, bool is_orig); - /** * Check if analysis is available for files transferred over a given * network protocol. diff --git a/src/spicy/runtime-support.cc b/src/spicy/runtime-support.cc index 0319ba15ee..e6b340e0be 100644 --- a/src/spicy/runtime-support.cc +++ b/src/spicy/runtime-support.cc @@ -689,8 +689,22 @@ rt::cookie::FileState* rt::cookie::FileStateStack::push(std::optionalHashHandle(hilti::rt::fmt("%s.%d", _analyzer_id, ++_id_counter)); + else { + auto cookie = static_cast(hilti::rt::context::cookie()); + assert(cookie); + + if ( auto c = cookie->protocol ) { + auto tag = spicy_mgr->tagForProtocolAnalyzer(c->analyzer->GetAnalyzerTag()); + fid = file_mgr->GetFileID(tag, c->analyzer->Conn(), c->is_orig); + } + + if ( fid.empty() ) + // If we can't get a FID from the file manager (e.g., because don't + // have a current protocol), we make one up. + fid = file_mgr->HashHandle(hilti::rt::fmt("%s.%d", _analyzer_id, ++_id_counter)); + } + + assert(! fid.empty()); _stack.emplace_back(std::move(fid)); return &_stack.back(); } diff --git a/testing/btest/Baseline/spicy.file-analysis-data-in/output-1 b/testing/btest/Baseline/spicy.file-analysis-data-in/output-1 index 8a97453dba..afec41f20e 100644 --- a/testing/btest/Baseline/spicy.file-analysis-data-in/output-1 +++ b/testing/btest/Baseline/spicy.file-analysis-data-in/output-1 @@ -1,3 +1,5 @@ ### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. -FM47gX3vI5ofQPm1li -FZjUS57tUkGFTibv3 +FU6P513gQKLVP6iwmf +FU6P513gQKLVP6iwmf +get_file_handle called +get_file_handle called diff --git a/testing/btest/Baseline/spicy.file-data-in-at-offset/output b/testing/btest/Baseline/spicy.file-data-in-at-offset/output index 8a97453dba..90a64302f0 100644 --- a/testing/btest/Baseline/spicy.file-data-in-at-offset/output +++ b/testing/btest/Baseline/spicy.file-data-in-at-offset/output @@ -1,3 +1,3 @@ ### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. -FM47gX3vI5ofQPm1li -FZjUS57tUkGFTibv3 +FU6P513gQKLVP6iwmf +FU6P513gQKLVP6iwmf diff --git a/testing/btest/spicy/file-analysis-data-in-concurrent.zeek b/testing/btest/spicy/file-analysis-data-in-concurrent.zeek index fe8c8c3afe..3a42662615 100644 --- a/testing/btest/spicy/file-analysis-data-in-concurrent.zeek +++ b/testing/btest/spicy/file-analysis-data-in-concurrent.zeek @@ -1,12 +1,23 @@ # @TEST-REQUIRES: have-spicy # # @TEST-EXEC: spicyz -d -o test.hlto ssh.spicy ./ssh-cond.evt -# @TEST-EXEC: zeek -r ${TRACES}/ssh/single-conn.trace test.hlto %INPUT Spicy::enable_print=T >output +# @TEST-EXEC: zeek -r ${TRACES}/ssh/single-conn.trace test.hlto %INPUT Spicy::enable_print=T >output 2>&1 # @TEST-EXEC: btest-diff output -event zeek_init() { - Analyzer::register_for_port(Analyzer::ANALYZER_SPICY_SSH, 22/tcp); -} +module SSH; + +global i: count = 0; + +function get_file_handle(c: connection, is_orig: bool): string + { + return cat(c$uid, ++i); + } + +event zeek_init() + { + Analyzer::register_for_port(Analyzer::ANALYZER_SPICY_SSH, 22/tcp); + Files::register_protocol(Analyzer::ANALYZER_SSH, [$get_file_handle=SSH::get_file_handle]); # use tag of replaced analyzer + } # @TEST-START-FILE ssh.spicy module SSH; diff --git a/testing/btest/spicy/file-analysis-data-in.zeek b/testing/btest/spicy/file-analysis-data-in.zeek index 4418eabe94..de8a5c079a 100644 --- a/testing/btest/spicy/file-analysis-data-in.zeek +++ b/testing/btest/spicy/file-analysis-data-in.zeek @@ -2,7 +2,7 @@ # # @TEST-EXEC: cat ssh.spicy ssh-1.spicy > ssh-test.spicy # @TEST-EXEC: spicyz -d -o test.hlto ssh-test.spicy ./ssh-cond.evt -# @TEST-EXEC: zeek -r ${TRACES}/ssh/single-conn.trace test.hlto %INPUT Spicy::enable_print=T | sort >output-1 +# @TEST-EXEC: zeek -r ${TRACES}/ssh/single-conn.trace test.hlto %INPUT Spicy::enable_print=T 2>&1 | sort >output-1 # # @TEST-EXEC: cat x509.log | grep -v ^# | cut -f 4-5 >x509.log.tmp && mv x509.log.tmp x509.log # @TEST-EXEC: btest-diff x509.log @@ -12,7 +12,7 @@ # # @TEST-EXEC: cat ssh.spicy ssh-2.spicy > ssh-test.spicy # @TEST-EXEC: spicyz -d -o test.hlto ssh-test.spicy ./ssh-cond.evt -# @TEST-EXEC: zeek -r ${TRACES}/ssh/single-conn.trace test.hlto %INPUT Spicy::enable_print=T | sort >output-2 +# @TEST-EXEC: zeek -r ${TRACES}/ssh/single-conn.trace test.hlto %INPUT Spicy::enable_print=T 2>&1 | sort >output-2 # # @TEST-EXEC: cat files.log | zeek-cut fuid filename >files.log.tmp && mv files.log.tmp files-2.log # @TEST-EXEC: btest-diff files-2.log @@ -20,9 +20,19 @@ # @TEST-EXEC: TEST_DIFF_CANONIFIER=diff-canonifier-spicy btest-diff output-1 # @TEST-EXEC: TEST_DIFF_CANONIFIER=diff-canonifier-spicy btest-diff output-2 -event zeek_init() { - Analyzer::register_for_port(Analyzer::ANALYZER_SPICY_SSH, 22/tcp); -} +module SSH; + +function get_file_handle(c: connection, is_orig: bool): string + { + print "get_file_handle called"; + return cat(c$uid); + } + +event zeek_init() + { + Analyzer::register_for_port(Analyzer::ANALYZER_SPICY_SSH, 22/tcp); + Files::register_protocol(Analyzer::ANALYZER_SSH, [$get_file_handle=SSH::get_file_handle]); # use tag of replaced analyzer + } # @TEST-START-FILE ssh.spicy module SSH; diff --git a/testing/btest/spicy/file-data-in-at-offset.zeek b/testing/btest/spicy/file-data-in-at-offset.zeek index e2b680f37f..9f06933a1c 100644 --- a/testing/btest/spicy/file-data-in-at-offset.zeek +++ b/testing/btest/spicy/file-data-in-at-offset.zeek @@ -1,15 +1,25 @@ # @TEST-REQUIRES: have-spicy # # @TEST-EXEC: spicyz -d -o test.hlto ssh.spicy ./ssh-cond.evt -# @TEST-EXEC: zeek -r ${TRACES}/ssh/single-conn.trace test.hlto %INPUT Spicy::enable_print=T | sort >output +# @TEST-EXEC: zeek -r ${TRACES}/ssh/single-conn.trace test.hlto %INPUT Spicy::enable_print=T 2>&1 | sort >output # # @TEST-EXEC: cat x509.log | grep -v ^# | cut -f 4-5 >x509.log.tmp && mv x509.log.tmp x509.log # @TEST-EXEC: btest-diff x509.log # @TEST-EXEC: btest-diff output -event zeek_init() { - Analyzer::register_for_port(Analyzer::ANALYZER_SPICY_SSH, 22/tcp); -} +module SSH; + +function get_file_handle(c: connection, is_orig: bool): string + { + return cat(c$uid); + } + +event zeek_init() + { + Analyzer::register_for_port(Analyzer::ANALYZER_SPICY_SSH, 22/tcp); + Files::register_protocol(Analyzer::ANALYZER_SSH, [$get_file_handle=SSH::get_file_handle]); # use tag of replaced analyzer + + } # @TEST-START-FILE ssh.spicy module SSH;