diff --git a/NEWS b/NEWS index ff3c5be3dc..eb0ee24ea6 100644 --- a/NEWS +++ b/NEWS @@ -71,6 +71,13 @@ Changed Functionality now timeout after 5 minutes by default instead of no timeout, and extracted files now have a default size limit of 100MB instead of unlimited. +- If a Spicy protocol analyzers feeds data into file analysis, it now + needs to call Zeek's `Files::register_protocol()` and provide a + callback for computing file handles. If that's missing, Zeek will + issue a warning. While this was not necessary in previous versions, + it aligns with the same requirement for traditional analyzers and + enables customizing file handles for protocol-specific semantics. + Removed Functionality --------------------- diff --git a/src/file_analysis/Manager.h b/src/file_analysis/Manager.h index 327e5b2cd3..bdc850b171 100644 --- a/src/file_analysis/Manager.h +++ b/src/file_analysis/Manager.h @@ -351,6 +351,19 @@ public: */ std::string DetectMIME(const u_char* data, uint64_t len) const; + /** + * Sets #current_file_id to a hash of a unique file handle string based on + * what the \c get_file_handle event derives from the connection params. + * Event queue is flushed so that we can get the handle value immediately. + * @param tag network protocol over which the file is transferred. + * @param conn network connection over which the file is transferred. + * @param is_orig true if the file is being sent from connection originator + * or false if is being sent in the opposite direction. + * @return #current_file_id, which is a hash of a unique file handle string + * set by a \c get_file_handle event handler. + */ + std::string GetFileID(const zeek::Tag& tag, Connection* c, bool is_orig); + uint64_t CurrentFiles() { return id_map.size(); } uint64_t MaxFiles() { return max_files; } @@ -399,19 +412,6 @@ protected: */ bool RemoveFile(const std::string& file_id); - /** - * Sets #current_file_id to a hash of a unique file handle string based on - * what the \c get_file_handle event derives from the connection params. - * Event queue is flushed so that we can get the handle value immediately. - * @param tag network protocol over which the file is transferred. - * @param conn network connection over which the file is transferred. - * @param is_orig true if the file is being sent from connection originator - * or false if is being sent in the opposite direction. - * @return #current_file_id, which is a hash of a unique file handle string - * set by a \c get_file_handle event handler. - */ - std::string GetFileID(const zeek::Tag& tag, Connection* c, bool is_orig); - /** * Check if analysis is available for files transferred over a given * network protocol. diff --git a/src/spicy/runtime-support.cc b/src/spicy/runtime-support.cc index 0319ba15ee..e6b340e0be 100644 --- a/src/spicy/runtime-support.cc +++ b/src/spicy/runtime-support.cc @@ -689,8 +689,22 @@ rt::cookie::FileState* rt::cookie::FileStateStack::push(std::optionalHashHandle(hilti::rt::fmt("%s.%d", _analyzer_id, ++_id_counter)); + else { + auto cookie = static_cast(hilti::rt::context::cookie()); + assert(cookie); + + if ( auto c = cookie->protocol ) { + auto tag = spicy_mgr->tagForProtocolAnalyzer(c->analyzer->GetAnalyzerTag()); + fid = file_mgr->GetFileID(tag, c->analyzer->Conn(), c->is_orig); + } + + if ( fid.empty() ) + // If we can't get a FID from the file manager (e.g., because don't + // have a current protocol), we make one up. + fid = file_mgr->HashHandle(hilti::rt::fmt("%s.%d", _analyzer_id, ++_id_counter)); + } + + assert(! fid.empty()); _stack.emplace_back(std::move(fid)); return &_stack.back(); } diff --git a/testing/btest/Baseline/spicy.file-analysis-data-in/output-1 b/testing/btest/Baseline/spicy.file-analysis-data-in/output-1 index 8a97453dba..afec41f20e 100644 --- a/testing/btest/Baseline/spicy.file-analysis-data-in/output-1 +++ b/testing/btest/Baseline/spicy.file-analysis-data-in/output-1 @@ -1,3 +1,5 @@ ### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. -FM47gX3vI5ofQPm1li -FZjUS57tUkGFTibv3 +FU6P513gQKLVP6iwmf +FU6P513gQKLVP6iwmf +get_file_handle called +get_file_handle called diff --git a/testing/btest/Baseline/spicy.file-data-in-at-offset/output b/testing/btest/Baseline/spicy.file-data-in-at-offset/output index 8a97453dba..90a64302f0 100644 --- a/testing/btest/Baseline/spicy.file-data-in-at-offset/output +++ b/testing/btest/Baseline/spicy.file-data-in-at-offset/output @@ -1,3 +1,3 @@ ### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. -FM47gX3vI5ofQPm1li -FZjUS57tUkGFTibv3 +FU6P513gQKLVP6iwmf +FU6P513gQKLVP6iwmf diff --git a/testing/btest/spicy/file-analysis-data-in-concurrent.zeek b/testing/btest/spicy/file-analysis-data-in-concurrent.zeek index fe8c8c3afe..3a42662615 100644 --- a/testing/btest/spicy/file-analysis-data-in-concurrent.zeek +++ b/testing/btest/spicy/file-analysis-data-in-concurrent.zeek @@ -1,12 +1,23 @@ # @TEST-REQUIRES: have-spicy # # @TEST-EXEC: spicyz -d -o test.hlto ssh.spicy ./ssh-cond.evt -# @TEST-EXEC: zeek -r ${TRACES}/ssh/single-conn.trace test.hlto %INPUT Spicy::enable_print=T >output +# @TEST-EXEC: zeek -r ${TRACES}/ssh/single-conn.trace test.hlto %INPUT Spicy::enable_print=T >output 2>&1 # @TEST-EXEC: btest-diff output -event zeek_init() { - Analyzer::register_for_port(Analyzer::ANALYZER_SPICY_SSH, 22/tcp); -} +module SSH; + +global i: count = 0; + +function get_file_handle(c: connection, is_orig: bool): string + { + return cat(c$uid, ++i); + } + +event zeek_init() + { + Analyzer::register_for_port(Analyzer::ANALYZER_SPICY_SSH, 22/tcp); + Files::register_protocol(Analyzer::ANALYZER_SSH, [$get_file_handle=SSH::get_file_handle]); # use tag of replaced analyzer + } # @TEST-START-FILE ssh.spicy module SSH; diff --git a/testing/btest/spicy/file-analysis-data-in.zeek b/testing/btest/spicy/file-analysis-data-in.zeek index 4418eabe94..de8a5c079a 100644 --- a/testing/btest/spicy/file-analysis-data-in.zeek +++ b/testing/btest/spicy/file-analysis-data-in.zeek @@ -2,7 +2,7 @@ # # @TEST-EXEC: cat ssh.spicy ssh-1.spicy > ssh-test.spicy # @TEST-EXEC: spicyz -d -o test.hlto ssh-test.spicy ./ssh-cond.evt -# @TEST-EXEC: zeek -r ${TRACES}/ssh/single-conn.trace test.hlto %INPUT Spicy::enable_print=T | sort >output-1 +# @TEST-EXEC: zeek -r ${TRACES}/ssh/single-conn.trace test.hlto %INPUT Spicy::enable_print=T 2>&1 | sort >output-1 # # @TEST-EXEC: cat x509.log | grep -v ^# | cut -f 4-5 >x509.log.tmp && mv x509.log.tmp x509.log # @TEST-EXEC: btest-diff x509.log @@ -12,7 +12,7 @@ # # @TEST-EXEC: cat ssh.spicy ssh-2.spicy > ssh-test.spicy # @TEST-EXEC: spicyz -d -o test.hlto ssh-test.spicy ./ssh-cond.evt -# @TEST-EXEC: zeek -r ${TRACES}/ssh/single-conn.trace test.hlto %INPUT Spicy::enable_print=T | sort >output-2 +# @TEST-EXEC: zeek -r ${TRACES}/ssh/single-conn.trace test.hlto %INPUT Spicy::enable_print=T 2>&1 | sort >output-2 # # @TEST-EXEC: cat files.log | zeek-cut fuid filename >files.log.tmp && mv files.log.tmp files-2.log # @TEST-EXEC: btest-diff files-2.log @@ -20,9 +20,19 @@ # @TEST-EXEC: TEST_DIFF_CANONIFIER=diff-canonifier-spicy btest-diff output-1 # @TEST-EXEC: TEST_DIFF_CANONIFIER=diff-canonifier-spicy btest-diff output-2 -event zeek_init() { - Analyzer::register_for_port(Analyzer::ANALYZER_SPICY_SSH, 22/tcp); -} +module SSH; + +function get_file_handle(c: connection, is_orig: bool): string + { + print "get_file_handle called"; + return cat(c$uid); + } + +event zeek_init() + { + Analyzer::register_for_port(Analyzer::ANALYZER_SPICY_SSH, 22/tcp); + Files::register_protocol(Analyzer::ANALYZER_SSH, [$get_file_handle=SSH::get_file_handle]); # use tag of replaced analyzer + } # @TEST-START-FILE ssh.spicy module SSH; diff --git a/testing/btest/spicy/file-data-in-at-offset.zeek b/testing/btest/spicy/file-data-in-at-offset.zeek index e2b680f37f..9f06933a1c 100644 --- a/testing/btest/spicy/file-data-in-at-offset.zeek +++ b/testing/btest/spicy/file-data-in-at-offset.zeek @@ -1,15 +1,25 @@ # @TEST-REQUIRES: have-spicy # # @TEST-EXEC: spicyz -d -o test.hlto ssh.spicy ./ssh-cond.evt -# @TEST-EXEC: zeek -r ${TRACES}/ssh/single-conn.trace test.hlto %INPUT Spicy::enable_print=T | sort >output +# @TEST-EXEC: zeek -r ${TRACES}/ssh/single-conn.trace test.hlto %INPUT Spicy::enable_print=T 2>&1 | sort >output # # @TEST-EXEC: cat x509.log | grep -v ^# | cut -f 4-5 >x509.log.tmp && mv x509.log.tmp x509.log # @TEST-EXEC: btest-diff x509.log # @TEST-EXEC: btest-diff output -event zeek_init() { - Analyzer::register_for_port(Analyzer::ANALYZER_SPICY_SSH, 22/tcp); -} +module SSH; + +function get_file_handle(c: connection, is_orig: bool): string + { + return cat(c$uid); + } + +event zeek_init() + { + Analyzer::register_for_port(Analyzer::ANALYZER_SPICY_SSH, 22/tcp); + Files::register_protocol(Analyzer::ANALYZER_SSH, [$get_file_handle=SSH::get_file_handle]); # use tag of replaced analyzer + + } # @TEST-START-FILE ssh.spicy module SSH;