diff --git a/NEWS b/NEWS index 119513cea2..5c98a28644 100644 --- a/NEWS +++ b/NEWS @@ -17,6 +17,9 @@ New Functionality on ``AllAnalyzers::Tag`` values named ``is_protocol_analyzer()``, ``is_packet_analyzer()`` and ``is_file_analyzer()``. +- File analyzers can now raise analyzer violations to the script-layer via + the new AnalyzerViolation() method. + Deprecated Functionality ------------------------ diff --git a/src/file_analysis/Analyzer.cc b/src/file_analysis/Analyzer.cc index b30a2cd786..9d33fd2660 100644 --- a/src/file_analysis/Analyzer.cc +++ b/src/file_analysis/Analyzer.cc @@ -2,9 +2,14 @@ #include "zeek/file_analysis/Analyzer.h" +#include "zeek/Event.h" #include "zeek/Val.h" +#include "zeek/file_analysis/File.h" #include "zeek/file_analysis/Manager.h" +// For analyzer_violation_info +#include "event.bif.netvar_h" + namespace zeek::file_analysis { @@ -23,7 +28,7 @@ void Analyzer::SetAnalyzerTag(const zeek::Tag& arg_tag) Analyzer::Analyzer(zeek::Tag arg_tag, RecordValPtr arg_args, File* arg_file) : tag(arg_tag), args(std::move(arg_args)), file(arg_file), got_stream_delivery(false), - skip(false) + skip(false), analyzer_confirmed(false) { id = ++id_counter; } @@ -33,4 +38,44 @@ Analyzer::Analyzer(RecordValPtr arg_args, File* arg_file) { } +void Analyzer::AnalyzerConfirmation(zeek::Tag arg_tag) + { + if ( analyzer_confirmed ) + return; + + analyzer_confirmed = true; + + if ( ! analyzer_confirmation_info ) + return; + + static auto info_type = zeek::id::find_type("AnalyzerConfirmationInfo"); + static auto info_f_idx = info_type->FieldOffset("f"); + + auto info = zeek::make_intrusive(info_type); + info->Assign(info_f_idx, GetFile()->ToVal()); + + const auto& tval = arg_tag ? arg_tag.AsVal() : tag.AsVal(); + event_mgr.Enqueue(analyzer_confirmation_info, tval, info); + } + +void Analyzer::AnalyzerViolation(const char* reason, const char* data, int len, zeek::Tag arg_tag) + { + if ( ! 
analyzer_violation_info ) + return; + + static auto info_type = zeek::id::find_type("AnalyzerViolationInfo"); + static auto info_reason_idx = info_type->FieldOffset("reason"); + static auto info_f_idx = info_type->FieldOffset("f"); + static auto info_data_idx = info_type->FieldOffset("data"); + + auto info = zeek::make_intrusive(info_type); + info->Assign(info_reason_idx, make_intrusive(reason)); + info->Assign(info_f_idx, GetFile()->ToVal()); + if ( data && len ) + info->Assign(info_data_idx, make_intrusive(len, data)); + + const auto& tval = arg_tag ? arg_tag.AsVal() : tag.AsVal(); + event_mgr.Enqueue(analyzer_violation_info, tval, info); + } + } // namespace zeek::file_analysis diff --git a/src/file_analysis/Analyzer.h b/src/file_analysis/Analyzer.h index 4e10a46ff3..126e3be53c 100644 --- a/src/file_analysis/Analyzer.h +++ b/src/file_analysis/Analyzer.h @@ -133,6 +133,38 @@ public: */ bool Skipping() const { return skip; } + /** + * Signals to Zeek that the analyzer has recognized the input to indeed + * conform to the expected format. This should be called as early as + * possible during file analysis. It may turn into \c analyzer_confirmation_info + * events at the script-layer (but only once per file, even if the method is + * called multiple times). + * + * If tag is given, it overrides the analyzer tag passed to the + * scripting layer; the default is the one of the analyzer itself. + */ + virtual void AnalyzerConfirmation(zeek::Tag tag = zeek::Tag()); + + /** + * Signals to Zeek that the analyzer has found a severe violation + * that could indicate it's not parsing the expected file format. + * This turns into \c analyzer_violation_info events at the script-layer + * (one such event is raised for each call to this method so that the + * script-layer can build up a notion of how prevalent violations are; the + * more, the less likely it's the right format). + * + * @param reason A textual description of the error encountered. 
+ * + * @param data An optional pointer to the malformed data. + * + * @param len If \a data is given, the length of it. + * + * @param tag If tag is given, it overrides the analyzer tag passed to the + * scripting layer; the default is the one of the analyzer itself. + */ + virtual void AnalyzerViolation(const char* reason, const char* data = nullptr, int len = 0, + zeek::Tag tag = zeek::Tag()); + protected: /** * Constructor. Only derived classes are meant to be instantiated. @@ -161,6 +193,7 @@ private: File* file; /**< The file to which the analyzer is attached. */ bool got_stream_delivery; bool skip; + bool analyzer_confirmed; static ID id_counter; }; diff --git a/src/file_analysis/analyzer/pe/PE.cc b/src/file_analysis/analyzer/pe/PE.cc index dfa10435f2..62a1461e7b 100644 --- a/src/file_analysis/analyzer/pe/PE.cc +++ b/src/file_analysis/analyzer/pe/PE.cc @@ -30,6 +30,7 @@ bool PE::DeliverStream(const u_char* data, uint64_t len) } catch ( const binpac::Exception& e ) { + AnalyzerViolation(util::fmt("Binpac exception: %s", e.c_msg())); return false; } diff --git a/testing/btest/Baseline/core.file-analyzer-violation/.stdout b/testing/btest/Baseline/core.file-analyzer-violation/.stdout new file mode 100644 index 0000000000..faa142204c --- /dev/null +++ b/testing/btest/Baseline/core.file-analyzer-violation/.stdout @@ -0,0 +1,2 @@ +### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. +Files::ANALYZER_PE, Binpac exception: binpac exception: &enforce violation : DOS_Header:AddressOfNewExeHeader, FKPuH630Tmj6UQUMP7, {\x0aPE\x0a} diff --git a/testing/btest/Baseline/core.file-analyzer-violation/files.log b/testing/btest/Baseline/core.file-analyzer-violation/files.log new file mode 100644 index 0000000000..abef87d6a6 --- /dev/null +++ b/testing/btest/Baseline/core.file-analyzer-violation/files.log @@ -0,0 +1,10 @@ +### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. 
Requires BTest >= 0.63. +#separator \x09 +#set_separator , +#empty_field (empty) +#unset_field - +#path files +#open XXXX-XX-XX-XX-XX-XX +#fields ts fuid uid id.orig_h id.orig_p id.resp_h id.resp_p source depth analyzers mime_type filename duration local_orig is_orig seen_bytes total_bytes missing_bytes overflow_bytes timedout parent_fuid +#types time string string addr port addr port string count set[string] string string interval bool bool count count count count bool string +XXXXXXXXXX.XXXXXX FKPuH630Tmj6UQUMP7 - - - - - ./myfile.exe 0 PE application/x-dosexec - 0.000000 - - 64 - 0 0 F - diff --git a/testing/btest/Baseline/plugins.file/output b/testing/btest/Baseline/plugins.file/output index 6d68624eb5..0d9cb558fb 100644 --- a/testing/btest/Baseline/plugins.file/output +++ b/testing/btest/Baseline/plugins.file/output @@ -4,15 +4,18 @@ Demo::Foo - A Foo test analyzer (dynamic, version 1.0.0) [Event] foo_piece === +analyzer_confirmation_info, Files::ANALYZER_FOO, FCceqBvpMfirSN0Ri foo_piece, FCceqBvpMfirSN0Ri, The National Center foo_piece, FCceqBvpMfirSN0Ri, net, consult your lo foo_piece, FCceqBvpMfirSN0Ri, most everything else foo_piece, FCceqBvpMfirSN0Ri, low:\x0a\x0a /Mac foo_piece, FCceqBvpMfirSN0Ri, es and directories o +analyzer_violation_info, Files::ANALYZER_FOO, FCceqBvpMfirSN0Ri, test violation 5, es and directori foo_piece, FCceqBvpMfirSN0Ri, r example, here is a foo_piece, FCceqBvpMfirSN0Ri, application, StuffIt foo_piece, FCceqBvpMfirSN0Ri, tion BinHex by doubl foo_piece, FCceqBvpMfirSN0Ri, laced, or are going foo_piece, FCceqBvpMfirSN0Ri, sers several documen +analyzer_violation_info, Files::ANALYZER_FOO, FCceqBvpMfirSN0Ri, test violation 10, sers several doc foo_piece, FCceqBvpMfirSN0Ri, er or can be printed foo_piece, FCceqBvpMfirSN0Ri, \x0a\x0aBug reports shoul diff --git a/testing/btest/core/file-analyzer-violation.zeek b/testing/btest/core/file-analyzer-violation.zeek new file mode 100644 index 0000000000..6d73d2bfb6 --- /dev/null +++ 
b/testing/btest/core/file-analyzer-violation.zeek @@ -0,0 +1,24 @@ +# @TEST-DOC: Verify analyzer_violation_info is raised for an invalid PE file. +# @TEST-EXEC: zeek -b %INPUT +# @TEST-EXEC: btest-diff .stdout +# @TEST-EXEC: btest-diff files.log + +@load base/frameworks/files +@load base/files/pe + +event analyzer_violation_info(tag: AllAnalyzers::Tag, info: AnalyzerViolationInfo) + { + print tag, info$reason, info$f$id, cat(info$f$info$analyzers); + } + +event zeek_init() + { + local source: string = "./myfile.exe"; + Input::add_analysis([$source=source, $name=source]); + } + +# This file triggers a binpac exception for PE that is reported through +# analyzer_violation_info +@TEST-START-FILE ./myfile.exe +MZ0000000000000000000000000000000000000000000000000000000000000 +@TEST-END-FILE diff --git a/testing/btest/plugins/file-plugin/src/Foo.cc b/testing/btest/plugins/file-plugin/src/Foo.cc index 45bc69cfae..41b9eac52f 100644 --- a/testing/btest/plugins/file-plugin/src/Foo.cc +++ b/testing/btest/plugins/file-plugin/src/Foo.cc @@ -1,8 +1,8 @@ - #include "Foo.h" #include #include +#include #include "events.bif.h" @@ -21,7 +21,16 @@ zeek::file_analysis::Analyzer* Foo::Instantiate(zeek::RecordValPtr args, bool Foo::DeliverStream(const u_char* data, uint64_t len) { + static int i = 0; + AnalyzerConfirmation(); zeek::event_mgr.Enqueue(foo_piece, GetFile()->ToVal(), zeek::make_intrusive(new zeek::String(data, len, 0))); + if ( ++i % 5 == 0 ) + { + uint64_t threshold = 16; + AnalyzerViolation(zeek::util::fmt("test violation %d", i), + reinterpret_cast(data), std::min(len, threshold)); + } + return true; } diff --git a/testing/btest/plugins/file.zeek b/testing/btest/plugins/file.zeek index 1697514fa5..77747bc4d2 100644 --- a/testing/btest/plugins/file.zeek +++ b/testing/btest/plugins/file.zeek @@ -7,12 +7,21 @@ # @TEST-EXEC: TEST_DIFF_CANONIFIER= btest-diff output event file_new(f: fa_file) - { - Files::add_analyzer(f, Files::ANALYZER_FOO); - } - + { + Files::add_analyzer(f, 
Files::ANALYZER_FOO); + } + event foo_piece(f: fa_file, data: string) { print "foo_piece", f$id, sub_bytes(data, 0, 20); } +event analyzer_confirmation_info(tag: AllAnalyzers::Tag, info: AnalyzerConfirmationInfo) + { + print "analyzer_confirmation_info", tag, info$f$id; + } + +event analyzer_violation_info(tag: AllAnalyzers::Tag, info: AnalyzerViolationInfo) + { + print "analyzer_violation_info", tag, info$f$id, info$reason, info$data; + }