From 4e7bb59bb1c1b0216077c9a79eb4be1ae13da59b Mon Sep 17 00:00:00 2001 From: Arne Welzel Date: Wed, 28 Aug 2024 21:25:14 +0200 Subject: [PATCH] script_opt: Extend Support.h to break include dependencies Rebuilding ZBody.cc is annoyingly slow with UBSAN or ASAN enabled. A rebuild is currently triggered when touching Manager.h files like logging/Manager.h, broker/Manager.h or file_analysis/Manager.h and various other headers that are included from there or directly in ZBody.h. Add tiny wrapper functions to Support.h that encapsulate the actual Zeek functionality and move the includes to Support.cc instead. This will cause frequent Support.cc rebuilds, but that is decently fast. This comes at the expense of one indirect function call for some ops, but its overhead should be negligible in comparison to a lookup for a connection or component or a log_mgr->Write() call. --- src/script_opt/ZAM/OPs/ZBI.op | 124 ++++++++-------------------------- src/script_opt/ZAM/Support.cc | 117 ++++++++++++++++++++++++++++++++ src/script_opt/ZAM/Support.h | 63 ++++++++++++++++- src/script_opt/ZAM/ZBody.cc | 12 +++- src/script_opt/ZAM/ZBody.h | 25 ------- 5 files changed, 216 insertions(+), 125 deletions(-) diff --git a/src/script_opt/ZAM/OPs/ZBI.op b/src/script_opt/ZAM/OPs/ZBI.op index 29d50f13f0..2759492e10 100644 --- a/src/script_opt/ZAM/OPs/ZBI.op +++ b/src/script_opt/ZAM/OPs/ZBI.op @@ -4,56 +4,33 @@ internal-op Remove-Teredo op1-read class V op-types R -eval auto teredo = zeek::packet_mgr->GetAnalyzer("Teredo"); - if ( teredo ) - { - zeek::detail::ConnKey conn_key($1); - static_cast(teredo.get())->RemoveConnection(conn_key); - } +eval ZAM::packet_mgr_remove_teredo($1); internal-op Remove-Teredo side-effects OP_REMOVE_TEREDO_V OP_V class VV op-types I R -eval auto teredo = zeek::packet_mgr->GetAnalyzer("Teredo"); - if ( teredo ) - { - zeek::detail::ConnKey conn_key($1); - static_cast(teredo.get())->RemoveConnection(conn_key); - } +eval ZAM::packet_mgr_remove_teredo($1); $$ = 1; 
internal-op Remove-GTPv1 op1-read class V op-types R -eval auto gtpv1 = zeek::packet_mgr->GetAnalyzer("GTPv1"); - if ( gtpv1 ) - { - zeek::detail::ConnKey conn_key($1); - static_cast(gtpv1.get())->RemoveConnection(conn_key); - } +eval ZAM::packet_mgr_remove_gtpv1($1); internal-op Remove-GTPv1 side-effects OP_REMOVE_GTPV1_V OP_V class VV op-types I R -eval auto gtpv1 = zeek::packet_mgr->GetAnalyzer("GTPv1"); - if ( gtpv1 ) - { - zeek::detail::ConnKey conn_key($1); - static_cast(gtpv1.get())->RemoveConnection(conn_key); - } +eval ZAM::packet_mgr_remove_gtpv1($1); $$ = 1; internal-op Set-File-Handle op1-read class V op-types S -eval auto handle = $1; - auto bytes = reinterpret_cast(handle->Bytes()); - auto h = std::string(bytes, handle->Len()); - zeek::file_mgr->SetHandle(h); +eval ZAM::file_mgr_set_handle($1); internal-op Subnet-To-Addr class VV @@ -113,7 +90,7 @@ macro LogWritePre(id_val, columns_val) auto columns = columns_val; macro LogWriteResPost(lhs) - bool result = log_mgr->Write(id->AsEnumVal(), columns->AsRecordVal()); + bool result = ZAM::log_mgr_write(id->AsEnumVal(), columns->AsRecordVal()); lhs = result; internal-op Log-Write @@ -138,18 +115,18 @@ op1-read classes VV CV op-types X R eval LogWritePre(LogEnum($1), $2) - (void) log_mgr->Write(id->AsEnumVal(), columns->AsRecordVal()); + (void) ZAM::log_mgr_write(id->AsEnumVal(), columns->AsRecordVal()); internal-op Broker-Flush-Logs side-effects OP_BROKER_FLUSH_LOGS_X OP_X class V op-types U -eval $$ = broker_mgr->FlushLogBuffers(); +eval $$ = ZAM::broker_mgr_flush_log_buffers(); internal-op Broker-Flush-Logs side-effects class X -eval (void) broker_mgr->FlushLogBuffers(); +eval (void) ZAM::broker_mgr_flush_log_buffers(); internal-op Get-Port-Transport-Proto class VV @@ -167,13 +144,13 @@ eval auto mask = $1 & PORT_SPACE_MASK; predicate-op Conn-Exists class V op-types R -eval session_mgr->FindConnection($1) != nullptr +eval ZAM::session_mgr_find_connection($1) != nullptr internal-op Lookup-Conn class VV 
op-types X R eval auto cid = $1; - Connection* conn = session_mgr->FindConnection(cid); + Connection* conn = ZAM::session_mgr_find_connection(cid); ValPtr res; if ( conn ) res = conn->GetVal(); @@ -453,92 +430,61 @@ eval CatNPre() internal-op Analyzer-Name classes VV VC op-types S X -eval auto atype = $1.ToVal(Z_TYPE); - auto val = atype->AsEnumVal(); - Unref($$); - plugin::Component* component = zeek::analyzer_mgr->Lookup(val); - if ( ! component ) - component = zeek::packet_mgr->Lookup(val); - if ( ! component ) - component = zeek::file_mgr->Lookup(val); - if ( component ) - $$ = new StringVal(component->CanonicalName()); - else - $$ = new StringVal(""); - -macro FilesAddOrRemoveAnalyzer(file_id_val, tag, args_val, METHOD) - auto file_id = file_id_val; - using zeek::BifType::Record::Files::AnalyzerArgs; - auto rv = args_val->CoerceTo(AnalyzerArgs); - bool result = zeek::file_mgr->METHOD( - file_id->CheckString(), - zeek::file_mgr->GetComponentTag(tag.ToVal(Z_TYPE).get()), - std::move(rv)); - -macro FilesAddAnalyzer(file_id_val, tag, args_val) - FilesAddOrRemoveAnalyzer(file_id_val, tag, args_val, AddAnalyzer) +eval Unref($$); + $$ = ZAM::analyzer_name($1.ToVal(Z_TYPE)->AsEnumVal()); internal-op Files-Add-Analyzer op1-read classes VVV VCV op-types S X R -eval FilesAddAnalyzer($1, $2, $3) +eval ZAM::file_mgr_add_analyzer($1, $2.ToVal(Z_TYPE)->AsEnumVal(), $3); internal-op Files-Add-Analyzer class VVVV side-effects OP_FILES_ADD_ANALYZER_VVV OP_VVV op-types I S X R -eval FilesAddAnalyzer($1, $2, $3) - $$ = result; +eval $$ = ZAM::file_mgr_add_analyzer($1, $2.ToVal(Z_TYPE)->AsEnumVal(), $3); internal-op Files-Add-Analyzer class VVCV op-types I S X R side-effects OP_FILES_ADD_ANALYZER_VCV OP_VVC -eval FilesAddAnalyzer($1, $2, $3) - $$ = result; - -macro FilesRemoveAnalyzer(file_id_val, tag, args_slot) - FilesAddOrRemoveAnalyzer(file_id_val, tag, args_slot, RemoveAnalyzer) +eval $$ = ZAM::file_mgr_add_analyzer($1, $2.ToVal(Z_TYPE)->AsEnumVal(), $3); internal-op 
Files-Remove-Analyzer op1-read classes VVV VCV op-types S X R -eval FilesRemoveAnalyzer($1, $2, $3) +eval ZAM::file_mgr_remove_analyzer($1, $2.ToVal(Z_TYPE)->AsEnumVal(), $3); internal-op Files-Remove-Analyzer class VVVV op-types I S X R side-effects OP_FILES_REMOVE_ANALYZER_VVV OP_VVV -eval FilesRemoveAnalyzer($1, $2, $3) - $$ = result; +eval $$ = ZAM::file_mgr_remove_analyzer($1, $2.ToVal(Z_TYPE)->AsEnumVal(), $3); internal-op Files-Remove-Analyzer class VVCV op-types I S X R side-effects OP_FILES_REMOVE_ANALYZER_VCV OP_VVC -eval FilesRemoveAnalyzer($1, $2, $3) - $$ = result; +eval $$ = ZAM::file_mgr_remove_analyzer($1, $2.ToVal(Z_TYPE)->AsEnumVal(), $3); internal-op Analyzer-Enabled classes VV VC op-types I X -eval auto atype = $1.ToVal(Z_TYPE); - auto c = zeek::file_mgr->Lookup(atype->AsEnumVal()); - $$ = c && c->Enabled(); +eval $$ = ZAM::file_mgr_analyzer_enabled($1.ToVal(Z_TYPE)->AsEnumVal()); internal-op File-Analyzer-Name classes VV VC eval auto atype = $1.ToVal(Z_TYPE); Unref($$.AsString()); - $$ = ZVal(file_mgr->GetComponentNameVal({NewRef{}, atype->AsEnumVal()})); + $$ = ZVal(ZAM::file_mgr_analyzer_name(atype->AsEnumVal())); internal-op Is-Protocol-Analyzer classes VV VC op-types I X eval auto atype = $1.ToVal(Z_TYPE); - $$ = analyzer_mgr->Lookup(atype->AsEnumVal()) != nullptr; + $$ = ZAM::analyzer_mgr_lookup(atype->AsEnumVal()) != nullptr; internal-op Clear-Table op1-read @@ -550,47 +496,33 @@ internal-op Files-Enable-Reassembly op1-read class V op-types S -eval auto f = $1->CheckString(); - file_mgr->EnableReassembly(f); +eval ZAM::file_mgr_enable_reassembly($1); internal-op Files-Set-Reassembly-Buffer op1-read classes VV Vi op-types S U -eval auto f = $1->CheckString(); - file_mgr->SetReassemblyBuffer(f, $2); +eval ZAM::file_mgr_set_reassembly_buffer($1, $2); internal-op Files-Set-Reassembly-Buffer class VVV op-types I S U side-effects OP_FILES_SET_REASSEMBLY_BUFFER_VV OP_VV -eval auto f = $1->CheckString(); - $$ = file_mgr->SetReassemblyBuffer(f, 
$2); +eval $$ = ZAM::file_mgr_set_reassembly_buffer($1, $2); internal-op Files-Set-Reassembly-Buffer class VVi op-types I S U side-effects OP_FILES_SET_REASSEMBLY_BUFFER_Vi OP_VV_I2 -eval auto f = $1->CheckString(); - $$ = file_mgr->SetReassemblyBuffer(f, $2); +eval $$ = ZAM::file_mgr_set_reassembly_buffer($1, $2); internal-op Get-Bytes-Thresh classes VVV VVC op-types U R I -eval auto a = analyzer::conn_size::GetConnsizeAnalyzer($1); - auto res = 0U; - if ( a ) - res = static_cast(a)->GetByteAndPacketThreshold(true, $2); - $$ = res; +eval $$ = ZAM::conn_size_get_bytes_threshold($1, $2); macro SetBytesThresh(cid, threshold, is_orig) - bool res = false; - auto a = analyzer::conn_size::GetConnsizeAnalyzer(cid); - if ( a ) - { - static_cast(a)->SetByteAndPacketThreshold(threshold, true, is_orig); - res = true; - } + bool res = ZAM::conn_size_set_bytes_threshold(threshold, cid, is_orig); internal-op Set-Bytes-Thresh op1-read diff --git a/src/script_opt/ZAM/Support.cc b/src/script_opt/ZAM/Support.cc index 61efb9265d..5411eea2b4 100644 --- a/src/script_opt/ZAM/Support.cc +++ b/src/script_opt/ZAM/Support.cc @@ -8,7 +8,17 @@ #include "zeek/Reporter.h" #include "zeek/ScriptValidation.h" #include "zeek/ZeekString.h" +#include "zeek/analyzer/Manager.h" +#include "zeek/analyzer/protocol/conn-size/ConnSize.h" +#include "zeek/broker/Manager.h" +#include "zeek/file_analysis/Manager.h" +#include "zeek/file_analysis/file_analysis.bif.h" +#include "zeek/logging/Manager.h" +#include "zeek/packet_analysis/Manager.h" +#include "zeek/packet_analysis/protocol/gtpv1/GTPv1.h" +#include "zeek/packet_analysis/protocol/teredo/Teredo.h" #include "zeek/script_opt/ProfileFunc.h" +#include "zeek/session/Manager.h" namespace zeek::detail { @@ -17,6 +27,113 @@ std::string curr_func; std::shared_ptr curr_loc; TypePtr log_ID_enum_type; TypePtr any_base_type = base_type(TYPE_ANY); + +bool log_mgr_write(zeek::EnumVal* v, zeek::RecordVal* r) { return zeek::log_mgr->Write(v, r); } + +size_t 
broker_mgr_flush_log_buffers() { return zeek::broker_mgr->FlushLogBuffers(); } + +zeek::Connection* session_mgr_find_connection(zeek::Val* cid) { return zeek::session_mgr->FindConnection(cid); } + +bool packet_mgr_remove_teredo(zeek::Val* cid) { + auto teredo = zeek::packet_mgr->GetAnalyzer("Teredo"); + if ( teredo ) { + zeek::detail::ConnKey conn_key(cid); + static_cast(teredo.get())->RemoveConnection(conn_key); + return true; + } + return false; +} + +bool packet_mgr_remove_gtpv1(zeek::Val* cid) { + auto gtpv1 = zeek::packet_mgr->GetAnalyzer("GTPv1"); + if ( gtpv1 ) { + zeek::detail::ConnKey conn_key(cid); + static_cast(gtpv1.get())->RemoveConnection(conn_key); + return true; + } + return false; +} + +zeek::StringVal* analyzer_name(zeek::EnumVal* val) { + plugin::Component* component = zeek::analyzer_mgr->Lookup(val); + + if ( ! component ) + component = zeek::packet_mgr->Lookup(val); + + if ( ! component ) + component = zeek::file_mgr->Lookup(val); + + if ( component ) + return new StringVal(component->CanonicalName()); + return new StringVal(""); +} + +zeek::plugin::Component* analyzer_mgr_lookup(zeek::EnumVal* v) { return zeek::analyzer_mgr->Lookup(v); } + +zeek_uint_t conn_size_get_bytes_threshold(Val* cid, bool is_orig) { + if ( auto* a = analyzer::conn_size::GetConnsizeAnalyzer(cid) ) + return static_cast(a)->GetByteAndPacketThreshold(true, is_orig); + + return 0; +} + +bool conn_size_set_bytes_threshold(zeek_uint_t threshold, Val* cid, bool is_orig) { + if ( auto* a = analyzer::conn_size::GetConnsizeAnalyzer(cid) ) { + static_cast(a)->SetByteAndPacketThreshold(threshold, true, is_orig); + return true; + } + + return false; +} + +// File analysis wrappers +void file_mgr_set_handle(StringVal* h) { zeek::file_mgr->SetHandle(h->ToStdString()); } + +bool file_mgr_add_analyzer(StringVal* file_id, EnumVal* tag, RecordVal* args) { + const auto& tag_ = zeek::file_mgr->GetComponentTag(tag); + if ( ! 
tag_ ) + return false; + + using zeek::BifType::Record::Files::AnalyzerArgs; + auto rv = args->CoerceTo(AnalyzerArgs); + return zeek::file_mgr->AddAnalyzer(file_id->CheckString(), tag_, std::move(rv)); +} + +bool file_mgr_remove_analyzer(StringVal* file_id, EnumVal* tag, RecordVal* args) { + const auto& tag_ = zeek::file_mgr->GetComponentTag(tag); + if ( ! tag_ ) + return false; + + using zeek::BifType::Record::Files::AnalyzerArgs; + auto rv = args->CoerceTo(AnalyzerArgs); + return zeek::file_mgr->RemoveAnalyzer(file_id->CheckString(), tag_, std::move(rv)); +} + +bool file_mgr_analyzer_enabled(zeek::EnumVal* v) { + auto c = zeek::file_mgr->Lookup(v->AsEnumVal()); + return c && c->Enabled(); +} + +zeek::StringVal* file_mgr_analyzer_name(EnumVal* v) { + // to be placed into a ZVal + return file_mgr->GetComponentNameVal({NewRef{}, v}).release(); +} + +bool file_mgr_enable_reassembly(StringVal* file_id) { + std::string fid = file_id->CheckString(); + return zeek::file_mgr->EnableReassembly(fid); +} + +bool file_mgr_disable_reassembly(StringVal* file_id) { + std::string fid = file_id->CheckString(); + return zeek::file_mgr->DisableReassembly(fid); +} + +bool file_mgr_set_reassembly_buffer(StringVal* file_id, uint64_t max) { + std::string fid = file_id->CheckString(); + return zeek::file_mgr->SetReassemblyBuffer(fid, max); +} + } // namespace ZAM bool ZAM_error = false; diff --git a/src/script_opt/ZAM/Support.h b/src/script_opt/ZAM/Support.h index 96777b26f6..5aa439d387 100644 --- a/src/script_opt/ZAM/Support.h +++ b/src/script_opt/ZAM/Support.h @@ -1,14 +1,31 @@ // See the file "COPYING" in the main distribution directory for copyright. // Low-level support utilities/globals for ZAM compilation. +// +// Many of the wrapper functions are here to break header dependencies +// between ZBody.cc and the rest of Zeek. This avoids rebuilding of ZBody.cc +// when working on Zeek components unrelated to script optimization. 
+// #pragma once #include "zeek/Expr.h" -#include "zeek/Stmt.h" #include "zeek/script_opt/ZAM/Profile.h" -namespace zeek::detail { +namespace zeek { +class Connection; +class EnumVal; +class RecordVal; +class Val; +class StringVal; + +namespace plugin { +class Component; +} + +namespace detail { + +class Stmt; using ValVec = std::vector; @@ -28,6 +45,45 @@ extern TypePtr log_ID_enum_type; // Needed for a slight performance gain when dealing with "any" types. extern TypePtr any_base_type; +// log_mgr->Write() +bool log_mgr_write(EnumVal* v, RecordVal* r); + +// broker_mgr->FlushLogBuffers() +size_t broker_mgr_flush_log_buffers(); + +// session_mgr->FindConnection() +zeek::Connection* session_mgr_find_connection(Val* cid); + +// We've seen these two cause overhead even with normal script execution; +// maybe we should fix them via conn removal hooks or some such. +bool packet_mgr_remove_teredo(Val* cid); +bool packet_mgr_remove_gtpv1(Val* cid); + +// Analyzer-Name op +StringVal* analyzer_name(zeek::EnumVal* v); + +// Used with Is-Protocol-Analyzer op +plugin::Component* analyzer_mgr_lookup(EnumVal* v); + +// Conn size analyzer accessors for byte thresholds. +// +// Note: The underlying API uses a bool parameter to distinguish between +// packet and byte thresholds. For now, we only need bytes, and it seems +// less obfuscated to use individual functions. +zeek_uint_t conn_size_get_bytes_threshold(Val* cid, bool is_orig); +bool conn_size_set_bytes_threshold(zeek_uint_t threshold, Val* cid, bool is_orig); + + +// File analysis facade. 
+void file_mgr_set_handle(StringVal* h); +bool file_mgr_add_analyzer(StringVal* file_id, EnumVal* tag, RecordVal* args); +bool file_mgr_remove_analyzer(StringVal* file_id, EnumVal* tag, RecordVal* args); +bool file_mgr_analyzer_enabled(EnumVal* v); +zeek::StringVal* file_mgr_analyzer_name(EnumVal* v); +bool file_mgr_enable_reassembly(StringVal* file_id); +bool file_mgr_disable_reassembly(StringVal* file_id); +bool file_mgr_set_reassembly_buffer(StringVal* file_id, uint64_t max); + } // namespace ZAM // True if a function with the given profile can be compiled to ZAM. @@ -61,4 +117,5 @@ extern StringVal* ZAM_sub_bytes(const StringVal* s, zeek_uint_t start, zeek_int_ extern StringValPtr ZAM_val_cat(const ValPtr& v); -} // namespace zeek::detail +} // namespace detail +} // namespace zeek diff --git a/src/script_opt/ZAM/ZBody.cc b/src/script_opt/ZAM/ZBody.cc index 126c01d55f..95eed26c40 100644 --- a/src/script_opt/ZAM/ZBody.cc +++ b/src/script_opt/ZAM/ZBody.cc @@ -4,7 +4,9 @@ #include "zeek/Desc.h" #include "zeek/EventHandler.h" +#include "zeek/File.h" #include "zeek/Frame.h" +#include "zeek/OpaqueVal.h" #include "zeek/Overflow.h" #include "zeek/RE.h" #include "zeek/Reporter.h" @@ -12,7 +14,15 @@ #include "zeek/Trigger.h" #include "zeek/script_opt/ScriptOpt.h" #include "zeek/script_opt/ZAM/Compile.h" -#include "zeek/session/Manager.h" +#include "zeek/script_opt/ZAM/Support.h" + +// Forward declarations from RunState.cc +namespace zeek::run_state { +extern double network_time; +extern bool reading_traces; +extern bool reading_live; +extern bool terminating; +} // namespace zeek::run_state namespace zeek::detail { diff --git a/src/script_opt/ZAM/ZBody.h b/src/script_opt/ZAM/ZBody.h index 794ee36467..22330a6fa3 100644 --- a/src/script_opt/ZAM/ZBody.h +++ b/src/script_opt/ZAM/ZBody.h @@ -8,31 +8,6 @@ #include "zeek/script_opt/ZAM/Profile.h" #include "zeek/script_opt/ZAM/Support.h" -//////////////////////////////////////////////////////////////////////// -// Headers needed 
for run-time execution: - -// Needed for managing the corresponding values. -#include "zeek/File.h" -#include "zeek/Func.h" -#include "zeek/OpaqueVal.h" - -// Just needed for BiFs. -#include "zeek/analyzer/Manager.h" -#include "zeek/analyzer/protocol/conn-size/ConnSize.h" -#include "zeek/broker/Manager.h" -#include "zeek/file_analysis/Manager.h" -#include "zeek/file_analysis/file_analysis.bif.h" -#include "zeek/logging/Manager.h" -#include "zeek/packet_analysis/Manager.h" -#include "zeek/packet_analysis/protocol/gtpv1/GTPv1.h" -#include "zeek/packet_analysis/protocol/teredo/Teredo.h" - -#include "zeek.bif.func_h" - -// For reading_live and reading_traces -#include "zeek/RunState.h" -//////////////////////////////////////////////////////////////////////// - namespace zeek::detail { // Static information about globals used in a function.