From e2e7ab28da117753afa40a3408ebc6133ed016c3 Mon Sep 17 00:00:00 2001 From: Tim Wojtulewicz Date: Fri, 25 Jul 2025 13:19:47 -0700 Subject: [PATCH] Implement string- and container-length filtering at the log record level --- scripts/base/init-bare.zeek | 25 +++++++ src/logging/Manager.cc | 61 +++++++++++++--- src/logging/Manager.h | 7 ++ .../test.log | 11 +++ .../test.log | 11 +++ .../test.log | 11 +++ .../test.log | 11 +++ .../test.log | 11 +++ .../test.log | 11 +++ .../logging/field-length-limiting.zeek | 72 +++++++++++++++++++ .../frameworks/logging/length-checking.zeek | 6 ++ 11 files changed, 227 insertions(+), 10 deletions(-) create mode 100644 testing/btest/Baseline/scripts.base.frameworks.logging.field-length-limiting-2/test.log create mode 100644 testing/btest/Baseline/scripts.base.frameworks.logging.field-length-limiting-3/test.log create mode 100644 testing/btest/Baseline/scripts.base.frameworks.logging.field-length-limiting-4/test.log create mode 100644 testing/btest/Baseline/scripts.base.frameworks.logging.field-length-limiting-5/test.log create mode 100644 testing/btest/Baseline/scripts.base.frameworks.logging.field-length-limiting-6/test.log create mode 100644 testing/btest/Baseline/scripts.base.frameworks.logging.field-length-limiting/test.log create mode 100644 testing/btest/scripts/base/frameworks/logging/field-length-limiting.zeek diff --git a/scripts/base/init-bare.zeek b/scripts/base/init-bare.zeek index 66ce27876c..8f300bce4d 100644 --- a/scripts/base/init-bare.zeek +++ b/scripts/base/init-bare.zeek @@ -3743,6 +3743,31 @@ export { ## higher than this limit, but it prevents runaway-sized log entries from causing ## problems. const max_log_record_size = 1024*1024*64 &redef; + + ## The maximum number of bytes that a single string field can contain when + ## logging. If a string exceeds this limit, the log output for the field will be + ## truncated. Setting this to zero disables the limiting. 
+ const max_field_string_bytes = 4096 &redef; + + ## The maximum number of elements a single container field can contain when + ## logging. If a container exceeds this limit, the log output for the field will + ## be truncated. Setting this to zero disables the limiting. + const max_field_container_elements = 100 &redef; + + ## The maximum total bytes a record may log for string fields. This is the sum of + ## all bytes in string fields logged for the record. If this limit is reached, all + ## further string fields will be logged as empty strings. Containers holding strings + ## keep their elements, but those elements are logged as empty strings. If the limit + ## is reached inside such a container, the string crossing it is truncated and the + ## rest are logged empty. Setting this to zero disables the limiting. + const max_total_string_bytes = 256000 &redef; + + ## The maximum total number of container elements a record may log. This is the + ## sum of all container elements logged for the record. If this limit is reached, + ## all further containers will be logged as empty containers. If the limit is + ## reached while processing a container, the container will be truncated in the + ## output. Setting this to zero disables the limiting. 
+ const max_total_container_elements = 500 &redef; } module POP3; diff --git a/src/logging/Manager.cc b/src/logging/Manager.cc index eb22695e05..734a54bbc0 100644 --- a/src/logging/Manager.cc +++ b/src/logging/Manager.cc @@ -504,6 +504,22 @@ void Manager::InitPostScript() { rotation_format_func = id::find_func("Log::rotation_format_func"); log_stream_policy_hook = id::find_func("Log::log_stream_policy"); max_log_record_size = id::find_val("Log::max_log_record_size")->AsCount(); + + max_field_string_bytes = id::find_val("Log::max_field_string_bytes")->AsCount(); + if ( max_field_string_bytes == 0 ) + max_field_string_bytes = std::numeric_limits<size_t>::max(); + + max_total_string_bytes = id::find_val("Log::max_total_string_bytes")->AsCount(); + if ( max_total_string_bytes == 0 ) + max_total_string_bytes = std::numeric_limits<size_t>::max(); + + max_field_container_elements = id::find_val("Log::max_field_container_elements")->AsCount(); + if ( max_field_container_elements == 0 ) + max_field_container_elements = std::numeric_limits<size_t>::max(); + + max_total_container_elements = id::find_val("Log::max_total_container_elements")->AsCount(); + if ( max_total_container_elements == 0 ) + max_total_container_elements = std::numeric_limits<size_t>::max(); } WriterBackend* Manager::CreateBackend(WriterFrontend* frontend, EnumVal* tag) { @@ -1149,6 +1165,8 @@ bool Manager::WriteToFilters(const Manager::Stream* stream, zeek::RecordValPtr c // Alright, can do the write now. 
size_t total_size = 0; + total_string_bytes = 0; + total_container_elements = 0; auto rec = RecordToLogRecord(stream, filter, columns.get(), total_size); if ( total_size > max_log_record_size ) { @@ -1464,12 +1482,20 @@ threading::Value Manager::ValToLogVal(std::optional<ZVal>& val, Type* ty, size_t case TYPE_STRING: { const String* s = val->AsString()->AsString(); - char* buf = new char[s->Len()]; - memcpy(buf, s->Bytes(), s->Len()); + + size_t allowed_bytes = std::min( + {static_cast<size_t>(s->Len()), max_field_string_bytes, max_total_string_bytes - total_string_bytes}); + + if ( allowed_bytes == 0 ) + return lval; + + char* buf = new char[allowed_bytes]; + memcpy(buf, s->Bytes(), allowed_bytes); lval.val.string_val.data = buf; - lval.val.string_val.length = s->Len(); - total_size += lval.val.string_val.length; + lval.val.string_val.length = allowed_bytes; + total_size += allowed_bytes; + total_string_bytes += allowed_bytes; break; } @@ -1508,10 +1534,15 @@ threading::Value Manager::ValToLogVal(std::optional<ZVal>& val, Type* ty, size_t auto& set_t = tbl_t->GetIndexTypes()[0]; bool is_managed = ZVal::IsManagedType(set_t); - zeek_int_t set_length = set->Length(); - lval.val.set_val.vals = new threading::Value*[set_length]; + size_t allowed_elements = std::min({static_cast<size_t>(set->Length()), max_field_container_elements, + max_total_container_elements - total_container_elements}); - for ( zeek_int_t i = 0; i < set_length && total_size < max_log_record_size; i++ ) { + if ( allowed_elements == 0 ) + return lval; + + lval.val.set_val.vals = new threading::Value*[allowed_elements]; + + for ( size_t i = 0; i < allowed_elements && total_size < max_log_record_size; i++ ) { std::optional<ZVal> s_i = ZVal(set->Idx(i), set_t); lval.val.set_val.vals[i] = new threading::Value(ValToLogVal(s_i, set_t.get(), total_size)); if ( is_managed ) @@ -1519,22 +1550,32 @@ threading::Value Manager::ValToLogVal(std::optional<ZVal>& val, Type* ty, size_t lval.val.set_val.size++; } + total_container_elements += 
lval.val.set_val.size; + break; } case TYPE_VECTOR: { VectorVal* vec = val->AsVector(); - zeek_int_t vec_length = vec->Size(); - lval.val.vector_val.vals = new threading::Value*[vec_length]; + + size_t allowed_elements = std::min({static_cast<size_t>(vec->Size()), max_field_container_elements, + max_total_container_elements - total_container_elements}); + + if ( allowed_elements == 0 ) + return lval; + + lval.val.vector_val.vals = new threading::Value*[allowed_elements]; auto& vv = vec->RawVec(); auto& vt = vec->GetType()->Yield(); - for ( zeek_int_t i = 0; i < vec_length && total_size < max_log_record_size; i++ ) { + for ( size_t i = 0; i < allowed_elements && total_size < max_log_record_size; i++ ) { lval.val.vector_val.vals[i] = new threading::Value(ValToLogVal(vv[i], vt.get(), total_size)); lval.val.vector_val.size++; } + total_container_elements += lval.val.vector_val.size; + break; } diff --git a/src/logging/Manager.h b/src/logging/Manager.h index 63336d67cb..dd933e0b50 100644 --- a/src/logging/Manager.h +++ b/src/logging/Manager.h @@ -448,7 +448,14 @@ private: int rotations_pending = 0; // Number of rotations not yet finished. FuncPtr rotation_format_func; FuncPtr log_stream_policy_hook; + size_t max_log_record_size = 0; + size_t max_field_string_bytes = 0; + size_t max_total_string_bytes = 0; + size_t max_field_container_elements = 0; + size_t max_total_container_elements = 0; + size_t total_string_bytes = 0; + size_t total_container_elements = 0; std::shared_ptr total_log_stream_writes_family; std::shared_ptr total_log_writer_writes_family; diff --git a/testing/btest/Baseline/scripts.base.frameworks.logging.field-length-limiting-2/test.log b/testing/btest/Baseline/scripts.base.frameworks.logging.field-length-limiting-2/test.log new file mode 100644 index 0000000000..93c73f8a84 --- /dev/null +++ b/testing/btest/Baseline/scripts.base.frameworks.logging.field-length-limiting-2/test.log @@ -0,0 +1,11 @@ +### BTest baseline data generated by btest-diff. Do not edit. 
Use "btest -U/-u" to update. Requires BTest >= 0.63. +#separator \x09 +#set_separator , +#empty_field (empty) +#unset_field - +#path test +#open XXXX-XX-XX-XX-XX-XX +#fields strings1 strings2 +#types vector[string] vector[string] +ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ ABCDEFGHIJ,ABCDE,(empty),(empty),(empty),(empty),(empty),(empty),(empty),(empty) +#close XXXX-XX-XX-XX-XX-XX diff --git a/testing/btest/Baseline/scripts.base.frameworks.logging.field-length-limiting-3/test.log b/testing/btest/Baseline/scripts.base.frameworks.logging.field-length-limiting-3/test.log new file mode 100644 index 0000000000..c44d2e2c5c --- /dev/null +++ b/testing/btest/Baseline/scripts.base.frameworks.logging.field-length-limiting-3/test.log @@ -0,0 +1,11 @@ +### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. +#separator \x09 +#set_separator , +#empty_field (empty) +#unset_field - +#path test +#open XXXX-XX-XX-XX-XX-XX +#fields strings1 strings2 +#types vector[string] vector[string] +ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ,ABCDE,(empty) (empty),(empty),(empty),(empty),(empty),(empty),(empty),(empty),(empty),(empty) +#close XXXX-XX-XX-XX-XX-XX diff --git a/testing/btest/Baseline/scripts.base.frameworks.logging.field-length-limiting-4/test.log b/testing/btest/Baseline/scripts.base.frameworks.logging.field-length-limiting-4/test.log new file mode 100644 index 0000000000..542d53cf44 --- /dev/null +++ b/testing/btest/Baseline/scripts.base.frameworks.logging.field-length-limiting-4/test.log @@ -0,0 +1,11 @@ +### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. 
+#separator \x09 +#set_separator , +#empty_field (empty) +#unset_field - +#path test +#open XXXX-XX-XX-XX-XX-XX +#fields strings1 strings2 +#types vector[string] vector[string] +ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ +#close XXXX-XX-XX-XX-XX-XX diff --git a/testing/btest/Baseline/scripts.base.frameworks.logging.field-length-limiting-5/test.log b/testing/btest/Baseline/scripts.base.frameworks.logging.field-length-limiting-5/test.log new file mode 100644 index 0000000000..2b02e6e655 --- /dev/null +++ b/testing/btest/Baseline/scripts.base.frameworks.logging.field-length-limiting-5/test.log @@ -0,0 +1,11 @@ +### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. +#separator \x09 +#set_separator , +#empty_field (empty) +#unset_field - +#path test +#open XXXX-XX-XX-XX-XX-XX +#fields strings1 strings2 +#types vector[string] vector[string] +ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ +#close XXXX-XX-XX-XX-XX-XX diff --git a/testing/btest/Baseline/scripts.base.frameworks.logging.field-length-limiting-6/test.log b/testing/btest/Baseline/scripts.base.frameworks.logging.field-length-limiting-6/test.log new file mode 100644 index 0000000000..4f595571fe --- /dev/null +++ b/testing/btest/Baseline/scripts.base.frameworks.logging.field-length-limiting-6/test.log @@ -0,0 +1,11 @@ +### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. 
+#separator \x09 +#set_separator , +#empty_field (empty) +#unset_field - +#path test +#open XXXX-XX-XX-XX-XX-XX +#fields strings1 strings2 +#types vector[string] vector[string] +ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ (empty) +#close XXXX-XX-XX-XX-XX-XX diff --git a/testing/btest/Baseline/scripts.base.frameworks.logging.field-length-limiting/test.log b/testing/btest/Baseline/scripts.base.frameworks.logging.field-length-limiting/test.log new file mode 100644 index 0000000000..9bcebb3086 --- /dev/null +++ b/testing/btest/Baseline/scripts.base.frameworks.logging.field-length-limiting/test.log @@ -0,0 +1,11 @@ +### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. +#separator \x09 +#set_separator , +#empty_field (empty) +#unset_field - +#path test +#open XXXX-XX-XX-XX-XX-XX +#fields strings1 strings2 +#types vector[string] vector[string] +ABCDE,ABCDE,ABCDE,ABCDE,ABCDE,ABCDE,ABCDE,ABCDE,ABCDE,ABCDE ABCDE,ABCDE,ABCDE,ABCDE,ABCDE,ABCDE,ABCDE,ABCDE,ABCDE,ABCDE +#close XXXX-XX-XX-XX-XX-XX diff --git a/testing/btest/scripts/base/frameworks/logging/field-length-limiting.zeek b/testing/btest/scripts/base/frameworks/logging/field-length-limiting.zeek new file mode 100644 index 0000000000..d52f625561 --- /dev/null +++ b/testing/btest/scripts/base/frameworks/logging/field-length-limiting.zeek @@ -0,0 +1,72 @@ +# @TEST-DOC: Test the options that limit string and container lengths when logging +# +# @TEST-EXEC: zeek -b test.zeek %INPUT +# @TEST-EXEC: btest-diff test.log + +# @TEST-START-FILE test.zeek + +module Test; + +export { + redef enum Log::ID += { LOG }; + + type Info: record { + strings1: vector of string &log; + strings2: vector of string &log; + }; +} + +event zeek_init() + { + Log::create_stream(LOG, [$columns=Info, $path="test"]); + + local rec = Test::Info(); + local i = 0; + + # Create two vectors containing 10 strings with 10 characters each. 
+ # This leaves us with 200 total characters to work with. + while ( ++i <= 10 ) + { + rec$strings1 += "ABCDEFGHIJ"; + rec$strings2 += "ABCDEFGHIJ"; + } + + Log::write(Test::LOG, rec); + } + + +# @TEST-END-FILE test.zeek + +# Limit the individual fields to 5 bytes, but keep the total maximum large enough that it +# will write all of the fields. +redef Log::max_field_string_bytes = 5; + +# @TEST-START-NEXT + +# Leave the individual field bytes alone, but set the maximum length to where it cuts off +# the second field in the middle of a string. +redef Log::max_total_string_bytes = 115; + +# @TEST-START-NEXT + +# Leave the individual field bytes alone, but set the maximum length to where it cuts off +# the first field in the middle of a string. Second field should log empty strings. +redef Log::max_total_string_bytes = 85; + +# @TEST-START-NEXT + +# Limit the individual containers to 5 items, but keep the total maximum large enough that +# it will write all of the fields. +redef Log::max_field_container_elements = 5; + +# @TEST-START-NEXT + +# Leave the individual field items alone, but set the maximum length to where it cuts off +# the second field in the middle. +redef Log::max_total_container_elements = 15; + +# @TEST-START-NEXT + +# Leave the individual field items alone, but set the maximum length to where it cuts off +# the first field in the middle. Second field should log an empty container. +redef Log::max_total_container_elements = 5; diff --git a/testing/btest/scripts/base/frameworks/logging/length-checking.zeek b/testing/btest/scripts/base/frameworks/logging/length-checking.zeek index 7316b2c4d0..0e14734012 100644 --- a/testing/btest/scripts/base/frameworks/logging/length-checking.zeek +++ b/testing/btest/scripts/base/frameworks/logging/length-checking.zeek @@ -19,6 +19,12 @@ module Test; + 
+redef Log::max_field_string_bytes = 0; +redef Log::max_total_string_bytes = 0; +redef Log::max_field_container_elements = 0; +redef Log::max_total_container_elements = 0; + export { redef enum Log::ID += { LOG };