Implement string- and container-length filtering at the log record level

This commit is contained in:
Tim Wojtulewicz 2025-07-25 13:19:47 -07:00
parent cc59bfa5d8
commit e2e7ab28da
11 changed files with 227 additions and 10 deletions

View file

@ -3743,6 +3743,31 @@ export {
## higher than this limit, but it prevents runaway-sized log entries from causing
## problems.
const max_log_record_size = 1024*1024*64 &redef;
## The maximum number of bytes that a single string field can contain when
## logging. If a string exceeds this limit, the log output for the field will be
## truncated to this many bytes. Setting this to zero disables the limiting.
const max_field_string_bytes = 4096 &redef;
## The maximum number of elements a single container field can contain when
## logging. If a container exceeds this limit, the log output for the field will
## be truncated to this many elements. Setting this to zero disables the limiting.
const max_field_container_elements = 100 &redef;
## The maximum total bytes a record may log for string fields. This is the sum of
## all bytes in string fields logged for the record, including strings held in
## containers. The string that crosses this limit is truncated, and all further
## strings are logged as empty strings. Strings inside containers are treated the
## same way: the container keeps its elements, but elements past the limit are
## logged as empty strings. Setting this to zero disables the limiting.
const max_total_string_bytes = 256000 &redef;
## The maximum total number of container elements a record may log. This is the
## sum of all container elements logged for the record. If this limit is reached,
## all further containers will be logged as empty containers. If the limit is
## reached while processing a container, the container will be truncated in the
## output. Setting this to zero disables the limiting.
const max_total_container_elements = 500 &redef;
}
module POP3;

View file

@ -504,6 +504,22 @@ void Manager::InitPostScript() {
rotation_format_func = id::find_func("Log::rotation_format_func");
log_stream_policy_hook = id::find_func("Log::log_stream_policy");
max_log_record_size = id::find_val("Log::max_log_record_size")->AsCount();
max_field_string_bytes = id::find_val("Log::max_field_string_bytes")->AsCount();
if ( max_field_string_bytes == 0 )
max_field_string_bytes = std::numeric_limits<size_t>::max();
max_total_string_bytes = id::find_val("Log::max_total_string_bytes")->AsCount();
if ( max_total_string_bytes == 0 )
max_total_string_bytes = std::numeric_limits<size_t>::max();
max_field_container_elements = id::find_val("Log::max_field_container_elements")->AsCount();
if ( max_field_container_elements == 0 )
max_field_container_elements = std::numeric_limits<size_t>::max();
max_total_container_elements = id::find_val("Log::max_total_container_elements")->AsCount();
if ( max_total_container_elements == 0 )
max_total_container_elements = std::numeric_limits<size_t>::max();
}
WriterBackend* Manager::CreateBackend(WriterFrontend* frontend, EnumVal* tag) {
@ -1149,6 +1165,8 @@ bool Manager::WriteToFilters(const Manager::Stream* stream, zeek::RecordValPtr c
// Alright, can do the write now.
size_t total_size = 0;
total_string_bytes = 0;
total_container_elements = 0;
auto rec = RecordToLogRecord(stream, filter, columns.get(), total_size);
if ( total_size > max_log_record_size ) {
@ -1464,12 +1482,20 @@ threading::Value Manager::ValToLogVal(std::optional<ZVal>& val, Type* ty, size_t
case TYPE_STRING: {
const String* s = val->AsString()->AsString();
char* buf = new char[s->Len()];
memcpy(buf, s->Bytes(), s->Len());
size_t allowed_bytes = std::min(
{static_cast<size_t>(s->Len()), max_field_string_bytes, max_total_string_bytes - total_string_bytes});
if ( allowed_bytes == 0 )
return lval;
char* buf = new char[allowed_bytes];
memcpy(buf, s->Bytes(), allowed_bytes);
lval.val.string_val.data = buf;
lval.val.string_val.length = s->Len();
total_size += lval.val.string_val.length;
lval.val.string_val.length = allowed_bytes;
total_size += allowed_bytes;
total_string_bytes += allowed_bytes;
break;
}
@ -1508,10 +1534,15 @@ threading::Value Manager::ValToLogVal(std::optional<ZVal>& val, Type* ty, size_t
auto& set_t = tbl_t->GetIndexTypes()[0];
bool is_managed = ZVal::IsManagedType(set_t);
zeek_int_t set_length = set->Length();
lval.val.set_val.vals = new threading::Value*[set_length];
size_t allowed_elements = std::min({static_cast<size_t>(set->Length()), max_field_container_elements,
max_total_container_elements - total_container_elements});
for ( zeek_int_t i = 0; i < set_length && total_size < max_log_record_size; i++ ) {
if ( allowed_elements == 0 )
return lval;
lval.val.set_val.vals = new threading::Value*[allowed_elements];
for ( size_t i = 0; i < allowed_elements && total_size < max_log_record_size; i++ ) {
std::optional<ZVal> s_i = ZVal(set->Idx(i), set_t);
lval.val.set_val.vals[i] = new threading::Value(ValToLogVal(s_i, set_t.get(), total_size));
if ( is_managed )
@ -1519,22 +1550,32 @@ threading::Value Manager::ValToLogVal(std::optional<ZVal>& val, Type* ty, size_t
lval.val.set_val.size++;
}
total_container_elements += lval.val.set_val.size;
break;
}
case TYPE_VECTOR: {
VectorVal* vec = val->AsVector();
zeek_int_t vec_length = vec->Size();
lval.val.vector_val.vals = new threading::Value*[vec_length];
size_t allowed_elements = std::min({static_cast<size_t>(vec->Size()), max_field_container_elements,
max_total_container_elements - total_container_elements});
if ( allowed_elements == 0 )
return lval;
lval.val.vector_val.vals = new threading::Value*[allowed_elements];
auto& vv = vec->RawVec();
auto& vt = vec->GetType()->Yield();
for ( zeek_int_t i = 0; i < vec_length && total_size < max_log_record_size; i++ ) {
for ( size_t i = 0; i < allowed_elements && total_size < max_log_record_size; i++ ) {
lval.val.vector_val.vals[i] = new threading::Value(ValToLogVal(vv[i], vt.get(), total_size));
lval.val.vector_val.size++;
}
total_container_elements += lval.val.vector_val.size;
break;
}

View file

@ -448,7 +448,14 @@ private:
int rotations_pending = 0; // Number of rotations not yet finished.
FuncPtr rotation_format_func;
FuncPtr log_stream_policy_hook;
size_t max_log_record_size = 0;
size_t max_field_string_bytes = 0;
size_t max_total_string_bytes = 0;
size_t max_field_container_elements = 0;
size_t max_total_container_elements = 0;
size_t total_string_bytes = 0;
size_t total_container_elements = 0;
std::shared_ptr<telemetry::CounterFamily> total_log_stream_writes_family;
std::shared_ptr<telemetry::CounterFamily> total_log_writer_writes_family;

View file

@ -0,0 +1,11 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
#separator \x09
#set_separator ,
#empty_field (empty)
#unset_field -
#path test
#open XXXX-XX-XX-XX-XX-XX
#fields strings1 strings2
#types vector[string] vector[string]
ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ ABCDEFGHIJ,ABCDE,(empty),(empty),(empty),(empty),(empty),(empty),(empty),(empty)
#close XXXX-XX-XX-XX-XX-XX

View file

@ -0,0 +1,11 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
#separator \x09
#set_separator ,
#empty_field (empty)
#unset_field -
#path test
#open XXXX-XX-XX-XX-XX-XX
#fields strings1 strings2
#types vector[string] vector[string]
ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ,ABCDE,(empty) (empty),(empty),(empty),(empty),(empty),(empty),(empty),(empty),(empty),(empty)
#close XXXX-XX-XX-XX-XX-XX

View file

@ -0,0 +1,11 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
#separator \x09
#set_separator ,
#empty_field (empty)
#unset_field -
#path test
#open XXXX-XX-XX-XX-XX-XX
#fields strings1 strings2
#types vector[string] vector[string]
ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ
#close XXXX-XX-XX-XX-XX-XX

View file

@ -0,0 +1,11 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
#separator \x09
#set_separator ,
#empty_field (empty)
#unset_field -
#path test
#open XXXX-XX-XX-XX-XX-XX
#fields strings1 strings2
#types vector[string] vector[string]
ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ
#close XXXX-XX-XX-XX-XX-XX

View file

@ -0,0 +1,11 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
#separator \x09
#set_separator ,
#empty_field (empty)
#unset_field -
#path test
#open XXXX-XX-XX-XX-XX-XX
#fields strings1 strings2
#types vector[string] vector[string]
ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ (empty)
#close XXXX-XX-XX-XX-XX-XX

View file

@ -0,0 +1,11 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
#separator \x09
#set_separator ,
#empty_field (empty)
#unset_field -
#path test
#open XXXX-XX-XX-XX-XX-XX
#fields strings1 strings2
#types vector[string] vector[string]
ABCDE,ABCDE,ABCDE,ABCDE,ABCDE,ABCDE,ABCDE,ABCDE,ABCDE,ABCDE ABCDE,ABCDE,ABCDE,ABCDE,ABCDE,ABCDE,ABCDE,ABCDE,ABCDE,ABCDE
#close XXXX-XX-XX-XX-XX-XX

View file

@ -0,0 +1,72 @@
# @TEST-DOC: Test the options that limit string and container lengths when logging
#
# @TEST-EXEC: zeek -b test.zeek %INPUT
# @TEST-EXEC: btest-diff test.log
# @TEST-START-FILE test.zeek
module Test;
export {
# Log stream identifier used by this test.
redef enum Log::ID += { LOG };
# Record written to the test log. Both columns are string vectors so that the
# per-field and per-record string/container limits can be exercised.
type Info: record {
# First logged column; consumes the per-record budgets first.
strings1: vector of string &log;
# Second logged column; shows what is left of the per-record budgets.
strings2: vector of string &log;
};
}
# Creates the test log stream, fills one record with two identical string
# vectors, and writes it once so the limit options can be checked in test.log.
event zeek_init()
{
# Register the stream; output goes to the "test" log path.
Log::create_stream(LOG, [$columns=Info, $path="test"]);
local rec = Test::Info();
local i = 0;
# Create two vectors containing 10 strings with 10 characters each.
# This leaves us with 200 total characters to work with.
# The pre-increment makes the loop body run for i = 1..10, i.e. 10 times.
while ( ++i <= 10 )
{
rec$strings1 += "ABCDEFGHIJ";
rec$strings2 += "ABCDEFGHIJ";
}
# Single write; each @TEST-START-NEXT section below redefines one limit.
Log::write(Test::LOG, rec);
}
# @TEST-END-FILE test.zeek
# Limit the individual fields to 5 bytes, but keep the total maximum large enough that it
# will write all of the fields.
redef Log::max_field_string_bytes = 5;
# @TEST-START-NEXT
# Leave the individual field bytes alone, but set the maximum length to where it cuts off
# the second field in the middle of a string.
redef Log::max_total_string_bytes = 115;
# @TEST-START-NEXT
# Leave the individual field bytes alone, but set the maximum length to where it cuts off
# the first field in the middle of a string. Second field should log empty strings.
redef Log::max_total_string_bytes = 85;
# @TEST-START-NEXT
# Limit the individual containers to 5 items, but keep the total maximum large enough that
# it will write all of the fields.
redef Log::max_field_container_elements = 5;
# @TEST-START-NEXT
# Leave the individual field items alone, but set the maximum length to where it cuts off
# the second field in the middle.
redef Log::max_total_container_elements = 15;
# @TEST-START-NEXT
# Leave the individual field items alone, but set the maximum length to where it cuts off
# the first field in the middle. Second field should log empty containers.
redef Log::max_total_container_elements = 5;

View file

@ -19,6 +19,12 @@
module Test;
# Disable the string and container length filtering.
redef Log::max_field_string_bytes = 0;
redef Log::max_total_string_bytes = 0;
redef Log::max_field_container_elements = 0;
redef Log::max_total_container_elements = 0;
export {
redef enum Log::ID += { LOG };