mirror of
https://github.com/zeek/zeek.git
synced 2025-10-02 06:38:20 +00:00
Implement string- and container-length filtering at the log record level
This commit is contained in:
parent
cc59bfa5d8
commit
e2e7ab28da
11 changed files with 227 additions and 10 deletions
|
@ -3743,6 +3743,31 @@ export {
|
|||
## higher than this limit, but it prevents runaway-sized log entries from causing
|
||||
## problems.
|
||||
const max_log_record_size = 1024*1024*64 &redef;
|
||||
|
||||
## The maximum number of bytes that a single string field can contain when
|
||||
## logging. If a string reaches this limit, the log output for the field will be
|
||||
## truncated. Setting this to zero disables the limiting.
|
||||
const max_field_string_bytes = 4096 &redef;
|
||||
|
||||
## The maximum number of elements a single container field can contain when
|
||||
## logging. If a container reaches this limit, the log output for the field will
|
||||
## be truncated. Setting this to zero disables the limiting.
|
||||
const max_field_container_elements = 100 &redef;
|
||||
|
||||
## The maximum total bytes a record may log for string fields. This is the sum of
|
||||
## all bytes in string fields logged for the record. If this limit is reached, all
|
||||
## further string fields will be logged as empty strings. Strings held in
|
||||
## containers are likewise logged as empty strings once the limit is reached. If
|
||||
## the limit is reached in the middle of a string, that string will be truncated
|
||||
## in the log output. Setting this to zero disables the limiting.
|
||||
const max_total_string_bytes = 256000 &redef;
|
||||
|
||||
## The maximum total number of container elements a record may log. This is the
|
||||
## sum of all container elements logged for the record. If this limit is reached,
|
||||
## all further containers will be logged as empty containers. If the limit is
|
||||
## reached while processing a container, the container will be truncated in the
|
||||
## output. Setting this to zero disables the limiting.
|
||||
const max_total_container_elements = 500 &redef;
|
||||
}
|
||||
|
||||
module POP3;
|
||||
|
|
|
@ -504,6 +504,22 @@ void Manager::InitPostScript() {
|
|||
rotation_format_func = id::find_func("Log::rotation_format_func");
|
||||
log_stream_policy_hook = id::find_func("Log::log_stream_policy");
|
||||
max_log_record_size = id::find_val("Log::max_log_record_size")->AsCount();
|
||||
|
||||
max_field_string_bytes = id::find_val("Log::max_field_string_bytes")->AsCount();
|
||||
if ( max_field_string_bytes == 0 )
|
||||
max_field_string_bytes = std::numeric_limits<size_t>::max();
|
||||
|
||||
max_total_string_bytes = id::find_val("Log::max_total_string_bytes")->AsCount();
|
||||
if ( max_total_string_bytes == 0 )
|
||||
max_total_string_bytes = std::numeric_limits<size_t>::max();
|
||||
|
||||
max_field_container_elements = id::find_val("Log::max_field_container_elements")->AsCount();
|
||||
if ( max_field_container_elements == 0 )
|
||||
max_field_container_elements = std::numeric_limits<size_t>::max();
|
||||
|
||||
max_total_container_elements = id::find_val("Log::max_total_container_elements")->AsCount();
|
||||
if ( max_total_container_elements == 0 )
|
||||
max_total_container_elements = std::numeric_limits<size_t>::max();
|
||||
}
|
||||
|
||||
WriterBackend* Manager::CreateBackend(WriterFrontend* frontend, EnumVal* tag) {
|
||||
|
@ -1149,6 +1165,8 @@ bool Manager::WriteToFilters(const Manager::Stream* stream, zeek::RecordValPtr c
|
|||
|
||||
// Alright, can do the write now.
|
||||
size_t total_size = 0;
|
||||
total_string_bytes = 0;
|
||||
total_container_elements = 0;
|
||||
auto rec = RecordToLogRecord(stream, filter, columns.get(), total_size);
|
||||
|
||||
if ( total_size > max_log_record_size ) {
|
||||
|
@ -1464,12 +1482,20 @@ threading::Value Manager::ValToLogVal(std::optional<ZVal>& val, Type* ty, size_t
|
|||
|
||||
case TYPE_STRING: {
|
||||
const String* s = val->AsString()->AsString();
|
||||
char* buf = new char[s->Len()];
|
||||
memcpy(buf, s->Bytes(), s->Len());
|
||||
|
||||
size_t allowed_bytes = std::min(
|
||||
{static_cast<size_t>(s->Len()), max_field_string_bytes, max_total_string_bytes - total_string_bytes});
|
||||
|
||||
if ( allowed_bytes == 0 )
|
||||
return lval;
|
||||
|
||||
char* buf = new char[allowed_bytes];
|
||||
memcpy(buf, s->Bytes(), allowed_bytes);
|
||||
|
||||
lval.val.string_val.data = buf;
|
||||
lval.val.string_val.length = s->Len();
|
||||
total_size += lval.val.string_val.length;
|
||||
lval.val.string_val.length = allowed_bytes;
|
||||
total_size += allowed_bytes;
|
||||
total_string_bytes += allowed_bytes;
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -1508,10 +1534,15 @@ threading::Value Manager::ValToLogVal(std::optional<ZVal>& val, Type* ty, size_t
|
|||
auto& set_t = tbl_t->GetIndexTypes()[0];
|
||||
bool is_managed = ZVal::IsManagedType(set_t);
|
||||
|
||||
zeek_int_t set_length = set->Length();
|
||||
lval.val.set_val.vals = new threading::Value*[set_length];
|
||||
size_t allowed_elements = std::min({static_cast<size_t>(set->Length()), max_field_container_elements,
|
||||
max_total_container_elements - total_container_elements});
|
||||
|
||||
for ( zeek_int_t i = 0; i < set_length && total_size < max_log_record_size; i++ ) {
|
||||
if ( allowed_elements == 0 )
|
||||
return lval;
|
||||
|
||||
lval.val.set_val.vals = new threading::Value*[allowed_elements];
|
||||
|
||||
for ( size_t i = 0; i < allowed_elements && total_size < max_log_record_size; i++ ) {
|
||||
std::optional<ZVal> s_i = ZVal(set->Idx(i), set_t);
|
||||
lval.val.set_val.vals[i] = new threading::Value(ValToLogVal(s_i, set_t.get(), total_size));
|
||||
if ( is_managed )
|
||||
|
@ -1519,22 +1550,32 @@ threading::Value Manager::ValToLogVal(std::optional<ZVal>& val, Type* ty, size_t
|
|||
lval.val.set_val.size++;
|
||||
}
|
||||
|
||||
total_container_elements += lval.val.set_val.size;
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
case TYPE_VECTOR: {
|
||||
VectorVal* vec = val->AsVector();
|
||||
zeek_int_t vec_length = vec->Size();
|
||||
lval.val.vector_val.vals = new threading::Value*[vec_length];
|
||||
|
||||
size_t allowed_elements = std::min({static_cast<size_t>(vec->Size()), max_field_container_elements,
|
||||
max_total_container_elements - total_container_elements});
|
||||
|
||||
if ( allowed_elements == 0 )
|
||||
return lval;
|
||||
|
||||
lval.val.vector_val.vals = new threading::Value*[allowed_elements];
|
||||
|
||||
auto& vv = vec->RawVec();
|
||||
auto& vt = vec->GetType()->Yield();
|
||||
|
||||
for ( zeek_int_t i = 0; i < vec_length && total_size < max_log_record_size; i++ ) {
|
||||
for ( size_t i = 0; i < allowed_elements && total_size < max_log_record_size; i++ ) {
|
||||
lval.val.vector_val.vals[i] = new threading::Value(ValToLogVal(vv[i], vt.get(), total_size));
|
||||
lval.val.vector_val.size++;
|
||||
}
|
||||
|
||||
total_container_elements += lval.val.vector_val.size;
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
|
@ -448,7 +448,14 @@ private:
|
|||
int rotations_pending = 0; // Number of rotations not yet finished.
|
||||
FuncPtr rotation_format_func;
|
||||
FuncPtr log_stream_policy_hook;
|
||||
|
||||
size_t max_log_record_size = 0;
|
||||
size_t max_field_string_bytes = 0;
|
||||
size_t max_total_string_bytes = 0;
|
||||
size_t max_field_container_elements = 0;
|
||||
size_t max_total_container_elements = 0;
|
||||
size_t total_string_bytes = 0;
|
||||
size_t total_container_elements = 0;
|
||||
|
||||
std::shared_ptr<telemetry::CounterFamily> total_log_stream_writes_family;
|
||||
std::shared_ptr<telemetry::CounterFamily> total_log_writer_writes_family;
|
||||
|
|
|
@ -0,0 +1,11 @@
|
|||
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
|
||||
#separator \x09
|
||||
#set_separator ,
|
||||
#empty_field (empty)
|
||||
#unset_field -
|
||||
#path test
|
||||
#open XXXX-XX-XX-XX-XX-XX
|
||||
#fields strings1 strings2
|
||||
#types vector[string] vector[string]
|
||||
ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ ABCDEFGHIJ,ABCDE,(empty),(empty),(empty),(empty),(empty),(empty),(empty),(empty)
|
||||
#close XXXX-XX-XX-XX-XX-XX
|
|
@ -0,0 +1,11 @@
|
|||
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
|
||||
#separator \x09
|
||||
#set_separator ,
|
||||
#empty_field (empty)
|
||||
#unset_field -
|
||||
#path test
|
||||
#open XXXX-XX-XX-XX-XX-XX
|
||||
#fields strings1 strings2
|
||||
#types vector[string] vector[string]
|
||||
ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ,ABCDE,(empty) (empty),(empty),(empty),(empty),(empty),(empty),(empty),(empty),(empty),(empty)
|
||||
#close XXXX-XX-XX-XX-XX-XX
|
|
@ -0,0 +1,11 @@
|
|||
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
|
||||
#separator \x09
|
||||
#set_separator ,
|
||||
#empty_field (empty)
|
||||
#unset_field -
|
||||
#path test
|
||||
#open XXXX-XX-XX-XX-XX-XX
|
||||
#fields strings1 strings2
|
||||
#types vector[string] vector[string]
|
||||
ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ
|
||||
#close XXXX-XX-XX-XX-XX-XX
|
|
@ -0,0 +1,11 @@
|
|||
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
|
||||
#separator \x09
|
||||
#set_separator ,
|
||||
#empty_field (empty)
|
||||
#unset_field -
|
||||
#path test
|
||||
#open XXXX-XX-XX-XX-XX-XX
|
||||
#fields strings1 strings2
|
||||
#types vector[string] vector[string]
|
||||
ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ
|
||||
#close XXXX-XX-XX-XX-XX-XX
|
|
@ -0,0 +1,11 @@
|
|||
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
|
||||
#separator \x09
|
||||
#set_separator ,
|
||||
#empty_field (empty)
|
||||
#unset_field -
|
||||
#path test
|
||||
#open XXXX-XX-XX-XX-XX-XX
|
||||
#fields strings1 strings2
|
||||
#types vector[string] vector[string]
|
||||
ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ,ABCDEFGHIJ (empty)
|
||||
#close XXXX-XX-XX-XX-XX-XX
|
|
@ -0,0 +1,11 @@
|
|||
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
|
||||
#separator \x09
|
||||
#set_separator ,
|
||||
#empty_field (empty)
|
||||
#unset_field -
|
||||
#path test
|
||||
#open XXXX-XX-XX-XX-XX-XX
|
||||
#fields strings1 strings2
|
||||
#types vector[string] vector[string]
|
||||
ABCDE,ABCDE,ABCDE,ABCDE,ABCDE,ABCDE,ABCDE,ABCDE,ABCDE,ABCDE ABCDE,ABCDE,ABCDE,ABCDE,ABCDE,ABCDE,ABCDE,ABCDE,ABCDE,ABCDE
|
||||
#close XXXX-XX-XX-XX-XX-XX
|
|
@ -0,0 +1,72 @@
|
|||
# @TEST-DOC: Test the options that limit string and container lengths when logging
|
||||
#
|
||||
# @TEST-EXEC: zeek -b test.zeek %INPUT
|
||||
# @TEST-EXEC: btest-diff test.log
|
||||
|
||||
# @TEST-START-FILE test.zeek
|
||||
|
||||
module Test;
|
||||
|
||||
export {
|
||||
redef enum Log::ID += { LOG };
|
||||
|
||||
type Info: record {
|
||||
strings1: vector of string &log;
|
||||
strings2: vector of string &log;
|
||||
};
|
||||
}
|
||||
|
||||
event zeek_init()
|
||||
{
|
||||
Log::create_stream(LOG, [$columns=Info, $path="test"]);
|
||||
|
||||
local rec = Test::Info();
|
||||
local i = 0;
|
||||
|
||||
# Create two vectors containing 10 strings with 10 characters each.
|
||||
# This leaves us with 200 total characters to work with.
|
||||
while ( ++i <= 10 )
|
||||
{
|
||||
rec$strings1 += "ABCDEFGHIJ";
|
||||
rec$strings2 += "ABCDEFGHIJ";
|
||||
}
|
||||
|
||||
Log::write(Test::LOG, rec);
|
||||
}
|
||||
|
||||
|
||||
# @TEST-END-FILE test.zeek
|
||||
|
||||
# Limit the individual fields to 5 bytes, but keep the total maximum large enough that it
|
||||
# will write all of the fields.
|
||||
redef Log::max_field_string_bytes = 5;
|
||||
|
||||
# @TEST-START-NEXT
|
||||
|
||||
# Leave the individual field bytes alone, but set the maximum length to where it cuts off
|
||||
# the second field in the middle of a string.
|
||||
redef Log::max_total_string_bytes = 115;
|
||||
|
||||
# @TEST-START-NEXT
|
||||
|
||||
# Leave the individual field bytes alone, but set the maximum length to where it cuts off
|
||||
# the first field in the middle of a string. Second field should log empty strings.
|
||||
redef Log::max_total_string_bytes = 85;
|
||||
|
||||
# @TEST-START-NEXT
|
||||
|
||||
# Limit the individual containers to 5 items, but keep the total maximum large enough that
|
||||
# it will write all of the fields.
|
||||
redef Log::max_field_container_elements = 5;
|
||||
|
||||
# @TEST-START-NEXT
|
||||
|
||||
# Leave the individual field items alone, but set the maximum length to where it cuts off
|
||||
# the second field in the middle.
|
||||
redef Log::max_total_container_elements = 15;
|
||||
|
||||
# @TEST-START-NEXT
|
||||
|
||||
# Leave the individual field items alone, but set the maximum length to where it cuts off
|
||||
# the first field in the middle. Second field should log empty containers.
|
||||
redef Log::max_total_container_elements = 5;
|
|
@ -19,6 +19,12 @@
|
|||
|
||||
module Test;
|
||||
|
||||
# Disable the string and container length filtering.
|
||||
redef Log::max_field_string_bytes = 0;
|
||||
redef Log::max_total_string_bytes = 0;
|
||||
redef Log::max_field_container_elements = 0;
|
||||
redef Log::max_total_container_elements = 0;
|
||||
|
||||
export {
|
||||
redef enum Log::ID += { LOG };
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue