Merge remote-tracking branch 'origin/topic/vern/footprint'

* origin/topic/vern/footprint:
  new environment variable to enable BTests to skip ASAN checks
  skip new BiF test for ASAN CI runs
  btest update to include recursive value that doesn't require a record
  to avoid recursion, track all aggregates, not just records; isolate the internal methods
  simpler public calling interface for computing footprint
  use stack-based set to prevent infinite recursion rather than a static one
  change value_footprint() to val_footprint() to be more similar to val_size()
  make including count of container elements non-optional
  btest for mutually-recursive case
  fix for tracking footprints of mutually-recursive records
  added value_footprint() and global_container_footprints() BiFs
This commit is contained in:
Tim Wojtulewicz 2022-05-06 11:32:13 -07:00
commit cf51931615
8 changed files with 324 additions and 1 deletions

View file

@ -283,6 +283,7 @@ asan_sanitizer_task:
CXXFLAGS: -DZEEK_DICT_DEBUG CXXFLAGS: -DZEEK_DICT_DEBUG
ZEEK_CI_CONFIGURE_FLAGS: *ASAN_SANITIZER_CONFIG ZEEK_CI_CONFIGURE_FLAGS: *ASAN_SANITIZER_CONFIG
ZEEK_CI_DISABLE_SCRIPT_PROFILING: 1 ZEEK_CI_DISABLE_SCRIPT_PROFILING: 1
ZEEK_CI_ASAN_SKIP_TEST: 1
ASAN_OPTIONS: detect_leaks=1 ASAN_OPTIONS: detect_leaks=1
ubsan_sanitizer_task: ubsan_sanitizer_task:

17
CHANGES
View file

@ -1,3 +1,20 @@
5.0.0-dev.359 | 2022-05-06 11:32:13 -0700
* to avoid recursion, track all aggregates, not just records (Vern Paxson, Corelight)
isolate the internal methods
* simpler public calling interface for computing footprint (Vern Paxson, Corelight)
* use stack-based set to prevent infinite recursion rather than a static one (Vern Paxson, Corelight)
* change value_footprint() to val_footprint() to be more similar to val_size() (Vern Paxson, Corelight)
* make including count of container elements non-optional (Vern Paxson, Corelight)
* fix for tracking footprints of mutually-recursive records (Vern Paxson, Corelight)
* added value_footprint() and global_container_footprints() BiFs (Vern Paxson, Corelight)
5.0.0-dev.347 | 2022-05-05 18:09:44 -0700 5.0.0-dev.347 | 2022-05-05 18:09:44 -0700
* Management framework: add get_configuration_request/response transaction (Christian Kreibich, Corelight) * Management framework: add get_configuration_request/response transaction (Christian Kreibich, Corelight)

View file

@ -1 +1 @@
5.0.0-dev.347 5.0.0-dev.359

View file

@ -1322,6 +1322,16 @@ ValPtr ListVal::DoClone(CloneState* state)
return lv; return lv;
} }
unsigned int ListVal::ComputeFootprint(std::unordered_set<const Val*>* analyzed_vals) const
	{
	// Each element contributes one unit for its slot in the list, plus
	// whatever its own (possibly nested) footprint amounts to.
	unsigned int total = 0;

	for ( const auto& elem : vals )
		total += 1 + elem->Footprint(analyzed_vals);

	return total;
	}
unsigned int ListVal::MemoryAllocation() const unsigned int ListVal::MemoryAllocation() const
{ {
#pragma GCC diagnostic push #pragma GCC diagnostic push
@ -2684,6 +2694,24 @@ ValPtr TableVal::DoClone(CloneState* state)
return tv; return tv;
} }
unsigned int TableVal::ComputeFootprint(std::unordered_set<const Val*>* analyzed_vals) const
	{
	// One unit per table entry, to which we add the footprint of each
	// entry's index and, where there is one, of its value.
	unsigned int total = table_val->Length();

	for ( const auto& entry : *table_val )
		{
		// The index is only available as a hash key, so recover the
		// index values from it in order to measure them.
		auto key = entry.GetHashKey();
		auto index = table_hash->RecoverVals(*key);
		auto yield = entry.GetValue<TableEntryVal*>()->GetVal();

		total += index->Footprint(analyzed_vals);

		// An entry need not carry a value (presumably the case for
		// sets), in which case only its index is counted.
		if ( yield )
			total += yield->Footprint(analyzed_vals);
		}

	return total;
	}
unsigned int TableVal::MemoryAllocation() const unsigned int TableVal::MemoryAllocation() const
{ {
unsigned int size = 0; unsigned int size = 0;
@ -3049,6 +3077,24 @@ ValPtr RecordVal::DoClone(CloneState* state)
return rv; return rv;
} }
unsigned int RecordVal::ComputeFootprint(std::unordered_set<const Val*>* analyzed_vals) const
	{
	const int num_fields = NumFields();

	// Every field slot counts once, whether or not it currently holds
	// a value.
	unsigned int total = num_fields;

	for ( int idx = 0; idx < num_fields; ++idx )
		{
		// Fields that are absent add nothing beyond their slot.
		if ( HasField(idx) )
			{
			if ( auto field = GetField(idx) )
				total += field->Footprint(analyzed_vals);
			}
		}

	return total;
	}
unsigned int RecordVal::MemoryAllocation() const unsigned int RecordVal::MemoryAllocation() const
{ {
unsigned int size = 0; unsigned int size = 0;
@ -3572,6 +3618,21 @@ bool VectorVal::Concretize(const TypePtr& t)
return true; return true;
} }
unsigned int VectorVal::ComputeFootprint(std::unordered_set<const Val*>* analyzed_vals) const
	{
	const auto num_elems = vector_val->size();

	// Every slot counts once; slots for which At() yields a null
	// pointer contribute nothing further.
	unsigned int total = num_elems;

	for ( unsigned int idx = 0; idx < num_elems; ++idx )
		{
		if ( auto elem = At(idx) )
			total += elem->Footprint(analyzed_vals);
		}

	return total;
	}
unsigned int VectorVal::Resize(unsigned int new_num_elements) unsigned int VectorVal::Resize(unsigned int new_num_elements)
{ {
unsigned int oldsize = vector_val->size(); unsigned int oldsize = vector_val->size();
@ -3953,6 +4014,31 @@ ValPtr Val::MakeCount(bro_uint_t u)
return make_intrusive<CountVal>(u); return make_intrusive<CountVal>(u);
} }
unsigned int Val::Footprint(std::unordered_set<const Val*>* analyzed_vals) const
	{
	// Only aggregates need tracking: there's no way to construct a cycle
	// out of non-aggregates alone.
	if ( ! IsAggr(type) )
		return ComputeFootprint(analyzed_vals);

	// insert() reports failure if this aggregate is already in the set,
	// i.e., it's currently being analyzed further up the call chain and
	// we've run into a cycle. Count the back-reference as 1 and stop.
	if ( ! analyzed_vals->insert(this).second )
		return 1;

	const auto fp = ComputeFootprint(analyzed_vals);

	// Remove the marker again so the aggregate can be revisited when
	// it's reached outside the context of a cycle.
	analyzed_vals->erase(this);

	return fp;
	}
ValManager::ValManager() ValManager::ValManager()
{ {
empty_string = make_intrusive<StringVal>(""); empty_string = make_intrusive<StringVal>("");

View file

@ -121,6 +121,20 @@ public:
// size depends on the Val's type. // size depends on the Val's type.
virtual ValPtr SizeVal() const; virtual ValPtr SizeVal() const;
/**
 * Computes the Val's "footprint": a count of the elements / Val
 * objects it includes, either directly or indirectly. The result
 * is not an exact measure; it is intended for comparing values
 * with one another, with a larger footprint correlating with more
 * memory consumption.
 *
 * @return The total footprint.
 */
unsigned int Footprint() const
	{
	// Start the internal, set-tracking variant off with an empty set
	// that lives only for the duration of this call.
	std::unordered_set<const Val*> visited;
	return Footprint(&visited);
	}
// Bytes in total value object. // Bytes in total value object.
[[deprecated("Remove in v5.1. MemoryAllocation() is deprecated and will be removed. See " [[deprecated("Remove in v5.1. MemoryAllocation() is deprecated and will be removed. See "
"GHI-572.")]] virtual unsigned int "GHI-572.")]] virtual unsigned int
@ -230,6 +244,7 @@ protected:
friend class EnumType; friend class EnumType;
friend class ListVal; friend class ListVal;
friend class RecordVal; friend class RecordVal;
friend class TableVal;
friend class VectorVal; friend class VectorVal;
friend class ValManager; friend class ValManager;
friend class TableEntryVal; friend class TableEntryVal;
@ -243,6 +258,21 @@ protected:
explicit Val(TypePtr t) noexcept : type(std::move(t)) { } explicit Val(TypePtr t) noexcept : type(std::move(t)) { }
/**
* Internal function for computing a Val's "footprint".
*
* @param analyzed_vals A pointer to a set used to track which values
* have been analyzed to date, used to prevent infinite recursion.
* The set should be empty (but not a null pointer) on the first call.
*
* @return The total footprint.
*/
unsigned int Footprint(std::unordered_set<const Val*>* analyzed_vals) const;
/**
 * Type-specific part of the footprint computation, invoked by the
 * internal Footprint() method. This default implementation counts
 * the value as a single element and ignores the tracking set; the
 * container classes (ListVal, TableVal, RecordVal, VectorVal)
 * override it to also account for their elements.
 *
 * @param analyzed_vals The set of values currently being analyzed,
 * to be passed along when computing the footprint of nested values.
 *
 * @return The footprint of this value.
 */
virtual unsigned int ComputeFootprint(std::unordered_set<const Val*>* analyzed_vals) const
{
return 1;
}
// For internal use by the Val::Clone() methods. // For internal use by the Val::Clone() methods.
struct CloneState struct CloneState
{ {
@ -671,6 +701,8 @@ public:
MemoryAllocation() const override; MemoryAllocation() const override;
protected: protected:
unsigned int ComputeFootprint(std::unordered_set<const Val*>* analyzed_vals) const override;
ValPtr DoClone(CloneState* state) override; ValPtr DoClone(CloneState* state) override;
std::vector<ValPtr> vals; std::vector<ValPtr> vals;
@ -1031,6 +1063,8 @@ protected:
// Sends data on to backing Broker Store // Sends data on to backing Broker Store
void SendToStore(const Val* index, const TableEntryVal* new_entry_val, OnChangeType tpe); void SendToStore(const Val* index, const TableEntryVal* new_entry_val, OnChangeType tpe);
unsigned int ComputeFootprint(std::unordered_set<const Val*>* analyzed_vals) const override;
ValPtr DoClone(CloneState* state) override; ValPtr DoClone(CloneState* state) override;
TableTypePtr table_type; TableTypePtr table_type;
@ -1439,6 +1473,8 @@ private:
// Just for template inferencing. // Just for template inferencing.
RecordVal* Get() { return this; } RecordVal* Get() { return this; }
unsigned int ComputeFootprint(std::unordered_set<const Val*>* analyzed_vals) const override;
// Keep this handy for quick access during low-level operations. // Keep this handy for quick access during low-level operations.
RecordTypePtr rt; RecordTypePtr rt;
@ -1637,6 +1673,9 @@ protected:
ValPtr At(unsigned int index) const; ValPtr At(unsigned int index) const;
void ValDescribe(ODesc* d) const override; void ValDescribe(ODesc* d) const override;
unsigned int ComputeFootprint(std::unordered_set<const Val*>* analyzed_vals) const override;
ValPtr DoClone(CloneState* state) override; ValPtr DoClone(CloneState* state) override;
private: private:

View file

@ -1985,6 +1985,48 @@ function global_sizes%(%): var_sizes &deprecated="Remove in v5.1. MemoryAllocati
return sizes; return sizes;
%} %}
## Generates a table of the "footprint" of all global container variables.
## A footprint is (approximately) the number of objects a global contains,
## either directly or indirectly. It is not meant to be precise, but rather
## comparable: a larger footprint correlates with more memory consumption.
## The table is indexed by variable name and yields that variable's footprint.
##
## Returns: A table that maps variable names to their footprints.
##
## .. zeek:see:: val_footprint
function global_container_footprints%(%): var_sizes
	%{
	auto footprints = zeek::make_intrusive<zeek::TableVal>(IntrusivePtr{zeek::NewRef{}, var_sizes});

	for ( const auto& entry : zeek::detail::global_scope()->Vars() )
		{
		const auto& id = entry.second;
		auto val = id->GetVal();

		// Only globals that currently hold an aggregate value are reported.
		if ( val && IsAggr(val->GetType()) )
			{
			auto name = zeek::make_intrusive<zeek::StringVal>(id->Name());
			auto fp = zeek::val_mgr->Count(val->Footprint());
			footprints->Assign(std::move(name), std::move(fp));
			}
		}

	return footprints;
	%}
## Computes the "footprint" of a value: the number of objects the value
## contains, either directly or indirectly. The number is not meant to be
## precise, but rather comparable: a larger footprint correlates with more
## memory consumption.
##
## v: The value whose footprint to compute.
##
## Returns: The footprint.
##
## .. zeek:see:: global_container_footprints
function val_footprint%(v: any%): count
	%{
	const auto fp = v->Footprint();
	return zeek::val_mgr->Count(fp);
	%}
## Generates a table with information about all global identifiers. The table ## Generates a table with information about all global identifiers. The table
## value is a record containing the type name of the identifier, whether it is ## value is a record containing the type name of the identifier, whether it is
## exported, a constant, an enum constant, redefinable, and its value (if it ## exported, a constant, an enum constant, redefinable, and its value (if it

View file

@ -0,0 +1,29 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
bool, 1
count, 1
int, 1
double, 1
string, 1
pattern, 1
addr, 1
subnet, 1
port, 1
l1, 3
l1b, 6
l2, 7
l2b, 9
v1, 8
v2, 18
v3, 11
t1, 12
t2, 18
t3, 46
t4, 19
s1, 9
s2, 15
s3, 20
s4, 9
3
3
srt, 0
srt, 4

View file

@ -0,0 +1,109 @@
# The ASAN CI job complains (correctly!) about this script leaking memory
# due to the script-level cycles it includes as stress-tests.
# @TEST-REQUIRES: test "${ZEEK_CI_ASAN_SKIP_TEST}" != "1"
#
# @TEST-EXEC: zeek -b %INPUT >out
# @TEST-EXEC: btest-diff out
# Record whose fields carry neither &default nor &optional attributes.
type r1: record {
a: count;
b: double;
c: string;
};
# Record mixing a plain field, fields with &default values, and
# &optional fields.
type r2: record {
a: count;
b1: double &default = 1.0;
b2: double &default = 2.0;
c: string &optional;
d: string &optional;
};
# For testing mutually recursive records: X is first declared empty so
# that Y can refer to it, and is then extended with an optional field
# of type Y.
type X: record {
};
type Y: record {
x: X;
};
redef record X += {
y: Y &optional;
};
# Prints the footprint of a range of values -- non-container values,
# records, vectors, tables, sets, and finally cyclic structures -- for
# comparison against the test baseline.
event zeek_init()
{
# Non-container values.
print "bool", val_footprint(T);
print "count", val_footprint(4);
print "int", val_footprint(-4);
print "double", val_footprint(4e99);
print "string", val_footprint("longlonglong");
print "pattern", val_footprint(/longlonglong/);
print "addr", val_footprint([ffff::ffff]);
print "subnet", val_footprint([ffff::ffff]/99);
print "port", val_footprint(9999/udp);
# Records, both without and with explicitly assigned fields.
local l1: r1;
print "l1", val_footprint(l1);
local l1b = r1($a=3, $b=3.0, $c="3");
print "l1b", val_footprint(l1b);
local l2: r2;
print "l2", val_footprint(l2);
local l2b = r2($a=3, $b1=99.0, $c="I'm here");
print "l2b", val_footprint(l2b);
# Vectors of counts, of vectors, and of records.
local v1 = vector(9, 7, 3, 1);
print "v1", val_footprint(v1);
local v2 = vector(v1, v1);
print "v2", val_footprint(v2);
local v3 = vector(l1, l1b);
print "v3", val_footprint(v3);
local t1 = table([1] = 1, [2] = 4, [3] = 9);
# Note, table and set footprints include the count of container
# entries. Because table indices are ListVal's, those add their own
# container entry counts into the sum as well.
print "t1", val_footprint(t1);
local t2 = table([1, 3] = 1, [2, 3] = 4, [3, 3] = 9);
print "t2", val_footprint(t2);
local t3 = table([1, 3] = v2, [2, 3] = v2);
print "t3", val_footprint(t3);
local t4 = table([1, 3] = l1, [2, 3] = l1b);
print "t4", val_footprint(t4);
# Sets with single-value, multi-value, record, and vector indices.
local s1 = set(1, 4, 9);
print "s1", val_footprint(s1);
local s2 = set([1, 3], [2, 3], [3, 3]);
print "s2", val_footprint(s2);
local s3: set[r1, count];
add s3[l1b, 9];
add s3[l1b, 12];
print "s3", val_footprint(s3);
local s4 = set(vector(l1b), vector(l1b), vector(l1b));
print "s4", val_footprint(s4);
# Mutually recursive records: x and y are made to refer to each other.
local x: X;
local y: Y;
x$y = y;
y$x = x;
print val_footprint(x);
print val_footprint(y);
# A table that contains itself; its footprint is printed both before
# and after the self-reference is added.
local self_ref_table: table[string] of any;
print "srt", val_footprint(self_ref_table);
self_ref_table["x"] = self_ref_table;
print "srt", val_footprint(self_ref_table);
}