From 16c37034de476be66955c24e8fa476342bc3d1ea Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Thu, 28 Apr 2022 16:42:14 -0700 Subject: [PATCH 01/11] added value_footprint() and global_container_footprints() BiFs --- src/Val.cc | 72 +++++++++++++++ src/Val.h | 20 +++++ src/zeek.bif | 43 +++++++++ testing/btest/Baseline/bifs.footprint/out | 49 ++++++++++ testing/btest/bifs/footprint.zeek | 103 ++++++++++++++++++++++ 5 files changed, 287 insertions(+) create mode 100644 testing/btest/Baseline/bifs.footprint/out create mode 100644 testing/btest/bifs/footprint.zeek diff --git a/src/Val.cc b/src/Val.cc index 1d0055f71b..d2d5079699 100644 --- a/src/Val.cc +++ b/src/Val.cc @@ -1322,6 +1322,18 @@ ValPtr ListVal::DoClone(CloneState* state) return lv; } +unsigned int ListVal::Footprint(bool count_entries) const + { + unsigned int fp = 0; + for ( const auto& val : vals ) + fp += val->Footprint(count_entries); + + if ( count_entries ) + fp += vals.size(); + + return fp; + } + unsigned int ListVal::MemoryAllocation() const { #pragma GCC diagnostic push @@ -2673,6 +2685,27 @@ ValPtr TableVal::DoClone(CloneState* state) return tv; } +unsigned int TableVal::Footprint(bool count_entries) const + { + unsigned int fp = 0; + + for ( const auto& iter : *table_val ) + { + auto k = iter.GetHashKey(); + auto vl = table_hash->RecoverVals(*k); + auto v = iter.GetValue()->GetVal(); + + fp += vl->Footprint(count_entries); + if ( v ) + fp += v->Footprint(count_entries); + } + + if ( count_entries ) + fp += table_val->Length(); + + return fp; + } + unsigned int TableVal::MemoryAllocation() const { unsigned int size = 0; @@ -3038,6 +3071,27 @@ ValPtr RecordVal::DoClone(CloneState* state) return rv; } +unsigned int RecordVal::Footprint(bool count_entries) const + { + unsigned int fp = 0; + int n = NumFields(); + + for ( auto i = 0; i < n; ++i ) + { + if ( ! HasField(i) ) + continue; + + auto f_i = GetField(i); + if ( f_i ) + fp += f_i->Footprint(count_entries); + } + + if ( count_entries ) + fp += n; + + return fp; + } + unsigned int RecordVal::MemoryAllocation() const { unsigned int size = 0; @@ -3561,6 +3615,24 @@ bool VectorVal::Concretize(const TypePtr& t) return true; } +unsigned int VectorVal::Footprint(bool count_entries) const + { + unsigned int fp = 0; + auto n = vector_val->size(); + + for ( auto i = 0U; i < n; ++i ) + { + auto v = At(i); + if ( v ) + fp += v->Footprint(count_entries); + } + + if ( count_entries ) + fp += n; + + return fp; + } + unsigned int VectorVal::Resize(unsigned int new_num_elements) { unsigned int oldsize = vector_val->size(); diff --git a/src/Val.h b/src/Val.h index b7cae59690..74f5295c25 100644 --- a/src/Val.h +++ b/src/Val.h @@ -121,6 +121,18 @@ public: // size depends on the Val's type. virtual ValPtr SizeVal() const; + /** + * Returns the Val's "footprint", i.e., how many atomic (non-container) + * values it includes, either directly or indirectly. + * + * @param count_entries If true, (recursively) include in the + * footprint the count of the number of container elements as well + * as each element's footprint. + * + * @return The total footprint, optionally including element counts. + */ + virtual unsigned int Footprint(bool count_entries) const { return 1; } + // Bytes in total value object. [[deprecated("Remove in v5.1. MemoryAllocation() is deprecated and will be removed. See " "GHI-572.")]] virtual unsigned int @@ -666,6 +678,8 @@ public: void Describe(ODesc* d) const override; + unsigned int Footprint(bool count_entries) const override; + [[deprecated("Remove in v5.1. MemoryAllocation() is deprecated and will be removed. See " "GHI-572.")]] unsigned int MemoryAllocation() const override; @@ -941,6 +955,8 @@ public: // the function in the frame allowing it to capture its closure. void InitDefaultFunc(detail::Frame* f); + unsigned int Footprint(bool count_entries) const override; + [[deprecated("Remove in v5.1. MemoryAllocation() is deprecated and will be removed. See " "GHI-572.")]] unsigned int MemoryAllocation() const override; @@ -1367,6 +1383,8 @@ public: } RecordValPtr CoerceTo(RecordTypePtr other, bool allow_orphaning = false); + unsigned int Footprint(bool count_entries) const override; + [[deprecated("Remove in v5.1. MemoryAllocation() is deprecated and will be removed. See " "GHI-572.")]] unsigned int MemoryAllocation() const override; @@ -1624,6 +1642,8 @@ public: const auto& RawYieldType() const { return yield_type; } const auto& RawYieldTypes() const { return yield_types; } + unsigned int Footprint(bool count_entries) const override; + protected: /** * Returns the element at a given index or nullptr if it does not exist. diff --git a/src/zeek.bif b/src/zeek.bif index 4665831d65..6df100aa8a 100644 --- a/src/zeek.bif +++ b/src/zeek.bif @@ -1985,6 +1985,49 @@ function global_sizes%(%): var_sizes &deprecated="Remove in v5.1. MemoryAllocati return sizes; %} +## Generates a table of the footprint of all global container variables. +## The table index is the variable name and the value is the footprint. +## The argument specifies whether the footprint should (recursively) include +## the number of entries in each global. +## +## Returns: A table that maps variable names to their footprints. +## +## .. zeek:see:: value_footprint +function global_container_footprints%(count_entries: bool%): var_sizes + %{ + auto sizes = zeek::make_intrusive(IntrusivePtr{zeek::NewRef{}, var_sizes}); + const auto& globals = zeek::detail::global_scope()->Vars(); + + for ( const auto& global : globals ) + { + auto& id = global.second; + auto v = id->GetVal(); + + if ( ! v || ! IsAggr(v->GetType()) ) + continue; + + auto id_name = zeek::make_intrusive(id->Name()); + auto fp = zeek::val_mgr->Count(v->Footprint(count_entries)); + sizes->Assign(std::move(id_name), std::move(fp)); + } + + return sizes; + %} + +## Computes a value's "footprint" (number of atomic elements included, either +## directly or indirectly). The argument, if true, specifies that the +## footprint should also include the number of entries in each included +## container. +## +## Returns: the total number of atomic elements plus (optionally) the +## number of entries in included containers. +## +## .. zeek:see:: global_container_footprints +function value_footprint%(v: any, count_entries: bool%): count + %{ + return zeek::val_mgr->Count(v->Footprint(count_entries)); + %} + ## Generates a table with information about all global identifiers. The table ## value is a record containing the type name of the identifier, whether it is ## exported, a constant, an enum constant, redefinable, and its value (if it diff --git a/testing/btest/Baseline/bifs.footprint/out b/testing/btest/Baseline/bifs.footprint/out new file mode 100644 index 0000000000..3d0f6d4b9b --- /dev/null +++ b/testing/btest/Baseline/bifs.footprint/out @@ -0,0 +1,49 @@ +### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. +bool, 1 +bool, 1 +count, 1 +count, 1 +int, 1 +int, 1 +double, 1 +double, 1 +string, 1 +string, 1 +pattern, 1 +pattern, 1 +addr, 1 +addr, 1 +subnet, 1 +subnet, 1 +port, 1 +port, 1 +l1, 0 +l1, 3 +l1b, 3 +l1b, 6 +l2, 2 +l2, 7 +l2b, 4 +l2b, 9 +v1, 4 +v1, 8 +v2, 8 +v2, 18 +v3, 3 +v3, 11 +t1, 6 +t1, 12 +t2, 9 +t2, 18 +t3, 20 +t3, 46 +t4, 7 +t4, 19 +s1, 3 +s1, 9 +s2, 6 +s2, 15 +s3, 8 +s3, 20 +s4, 3 +s4, 9 diff --git a/testing/btest/bifs/footprint.zeek b/testing/btest/bifs/footprint.zeek new file mode 100644 index 0000000000..6f9666a2a8 --- /dev/null +++ b/testing/btest/bifs/footprint.zeek @@ -0,0 +1,103 @@ +# @TEST-EXEC: zeek -b %INPUT >out +# @TEST-EXEC: btest-diff out + +type r1: record { + a: count; + b: double; + c: string; +}; + +type r2: record { + a: count; + b1: double &default = 1.0; + b2: double &default = 2.0; + c: string &optional; + d: string &optional; +}; + +event zeek_init() + { + print "bool", value_footprint(T, F); + print "bool", value_footprint(T, T); + print "count", value_footprint(3, F); + print "count", value_footprint(4, T); + print "int", value_footprint(-3, F); + print "int", value_footprint(-4, T); + print "double", value_footprint(-3.0, F); + print "double", value_footprint(4e99, T); + print "string", value_footprint("short", F); + print "string", value_footprint("longlonglong", T); + print "pattern", value_footprint(/short/, F); + print "pattern", value_footprint(/longlonglong/, T); + print "addr", value_footprint(1.2.3.4, F); + print "addr", value_footprint([ffff::ffff], T); + print "subnet", value_footprint(1.2.3.4/22, F); + print "subnet", value_footprint([ffff::ffff]/99, T); + print "port", value_footprint(123/tcp, F); + print "port", value_footprint(9999/udp, T); + + local l1: r1; + print "l1", value_footprint(l1, F); + print "l1", value_footprint(l1, T); + + local l1b = r1($a=3, $b=3.0, $c="3"); + print "l1b", value_footprint(l1b, F); + print "l1b", value_footprint(l1b, T); + + local l2: r2; + print "l2", value_footprint(l2, F); + print "l2", value_footprint(l2, T); + + local l2b = r2($a=3, $b1=99.0, $c="I'm here"); + print "l2b", value_footprint(l2b, F); + print "l2b", value_footprint(l2b, T); + + local v1 = vector(9, 7, 3, 1); + print "v1", value_footprint(v1, F); + print "v1", value_footprint(v1, T); + + local v2 = vector(v1, v1); + print "v2", value_footprint(v2, F); + print "v2", value_footprint(v2, T); + + local v3 = vector(l1, l1b); + print "v3", value_footprint(v3, F); + print "v3", value_footprint(v3, T); + + local t1 = table([1] = 1, [2] = 4, [3] = 9); + print "t1", value_footprint(t1, F); + # Note, table and set footprint values using count_entries=T because + # table indices are ListVal's, so those add their own container + # entry counts into the sum. + print "t1", value_footprint(t1, T); + + local t2 = table([1, 3] = 1, [2, 3] = 4, [3, 3] = 9); + print "t2", value_footprint(t2, F); + print "t2", value_footprint(t2, T); + + local t3 = table([1, 3] = v2, [2, 3] = v2); + print "t3", value_footprint(t3, F); + print "t3", value_footprint(t3, T); + + local t4 = table([1, 3] = l1, [2, 3] = l1b); + print "t4", value_footprint(t4, F); + print "t4", value_footprint(t4, T); + + local s1 = set(1, 4, 9); + print "s1", value_footprint(s1, F); + print "s1", value_footprint(s1, T); + + local s2 = set([1, 3], [2, 3], [3, 3]); + print "s2", value_footprint(s2, F); + print "s2", value_footprint(s2, T); + + local s3: set[r1, count]; + add s3[l1b, 9]; + add s3[l1b, 12]; + print "s3", value_footprint(s3, F); + print "s3", value_footprint(s3, T); + + local s4 = set(vector(l1b), vector(l1b), vector(l1b)); + print "s4", value_footprint(s4, F); + print "s4", value_footprint(s4, T); + } From 5a1c33ed4aa0294720570ea42853c1bd6dd00207 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Fri, 29 Apr 2022 08:44:34 -0700 Subject: [PATCH 02/11] fix for tracking footprints of mutually-recursive records --- src/Val.cc | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/Val.cc b/src/Val.cc index d2d5079699..93f38c02cc 100644 --- a/src/Val.cc +++ b/src/Val.cc @@ -3073,6 +3073,17 @@ ValPtr RecordVal::DoClone(CloneState* state) unsigned int RecordVal::Footprint(bool count_entries) const { + // Which records we're in the process of analyzing - used to + // avoid infinite recursion for circular types (which can only + // occur due to the presence of records). + static std::unordered_set pending_records; + + if ( pending_records.count(this) > 0 ) + // Footprint is 1 for the RecordVal itself. + return 1; + + pending_records.insert(this); + unsigned int fp = 0; int n = NumFields(); @@ -3089,6 +3100,8 @@ unsigned int RecordVal::Footprint(bool count_entries) const if ( count_entries ) fp += n; + pending_records.erase(this); + return fp; } From b670046a69b96e13c6803ac207e8f41663a54e17 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Fri, 29 Apr 2022 08:44:58 -0700 Subject: [PATCH 03/11] btest for mutually-recursive case --- testing/btest/Baseline/bifs.footprint/out | 4 ++++ testing/btest/bifs/footprint.zeek | 23 +++++++++++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/testing/btest/Baseline/bifs.footprint/out b/testing/btest/Baseline/bifs.footprint/out index 3d0f6d4b9b..922ae4ff7a 100644 --- a/testing/btest/Baseline/bifs.footprint/out +++ b/testing/btest/Baseline/bifs.footprint/out @@ -47,3 +47,7 @@ s3, 8 s3, 20 s4, 3 s4, 9 +1 +3 +1 +3 diff --git a/testing/btest/bifs/footprint.zeek b/testing/btest/bifs/footprint.zeek index 6f9666a2a8..2f719db380 100644 --- a/testing/btest/bifs/footprint.zeek +++ b/testing/btest/bifs/footprint.zeek @@ -15,6 +15,18 @@ type r2: record { d: string &optional; }; +# For testing mutually recursive records. +type X: record { +}; + +type Y: record { + x: X; +}; + +redef record X += { + y: Y &optional; +}; + event zeek_init() { print "bool", value_footprint(T, F); @@ -100,4 +112,15 @@ event zeek_init() local s4 = set(vector(l1b), vector(l1b), vector(l1b)); print "s4", value_footprint(s4, F); print "s4", value_footprint(s4, T); + + local x: X; + local y: Y; + + x$y = y; + y$x = x; + + print value_footprint(x, F); + print value_footprint(x, T); + print value_footprint(y, F); + print value_footprint(y, T); } From edf276520a56ffedae6b4e5ffea65c82f2530227 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Fri, 29 Apr 2022 08:59:26 -0700 Subject: [PATCH 04/11] make including count of container elements non-optional --- src/Val.cc | 39 ++++-------- src/Val.h | 22 +++---- src/zeek.bif | 25 ++++---- testing/btest/Baseline/bifs.footprint/out | 26 -------- testing/btest/bifs/footprint.zeek | 78 ++++++++--------------- 5 files changed, 62 insertions(+), 128 deletions(-) diff --git a/src/Val.cc b/src/Val.cc index 93f38c02cc..052e7d0ee7 100644 --- a/src/Val.cc +++ b/src/Val.cc @@ -1322,14 +1322,12 @@ ValPtr ListVal::DoClone(CloneState* state) return lv; } -unsigned int ListVal::Footprint(bool count_entries) const +unsigned int ListVal::Footprint() const { - unsigned int fp = 0; - for ( const auto& val : vals ) - fp += val->Footprint(count_entries); + unsigned int fp = vals.size(); - if ( count_entries ) - fp += vals.size(); + for ( const auto& val : vals ) + fp += val->Footprint(); return fp; } @@ -2685,9 +2683,9 @@ ValPtr TableVal::DoClone(CloneState* state) return tv; } -unsigned int TableVal::Footprint(bool count_entries) const +unsigned int TableVal::Footprint() const { - unsigned int fp = 0; + unsigned int fp = table_val->Length(); for ( const auto& iter : *table_val ) { @@ -2695,14 +2693,11 @@ unsigned int TableVal::Footprint(bool count_entries) const auto vl = table_hash->RecoverVals(*k); auto v = iter.GetValue()->GetVal(); - fp += vl->Footprint(count_entries); + fp += vl->Footprint(); if ( v ) - fp += v->Footprint(count_entries); + fp += v->Footprint(); } - if ( count_entries ) - fp += table_val->Length(); - return fp; } @@ -3071,7 +3066,7 @@ ValPtr RecordVal::DoClone(CloneState* state) return rv; } -unsigned int RecordVal::Footprint(bool count_entries) const +unsigned int RecordVal::Footprint() const { // Which records we're in the process of analyzing - used to // avoid infinite recursion for circular types (which can only @@ -3084,8 +3079,8 @@ unsigned int RecordVal::Footprint(bool count_entries) const pending_records.insert(this); - unsigned int fp = 0; int n = NumFields(); + unsigned int fp = n; for ( auto i = 0; i < n; ++i ) { @@ -3094,12 +3089,9 @@ unsigned int RecordVal::Footprint(bool count_entries) const auto f_i = GetField(i); if ( f_i ) - fp += f_i->Footprint(count_entries); + fp += f_i->Footprint(); } - if ( count_entries ) - fp += n; - pending_records.erase(this); return fp; @@ -3628,21 +3620,18 @@ bool VectorVal::Concretize(const TypePtr& t) return true; } -unsigned int VectorVal::Footprint(bool count_entries) const +unsigned int VectorVal::Footprint() const { - unsigned int fp = 0; auto n = vector_val->size(); + unsigned int fp = n; for ( auto i = 0U; i < n; ++i ) { auto v = At(i); if ( v ) - fp += v->Footprint(count_entries); + fp += v->Footprint(); } - if ( count_entries ) - fp += n; - return fp; } diff --git a/src/Val.h b/src/Val.h index 74f5295c25..d3c605022b 100644 --- a/src/Val.h +++ b/src/Val.h @@ -122,16 +122,14 @@ public: virtual ValPtr SizeVal() const; /** - * Returns the Val's "footprint", i.e., how many atomic (non-container) - * values it includes, either directly or indirectly. + * Returns the Val's "footprint", i.e., how many elements / Val + * objects the value includes, either directly or indirectly. + * The number is not meant to be precise, but rather comparable: + * larger footprint correlates with more memory consumption. * - * @param count_entries If true, (recursively) include in the - * footprint the count of the number of container elements as well - * as each element's footprint. - * - * @return The total footprint, optionally including element counts. + * @return The total footprint. */ - virtual unsigned int Footprint(bool count_entries) const { return 1; } + virtual unsigned int Footprint() const { return 1; } // Bytes in total value object. [[deprecated("Remove in v5.1. MemoryAllocation() is deprecated and will be removed. See " @@ -678,7 +676,7 @@ public: void Describe(ODesc* d) const override; - unsigned int Footprint(bool count_entries) const override; + unsigned int Footprint() const override; [[deprecated("Remove in v5.1. MemoryAllocation() is deprecated and will be removed. See " "GHI-572.")]] unsigned int @@ -955,7 +953,7 @@ public: // the function in the frame allowing it to capture its closure. void InitDefaultFunc(detail::Frame* f); - unsigned int Footprint(bool count_entries) const override; + unsigned int Footprint() const override; [[deprecated("Remove in v5.1. MemoryAllocation() is deprecated and will be removed. See " "GHI-572.")]] unsigned int @@ -1383,7 +1381,7 @@ public: } RecordValPtr CoerceTo(RecordTypePtr other, bool allow_orphaning = false); - unsigned int Footprint(bool count_entries) const override; + unsigned int Footprint() const override; [[deprecated("Remove in v5.1. MemoryAllocation() is deprecated and will be removed. See " "GHI-572.")]] unsigned int @@ -1642,7 +1640,7 @@ public: const auto& RawYieldType() const { return yield_type; } const auto& RawYieldTypes() const { return yield_types; } - unsigned int Footprint(bool count_entries) const override; + unsigned int Footprint() const override; protected: /** diff --git a/src/zeek.bif b/src/zeek.bif index 6df100aa8a..72c18b95f2 100644 --- a/src/zeek.bif +++ b/src/zeek.bif @@ -1985,15 +1985,16 @@ function global_sizes%(%): var_sizes &deprecated="Remove in v5.1. MemoryAllocati return sizes; %} -## Generates a table of the footprint of all global container variables. +## Generates a table of the "footprint" of all global container variables. +## This is (approximately) the number of objects the global contains either +## directly or indirectly. The number is not meant to be precise, but +## rather comparable: larger footprint correlates with more memory consumption. ## The table index is the variable name and the value is the footprint. -## The argument specifies whether the footprint should (recursively) include -## the number of entries in each global. ## ## Returns: A table that maps variable names to their footprints. ## ## .. zeek:see:: value_footprint -function global_container_footprints%(count_entries: bool%): var_sizes +function global_container_footprints%(%): var_sizes %{ auto sizes = zeek::make_intrusive(IntrusivePtr{zeek::NewRef{}, var_sizes}); const auto& globals = zeek::detail::global_scope()->Vars(); @@ -2007,25 +2008,23 @@ function global_container_footprints%(count_entries: bool%): var_sizes continue; auto id_name = zeek::make_intrusive(id->Name()); - auto fp = zeek::val_mgr->Count(v->Footprint(count_entries)); + auto fp = zeek::val_mgr->Count(v->Footprint()); sizes->Assign(std::move(id_name), std::move(fp)); } return sizes; %} -## Computes a value's "footprint" (number of atomic elements included, either -## directly or indirectly). The argument, if true, specifies that the -## footprint should also include the number of entries in each included -## container. +## Computes a value's "footprint": the number of objects the value contains +## either directly or indirectly. The number is not meant to be precise, but +## rather comparable: larger footprint correlates with more memory consumption. ## -## Returns: the total number of atomic elements plus (optionally) the -## number of entries in included containers. +## Returns: the footprint. ## ## .. zeek:see:: global_container_footprints -function value_footprint%(v: any, count_entries: bool%): count +function value_footprint%(v: any%): count %{ - return zeek::val_mgr->Count(v->Footprint(count_entries)); + return zeek::val_mgr->Count(v->Footprint()); %} ## Generates a table with information about all global identifiers. The table diff --git a/testing/btest/Baseline/bifs.footprint/out b/testing/btest/Baseline/bifs.footprint/out index 922ae4ff7a..b2aea3b37e 100644 --- a/testing/btest/Baseline/bifs.footprint/out +++ b/testing/btest/Baseline/bifs.footprint/out @@ -1,53 +1,27 @@ ### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. bool, 1 -bool, 1 -count, 1 count, 1 int, 1 -int, 1 -double, 1 double, 1 string, 1 -string, 1 -pattern, 1 pattern, 1 addr, 1 -addr, 1 -subnet, 1 subnet, 1 port, 1 -port, 1 -l1, 0 l1, 3 -l1b, 3 l1b, 6 -l2, 2 l2, 7 -l2b, 4 l2b, 9 -v1, 4 v1, 8 -v2, 8 v2, 18 -v3, 3 v3, 11 -t1, 6 t1, 12 -t2, 9 t2, 18 -t3, 20 t3, 46 -t4, 7 t4, 19 -s1, 3 s1, 9 -s2, 6 s2, 15 -s3, 8 s3, 20 -s4, 3 s4, 9 -1 3 -1 3 diff --git a/testing/btest/bifs/footprint.zeek b/testing/btest/bifs/footprint.zeek index 2f719db380..44737ac6f2 100644 --- a/testing/btest/bifs/footprint.zeek +++ b/testing/btest/bifs/footprint.zeek @@ -29,89 +29,65 @@ redef record X += { event zeek_init() { - print "bool", value_footprint(T, F); - print "bool", value_footprint(T, T); - print "count", value_footprint(3, F); - print "count", value_footprint(4, T); - print "int", value_footprint(-3, F); - print "int", value_footprint(-4, T); - print "double", value_footprint(-3.0, F); - print "double", value_footprint(4e99, T); - print "string", value_footprint("short", F); - print "string", value_footprint("longlonglong", T); - print "pattern", value_footprint(/short/, F); - print "pattern", value_footprint(/longlonglong/, T); - print "addr", value_footprint(1.2.3.4, F); - print "addr", value_footprint([ffff::ffff], T); - print "subnet", value_footprint(1.2.3.4/22, F); - print "subnet", value_footprint([ffff::ffff]/99, T); - print "port", value_footprint(123/tcp, F); - print "port", value_footprint(9999/udp, T); + print "bool", val_footprint(T); + print "count", val_footprint(4); + print "int", val_footprint(-4); + print "double", val_footprint(4e99); + print "string", val_footprint("longlonglong"); + print "pattern", val_footprint(/longlonglong/); + print "addr", val_footprint([ffff::ffff]); + print "subnet", val_footprint([ffff::ffff]/99); + print "port", val_footprint(9999/udp); local l1: r1; - print "l1", value_footprint(l1, F); - print "l1", value_footprint(l1, T); + print "l1", val_footprint(l1); local l1b = r1($a=3, $b=3.0, $c="3"); - print "l1b", value_footprint(l1b, F); - print "l1b", value_footprint(l1b, T); + print "l1b", val_footprint(l1b); local l2: r2; - print "l2", value_footprint(l2, F); - print "l2", value_footprint(l2, T); + print "l2", val_footprint(l2); local l2b = r2($a=3, $b1=99.0, $c="I'm here"); - print "l2b", value_footprint(l2b, F); - print "l2b", value_footprint(l2b, T); + print "l2b", val_footprint(l2b); local v1 = vector(9, 7, 3, 1); - print "v1", value_footprint(v1, F); - print "v1", value_footprint(v1, T); + print "v1", val_footprint(v1); local v2 = vector(v1, v1); - print "v2", value_footprint(v2, F); - print "v2", value_footprint(v2, T); + print "v2", val_footprint(v2); local v3 = vector(l1, l1b); - print "v3", value_footprint(v3, F); - print "v3", value_footprint(v3, T); + print "v3", val_footprint(v3); local t1 = table([1] = 1, [2] = 4, [3] = 9); - print "t1", value_footprint(t1, F); # Note, table and set footprint values using count_entries=T because # table indices are ListVal's, so those add their own container # entry counts into the sum. - print "t1", value_footprint(t1, T); + print "t1", val_footprint(t1); local t2 = table([1, 3] = 1, [2, 3] = 4, [3, 3] = 9); - print "t2", value_footprint(t2, F); - print "t2", value_footprint(t2, T); + print "t2", val_footprint(t2); local t3 = table([1, 3] = v2, [2, 3] = v2); - print "t3", value_footprint(t3, F); - print "t3", value_footprint(t3, T); + print "t3", val_footprint(t3); local t4 = table([1, 3] = l1, [2, 3] = l1b); - print "t4", value_footprint(t4, F); - print "t4", value_footprint(t4, T); + print "t4", val_footprint(t4); local s1 = set(1, 4, 9); - print "s1", value_footprint(s1, F); - print "s1", value_footprint(s1, T); + print "s1", val_footprint(s1); local s2 = set([1, 3], [2, 3], [3, 3]); - print "s2", value_footprint(s2, F); - print "s2", value_footprint(s2, T); + print "s2", val_footprint(s2); local s3: set[r1, count]; add s3[l1b, 9]; add s3[l1b, 12]; - print "s3", value_footprint(s3, F); - print "s3", value_footprint(s3, T); + print "s3", val_footprint(s3); local s4 = set(vector(l1b), vector(l1b), vector(l1b)); - print "s4", value_footprint(s4, F); - print "s4", value_footprint(s4, T); + print "s4", val_footprint(s4); local x: X; local y: Y; @@ -119,8 +95,6 @@ event zeek_init() x$y = y; y$x = x; - print value_footprint(x, F); - print value_footprint(x, T); - print value_footprint(y, F); - print value_footprint(y, T); + print val_footprint(x); + print val_footprint(y); } From f43a9f9babb196718f8ed3476a0cd393b4b9198e Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Fri, 29 Apr 2022 09:03:56 -0700 Subject: [PATCH 05/11] change value_footprint() to val_footprint() to be more similar to val_size() --- src/zeek.bif | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/zeek.bif b/src/zeek.bif index 72c18b95f2..07bfdf6e94 100644 --- a/src/zeek.bif +++ b/src/zeek.bif @@ -1993,7 +1993,7 @@ function global_sizes%(%): var_sizes &deprecated="Remove in v5.1. MemoryAllocati ## ## Returns: A table that maps variable names to their footprints. ## -## .. zeek:see:: value_footprint +## .. zeek:see:: val_footprint function global_container_footprints%(%): var_sizes %{ auto sizes = zeek::make_intrusive(IntrusivePtr{zeek::NewRef{}, var_sizes}); @@ -2022,7 +2022,7 @@ function global_container_footprints%(%): var_sizes ## Returns: the footprint. ## ## .. zeek:see:: global_container_footprints -function value_footprint%(v: any%): count +function val_footprint%(v: any%): count %{ return zeek::val_mgr->Count(v->Footprint()); %} From 15123b6768ee8ed3220a9d6ca2b476d377a80673 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Mon, 2 May 2022 14:16:20 -0700 Subject: [PATCH 06/11] use stack-based set to prevent infinite recursion rather than a static one --- src/Val.cc | 29 ++++++++++++----------------- src/Val.h | 14 +++++++++----- src/zeek.bif | 6 ++++-- 3 files changed, 25 insertions(+), 24 deletions(-) diff --git a/src/Val.cc b/src/Val.cc index 052e7d0ee7..47d992f564 100644 --- a/src/Val.cc +++ b/src/Val.cc @@ -1322,12 +1322,12 @@ ValPtr ListVal::DoClone(CloneState* state) return lv; } -unsigned int ListVal::Footprint() const +unsigned int ListVal::Footprint(std::set* analyzed_records) const { unsigned int fp = vals.size(); for ( const auto& val : vals ) - fp += val->Footprint(); + fp += val->Footprint(analyzed_records); return fp; } @@ -2683,7 +2683,7 @@ ValPtr TableVal::DoClone(CloneState* state) return tv; } -unsigned int TableVal::Footprint() const +unsigned int TableVal::Footprint(std::set* analyzed_records) const { unsigned int fp = table_val->Length(); @@ -2693,9 +2693,9 @@ unsigned int TableVal::Footprint() const auto vl = table_hash->RecoverVals(*k); auto v = iter.GetValue()->GetVal(); - fp += vl->Footprint(); + fp += vl->Footprint(analyzed_records); if ( v ) - fp += v->Footprint(); + fp += v->Footprint(analyzed_records); } return fp; @@ -3066,18 +3066,13 @@ ValPtr RecordVal::DoClone(CloneState* state) return rv; } -unsigned int RecordVal::Footprint() const +unsigned int RecordVal::Footprint(std::set* analyzed_records) const { - // Which records we're in the process of analyzing - used to - // avoid infinite recursion for circular types (which can only - // occur due to the presence of records). - static std::unordered_set pending_records; - - if ( pending_records.count(this) > 0 ) + if ( analyzed_records->count(this) > 0 ) // Footprint is 1 for the RecordVal itself. return 1; - pending_records.insert(this); + analyzed_records->insert(this); int n = NumFields(); unsigned int fp = n; @@ -3089,10 +3084,10 @@ unsigned int RecordVal::Footprint() const auto f_i = GetField(i); if ( f_i ) - fp += f_i->Footprint(); + fp += f_i->Footprint(analyzed_records); } - pending_records.erase(this); + analyzed_records->erase(this); return fp; } @@ -3620,7 +3615,7 @@ bool VectorVal::Concretize(const TypePtr& t) return true; } -unsigned int VectorVal::Footprint() const +unsigned int VectorVal::Footprint(std::set* analyzed_records) const { auto n = vector_val->size(); unsigned int fp = n; @@ -3629,7 +3624,7 @@ unsigned int VectorVal::Footprint() const { auto v = At(i); if ( v ) - fp += v->Footprint(); + fp += v->Footprint(analyzed_records); } return fp; diff --git a/src/Val.h b/src/Val.h index d3c605022b..d0075f7259 100644 --- a/src/Val.h +++ b/src/Val.h @@ -127,9 +127,13 @@ public: * The number is not meant to be precise, but rather comparable: * larger footprint correlates with more memory consumption. * + * @param analyzed_records A pointer to a set used to track which + * records have been analyzed to date, used to prevent infinite + * recursion. The set should be empty (but not nil) on the first call. + * * @return The total footprint. */ - virtual unsigned int Footprint() const { return 1; } + virtual unsigned int Footprint(std::set* analyzed_records) const { return 1; } // Bytes in total value object. [[deprecated("Remove in v5.1. MemoryAllocation() is deprecated and will be removed. See " @@ -676,7 +680,7 @@ public: void Describe(ODesc* d) const override; - unsigned int Footprint() const override; + unsigned int Footprint(std::set* analyzed_records) const override; [[deprecated("Remove in v5.1. MemoryAllocation() is deprecated and will be removed. See " "GHI-572.")]] unsigned int @@ -953,7 +957,7 @@ public: // the function in the frame allowing it to capture its closure. void InitDefaultFunc(detail::Frame* f); - unsigned int Footprint() const override; + unsigned int Footprint(std::set* analyzed_records) const override; [[deprecated("Remove in v5.1. MemoryAllocation() is deprecated and will be removed. See " "GHI-572.")]] unsigned int @@ -1381,7 +1385,7 @@ public: } RecordValPtr CoerceTo(RecordTypePtr other, bool allow_orphaning = false); - unsigned int Footprint() const override; + unsigned int Footprint(std::set* analyzed_records) const override; [[deprecated("Remove in v5.1. MemoryAllocation() is deprecated and will be removed. See " "GHI-572.")]] unsigned int @@ -1640,7 +1644,7 @@ public: const auto& RawYieldType() const { return yield_type; } const auto& RawYieldTypes() const { return yield_types; } - unsigned int Footprint() const override; + unsigned int Footprint(std::set* analyzed_records) const override; protected: /** diff --git a/src/zeek.bif b/src/zeek.bif index 07bfdf6e94..1af134f4b5 100644 --- a/src/zeek.bif +++ b/src/zeek.bif @@ -2008,7 +2008,8 @@ function global_container_footprints%(%): var_sizes continue; auto id_name = zeek::make_intrusive(id->Name()); - auto fp = zeek::val_mgr->Count(v->Footprint()); + std::set analyzed_records; + auto fp = zeek::val_mgr->Count(v->Footprint(&analyzed_records)); sizes->Assign(std::move(id_name), std::move(fp)); } @@ -2024,7 +2025,8 @@ function global_container_footprints%(%): var_sizes ## .. zeek:see:: global_container_footprints function val_footprint%(v: any%): count %{ - return zeek::val_mgr->Count(v->Footprint()); + std::set analyzed_records; + return zeek::val_mgr->Count(v->Footprint(&analyzed_records)); %} ## Generates a table with information about all global identifiers. The table From 7fd94f82a8b04954eb129f6576be6ebfa44d0588 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Mon, 2 May 2022 15:08:37 -0700 Subject: [PATCH 07/11] simpler public calling interface for computing footprint --- src/Val.h | 11 +++++++++++ src/zeek.bif | 6 ++---- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/src/Val.h b/src/Val.h index d0075f7259..cda69e0331 100644 --- a/src/Val.h +++ b/src/Val.h @@ -127,6 +127,17 @@ public: * The number is not meant to be precise, but rather comparable: * larger footprint correlates with more memory consumption. * + * @return The total footprint. + */ + unsigned int Footprint() const + { + std::set analyzed_records; + return Footprint(&analyzed_records); + } + + /** + * Internal function for computing a Val's "footprint". + * * @param analyzed_records A pointer to a set used to track which * records have been analyzed to date, used to prevent infinite * recursion. The set should be empty (but not nil) on the first call. diff --git a/src/zeek.bif b/src/zeek.bif index 1af134f4b5..07bfdf6e94 100644 --- a/src/zeek.bif +++ b/src/zeek.bif @@ -2008,8 +2008,7 @@ function global_container_footprints%(%): var_sizes continue; auto id_name = zeek::make_intrusive(id->Name()); - std::set analyzed_records; - auto fp = zeek::val_mgr->Count(v->Footprint(&analyzed_records)); + auto fp = zeek::val_mgr->Count(v->Footprint()); sizes->Assign(std::move(id_name), std::move(fp)); } @@ -2025,8 +2024,7 @@ function global_container_footprints%(%): var_sizes ## .. zeek:see:: global_container_footprints function val_footprint%(v: any%): count %{ - std::set analyzed_records; - return zeek::val_mgr->Count(v->Footprint(&analyzed_records)); + return zeek::val_mgr->Count(v->Footprint()); %} ## Generates a table with information about all global identifiers. The table From 58cdc0be091775de1f0604ee3c39726d9ea6a4d1 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Thu, 5 May 2022 16:51:59 -0700 Subject: [PATCH 08/11] to avoid recursion, track all aggregates, not just records isolate the internal methods --- src/Val.cc | 51 ++++++++++++++++++++++++++++++++++----------------- src/Val.h | 48 +++++++++++++++++++++++++++--------------------- 2 files changed, 61 insertions(+), 38 deletions(-) diff --git a/src/Val.cc b/src/Val.cc index 47d992f564..15943ef87e 100644 --- a/src/Val.cc +++ b/src/Val.cc @@ -1322,12 +1322,12 @@ ValPtr ListVal::DoClone(CloneState* state) return lv; } -unsigned int ListVal::Footprint(std::set* analyzed_records) const +unsigned int ListVal::ComputeFootprint(std::unordered_set* analyzed_vals) const { unsigned int fp = vals.size(); for ( const auto& val : vals ) - fp += val->Footprint(analyzed_records); + fp += val->Footprint(analyzed_vals); return fp; } @@ -2683,7 +2683,7 @@ ValPtr TableVal::DoClone(CloneState* state) return tv; } -unsigned int TableVal::Footprint(std::set* analyzed_records) const +unsigned int TableVal::ComputeFootprint(std::unordered_set* analyzed_vals) const { unsigned int fp = table_val->Length(); @@ -2693,9 +2693,9 @@ unsigned int TableVal::Footprint(std::set* analyzed_records) c auto vl = table_hash->RecoverVals(*k); auto v = iter.GetValue()->GetVal(); - fp += vl->Footprint(analyzed_records); + fp += vl->Footprint(analyzed_vals); if ( v ) - fp += v->Footprint(analyzed_records); + fp += v->Footprint(analyzed_vals); } return fp; @@ -3066,14 +3066,8 @@ ValPtr RecordVal::DoClone(CloneState* state) return rv; } -unsigned int RecordVal::Footprint(std::set* analyzed_records) const +unsigned int RecordVal::ComputeFootprint(std::unordered_set* analyzed_vals) const { - if ( analyzed_records->count(this) > 0 ) - // Footprint is 1 for the RecordVal itself. - return 1; - - analyzed_records->insert(this); - int n = NumFields(); unsigned int fp = n; @@ -3084,11 +3078,9 @@ unsigned int RecordVal::Footprint(std::set* analyzed_records) auto f_i = GetField(i); if ( f_i ) - fp += f_i->Footprint(analyzed_records); + fp += f_i->Footprint(analyzed_vals); } - analyzed_records->erase(this); - return fp; } @@ -3615,7 +3607,7 @@ bool VectorVal::Concretize(const TypePtr& t) return true; } -unsigned int VectorVal::Footprint(std::set* analyzed_records) const +unsigned int VectorVal::ComputeFootprint(std::unordered_set* analyzed_vals) const { auto n = vector_val->size(); unsigned int fp = n; @@ -3624,7 +3616,7 @@ unsigned int VectorVal::Footprint(std::set* analyzed_records) { auto v = At(i); if ( v ) - fp += v->Footprint(analyzed_records); + fp += v->Footprint(analyzed_vals); } return fp; @@ -4011,6 +4003,31 @@ ValPtr Val::MakeCount(bro_uint_t u) return make_intrusive(u); } +unsigned int Val::Footprint(std::unordered_set* analyzed_vals) const + { + auto is_aggr = IsAggr(type); + + // We only need to check containers for possible recursion, as there's + // no way to construct a cycle using only non-aggregates. + if ( is_aggr ) + { + if ( analyzed_vals->count(this) > 0 ) + // Footprint is 1 for generating a cycle. + return 1; + + analyzed_vals->insert(this); + } + + auto fp = ComputeFootprint(analyzed_vals); + + if ( is_aggr ) + // Allow the aggregate to be revisited providing it's not + // in the context of a cycle. + analyzed_vals->erase(this); + + return fp; + } + ValManager::ValManager() { empty_string = make_intrusive(""); diff --git a/src/Val.h b/src/Val.h index cda69e0331..ff537072ea 100644 --- a/src/Val.h +++ b/src/Val.h @@ -131,21 +131,10 @@ public: */ unsigned int Footprint() const { - std::set analyzed_records; - return Footprint(&analyzed_records); + std::unordered_set analyzed_vals; + return Footprint(&analyzed_vals); } - /** - * Internal function for computing a Val's "footprint". - * - * @param analyzed_records A pointer to a set used to track which - * records have been analyzed to date, used to prevent infinite - * recursion. The set should be empty (but not nil) on the first call. - * - * @return The total footprint. - */ - virtual unsigned int Footprint(std::set* analyzed_records) const { return 1; } - // Bytes in total value object. [[deprecated("Remove in v5.1. MemoryAllocation() is deprecated and will be removed. See " "GHI-572.")]] virtual unsigned int @@ -255,6 +244,7 @@ protected: friend class EnumType; friend class ListVal; friend class RecordVal; + friend class TableVal; friend class VectorVal; friend class ValManager; friend class TableEntryVal; @@ -268,6 +258,21 @@ protected: explicit Val(TypePtr t) noexcept : type(std::move(t)) { } + /** + * Internal function for computing a Val's "footprint". + * + * @param analyzed_vals A pointer to a set used to track which values + * have been analyzed to date, used to prevent infinite recursion. + * The set should be empty (but not nil) on the first call. + * + * @return The total footprint. + */ + unsigned int Footprint(std::unordered_set* analyzed_vals) const; + virtual unsigned int ComputeFootprint(std::unordered_set* analyzed_vals) const + { + return 1; + } + // For internal use by the Val::Clone() methods. struct CloneState { @@ -691,13 +696,13 @@ public: void Describe(ODesc* d) const override; - unsigned int Footprint(std::set* analyzed_records) const override; - [[deprecated("Remove in v5.1. MemoryAllocation() is deprecated and will be removed. See " "GHI-572.")]] unsigned int MemoryAllocation() const override; protected: + unsigned int ComputeFootprint(std::unordered_set* analyzed_vals) const override; + ValPtr DoClone(CloneState* state) override; std::vector vals; @@ -968,8 +973,6 @@ public: // the function in the frame allowing it to capture its closure. void InitDefaultFunc(detail::Frame* f); - unsigned int Footprint(std::set* analyzed_records) const override; - [[deprecated("Remove in v5.1. MemoryAllocation() is deprecated and will be removed. See " "GHI-572.")]] unsigned int MemoryAllocation() const override; @@ -1060,6 +1063,8 @@ protected: // Sends data on to backing Broker Store void SendToStore(const Val* index, const TableEntryVal* new_entry_val, OnChangeType tpe); + unsigned int ComputeFootprint(std::unordered_set* analyzed_vals) const override; + ValPtr DoClone(CloneState* state) override; TableTypePtr table_type; @@ -1396,8 +1401,6 @@ public: } RecordValPtr CoerceTo(RecordTypePtr other, bool allow_orphaning = false); - unsigned int Footprint(std::set* analyzed_records) const override; - [[deprecated("Remove in v5.1. MemoryAllocation() is deprecated and will be removed. See " "GHI-572.")]] unsigned int MemoryAllocation() const override; @@ -1470,6 +1473,8 @@ private: // Just for template inferencing. RecordVal* Get() { return this; } + unsigned int ComputeFootprint(std::unordered_set* analyzed_vals) const override; + // Keep this handy for quick access during low-level operations. RecordTypePtr rt; @@ -1655,8 +1660,6 @@ public: const auto& RawYieldType() const { return yield_type; } const auto& RawYieldTypes() const { return yield_types; } - unsigned int Footprint(std::set* analyzed_records) const override; - protected: /** * Returns the element at a given index or nullptr if it does not exist. @@ -1670,6 +1673,9 @@ protected: ValPtr At(unsigned int index) const; void ValDescribe(ODesc* d) const override; + + unsigned int ComputeFootprint(std::unordered_set* analyzed_vals) const override; + ValPtr DoClone(CloneState* state) override; private: From d5f60cfaba224e7aa46e56b86a125629509f51cd Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Thu, 5 May 2022 16:55:03 -0700 Subject: [PATCH 09/11] btest update to include recursive value that doesn't require a record --- testing/btest/Baseline/bifs.footprint/out | 2 ++ testing/btest/bifs/footprint.zeek | 5 +++++ 2 files changed, 7 insertions(+) diff --git a/testing/btest/Baseline/bifs.footprint/out b/testing/btest/Baseline/bifs.footprint/out index b2aea3b37e..bc43dfb6bc 100644 --- a/testing/btest/Baseline/bifs.footprint/out +++ b/testing/btest/Baseline/bifs.footprint/out @@ -25,3 +25,5 @@ s3, 20 s4, 9 3 3 +srt, 0 +srt, 4 diff --git a/testing/btest/bifs/footprint.zeek b/testing/btest/bifs/footprint.zeek index 44737ac6f2..e2c05a84ef 100644 --- a/testing/btest/bifs/footprint.zeek +++ b/testing/btest/bifs/footprint.zeek @@ -97,4 +97,9 @@ event zeek_init() print val_footprint(x); print val_footprint(y); + + local self_ref_table: table[string] of any; + print "srt", val_footprint(self_ref_table); + self_ref_table["x"] = self_ref_table; + print "srt", val_footprint(self_ref_table); } From a387157ead6e1b978eeb2970d7e8d5034fa65fa0 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Thu, 5 May 2022 17:07:46 -0700 Subject: [PATCH 10/11] skip new BiF test for ASAN CI runs --- testing/btest/bifs/footprint.zeek | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/testing/btest/bifs/footprint.zeek b/testing/btest/bifs/footprint.zeek index e2c05a84ef..20dadca633 100644 --- a/testing/btest/bifs/footprint.zeek +++ b/testing/btest/bifs/footprint.zeek @@ -1,3 +1,7 @@ +# The ASAN CI job complains (correctly!) about this script leaking memory +# due to the script-level cycles it includes as stress-tests. +# @TEST-REQUIRES: test "${ZEEK_CI_ASAN_SKIP_TEST}" != "1" +# # @TEST-EXEC: zeek -b %INPUT >out # @TEST-EXEC: btest-diff out From eaf63402ccc652f41ad162a2dc4fc8fb3343b15e Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Fri, 6 May 2022 07:55:57 -0700 Subject: [PATCH 11/11] new environment variable to enable BTests to skip ASAN checks --- .cirrus.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.cirrus.yml b/.cirrus.yml index 1d5531501d..1977832a58 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -283,6 +283,7 @@ asan_sanitizer_task: CXXFLAGS: -DZEEK_DICT_DEBUG ZEEK_CI_CONFIGURE_FLAGS: *ASAN_SANITIZER_CONFIG ZEEK_CI_DISABLE_SCRIPT_PROFILING: 1 + ZEEK_CI_ASAN_SKIP_TEST: 1 ASAN_OPTIONS: detect_leaks=1 ubsan_sanitizer_task: