From 13e7ba3a00f133323c12c3a50b4bfe884e32c9bf Mon Sep 17 00:00:00 2001
From: Vern Paxson
Date: Wed, 24 Feb 2021 18:09:40 -0800
Subject: [PATCH] moving sort()/order() functionality into VectorVal

---
 src/Val.cc   | 168 ++++++++++++++++++++++++++++++++++++++++++++++++++-
 src/Val.h    |  12 +++-
 src/zeek.bif | 150 +++++++++--------------------------------------
 3 files changed, 204 insertions(+), 126 deletions(-)

diff --git a/src/Val.cc b/src/Val.cc
index da287d66b4..a992f273e7 100644
--- a/src/Val.cc
+++ b/src/Val.cc
@@ -3358,9 +3358,173 @@ ValPtr VectorVal::At(unsigned int index) const
 	return (*vector_val)[index].ToVal(t);
 	}
 
-void VectorVal::Sort(bool cmp_func(const ValPtr& a, const ValPtr& b))
+// Script-level comparison function for the sort()/order() call in progress.
+static Func* sort_function_comp = nullptr;
+
+// Used for indirect sorting to support order().
+static std::vector<const ZVal*> index_map;
+
+// The yield type of the vector being sorted.
+static TypePtr sort_type;
+static bool sort_type_is_managed = false;
+
+// Compares two elements by invoking sort_function_comp on them; a negative
+// result means "a" sorts before "b".
+static bool sort_function(const ZVal& a, const ZVal& b)
 	{
-	// Placeholder - will be filled in by a later commit.
+	// Missing values are only applicable for managed types.
+	if ( sort_type_is_managed )
+		{
+		if ( ! a.ManagedVal() )
+			return false;
+		if ( ! b.ManagedVal() )
+			return true;
+		}
+
+	auto a_v = a.ToVal(sort_type);
+	auto b_v = b.ToVal(sort_type);
+
+	auto result = sort_function_comp->Invoke(a_v, b_v);
+	int int_result = result->CoerceToInt();
+
+	return int_result < 0;
+	}
+
+// Native comparisons, used when no comparison function is supplied.
+static bool signed_sort_function (const ZVal& a, const ZVal& b)
+	{
+	return a.AsInt() < b.AsInt();
+	}
+
+static bool unsigned_sort_function (const ZVal& a, const ZVal& b)
+	{
+	return a.AsCount() < b.AsCount();
+	}
+
+static bool double_sort_function (const ZVal& a, const ZVal& b)
+	{
+	return a.AsDouble() < b.AsDouble();
+	}
+
+// Indirect variants: compare the elements that two index_map slots point to.
+static bool indirect_sort_function(size_t a, size_t b)
+	{
+	return sort_function(*index_map[a], *index_map[b]);
+	}
+
+static bool indirect_signed_sort_function(size_t a, size_t b)
+	{
+	return signed_sort_function(*index_map[a], *index_map[b]);
+	}
+
+static bool indirect_unsigned_sort_function(size_t a, size_t b)
+	{
+	return unsigned_sort_function(*index_map[a], *index_map[b]);
+	}
+
+static bool indirect_double_sort_function(size_t a, size_t b)
+	{
+	return double_sort_function(*index_map[a], *index_map[b]);
+	}
+
+void VectorVal::Sort(Func* cmp_func)
+	{
+	if ( yield_types )
+		reporter->RuntimeError(GetLocationInfo(), "cannot sort a vector-of-any");
+
+	sort_type = yield_type;
+	sort_type_is_managed = IsManagedType(sort_type);
+
+	bool (*sort_func)(const ZVal&, const ZVal&);
+
+	if ( cmp_func )
+		{
+		sort_function_comp = cmp_func;
+		sort_func = sort_function;
+		}
+
+	else
+		{
+		auto eti = sort_type->InternalType();
+
+		if ( eti == TYPE_INTERNAL_INT )
+			sort_func = signed_sort_function;
+		else if ( eti == TYPE_INTERNAL_UNSIGNED )
+			sort_func = unsigned_sort_function;
+		else
+			{
+			ASSERT(eti == TYPE_INTERNAL_DOUBLE);
+			sort_func = double_sort_function;
+			}
+		}
+
+	sort(vector_val->begin(), vector_val->end(), sort_func);
+	}
+
+VectorValPtr VectorVal::Order(Func* cmp_func)
+	{
+	if ( yield_types )
+		{
+		reporter->RuntimeError(GetLocationInfo(), "cannot order a vector-of-any");
+		return nullptr;
+		}
+
+	sort_type = yield_type;
+	sort_type_is_managed = IsManagedType(sort_type);
+
+	bool (*sort_func)(size_t, size_t);
+
+	if ( cmp_func )
+		{
+		sort_function_comp = cmp_func;
+		sort_func = indirect_sort_function;
+		}
+
+	else
+		{
+		auto eti = sort_type->InternalType();
+
+		if ( eti == TYPE_INTERNAL_INT )
+			sort_func = indirect_signed_sort_function;
+		else if ( eti == TYPE_INTERNAL_UNSIGNED )
+			sort_func = indirect_unsigned_sort_function;
+		else
+			{
+			ASSERT(eti == TYPE_INTERNAL_DOUBLE);
+			sort_func = indirect_double_sort_function;
+			}
+		}
+
+	const size_t n = Size();
+
+	// Start from an empty map in case an earlier call exited before
+	// reaching the clear() below; otherwise indices would be misaligned.
+	index_map.clear();
+	index_map.reserve(n);
+
+	// Set up initial mapping of indices directly to corresponding
+	// elements.
+	vector<size_t> ind_vv(n);
+	size_t i;
+	for ( i = 0; i < n; ++i )
+		{
+		ind_vv[i] = i;
+		index_map.emplace_back(&(*vector_val)[i]);
+		}
+
+	sort(ind_vv.begin(), ind_vv.end(), sort_func);
+
+	index_map.clear();
+
+	// Now spin through ind_vv to read out the rearrangement.
+	auto result_v = make_intrusive<VectorVal>(zeek::id::index_vec);
+	for ( i = 0; i < n; ++i )
+		{
+		size_t ind = ind_vv[i];
+		result_v->Assign(i, zeek::val_mgr->Count(ind));
+		}
+
+	return result_v;
 	}
 
 unsigned int VectorVal::Resize(unsigned int new_num_elements)
diff --git a/src/Val.h b/src/Val.h
index 5380fec458..50c8b87e09 100644
--- a/src/Val.h
+++ b/src/Val.h
@@ -1354,10 +1354,18 @@ public:
 	bool Remove(unsigned int index);
 
 	/**
-	 * Sorts the vector in place, using the given comparison function.
+	 * Sorts the vector in place, using the given optional
+	 * comparison function.
 	 * @param cmp_func Comparison function for vector elements.
 	 */
-	void Sort(bool cmp_func(const ValPtr& a, const ValPtr& b));
+	void Sort(Func* cmp_func = nullptr);
+
+	/**
+	 * Returns a "vector of count" holding the indices of this
+	 * vector when sorted using the given (optional) comparison function.
+	 * @param cmp_func Comparison function for vector elements.
+	 */
+	VectorValPtr Order(Func* cmp_func = nullptr);
 
 protected:
 	void ValDescribe(ODesc* d) const override;
diff --git a/src/zeek.bif b/src/zeek.bif
index 845c8c7a14..25c15d12dd 100644
--- a/src/zeek.bif
+++ b/src/zeek.bif
@@ -1328,72 +1328,12 @@ function all_set%(v: any%) : bool
 	return zeek::val_mgr->True();
 	%}
 
-%%{
-static zeek::Func* sort_function_comp = nullptr;
-static std::vector<const zeek::ValPtr*> index_map; // used for indirect sorting to support order()
-
-bool sort_function(const zeek::ValPtr& a, const zeek::ValPtr& b)
-	{
-	// Sort missing values as "high".
-	if ( ! a )
-		return 0;
-	if ( ! b )
-		return 1;
-
-	auto result = sort_function_comp->Invoke(a, b);
-	int int_result = result->CoerceToInt();
-
-	return int_result < 0;
-	}
-
-bool indirect_sort_function(size_t a, size_t b)
-	{
-	return sort_function(*index_map[a], *index_map[b]);
-	}
-
-bool signed_sort_function (const zeek::ValPtr& a, const zeek::ValPtr& b)
-	{
-	if ( ! a )
-		return 0;
-	if ( ! b )
-		return 1;
-
-	auto ia = a->CoerceToInt();
-	auto ib = b->CoerceToInt();
-
-	return ia < ib;
-	}
-
-bool unsigned_sort_function (const zeek::ValPtr& a, const zeek::ValPtr& b)
-	{
-	if ( ! a )
-		return 0;
-	if ( ! b )
-		return 1;
-
-	auto ia = a->CoerceToUnsigned();
-	auto ib = b->CoerceToUnsigned();
-
-	return ia < ib;
-	}
-
-bool indirect_signed_sort_function(size_t a, size_t b)
-	{
-	return signed_sort_function(*index_map[a], *index_map[b]);
-	}
-
-bool indirect_unsigned_sort_function(size_t a, size_t b)
-	{
-	return unsigned_sort_function(*index_map[a], *index_map[b]);
-	}
-%%}
-
 ## Sorts a vector in place. The second argument is a comparison function that
 ## takes two arguments: if the vector type is ``vector of T``, then the
 ## comparison function must be ``function(a: T, b: T): int``, which returns
 ## a value less than zero if ``a < b`` for some type-specific notion of the
 ## less-than operator. The comparison function is optional if the type
-## is an integral type (int, count, etc.).
+## is a numeric type (int, count, double, time, etc.).
 ##
 ## v: The vector instance to sort.
 ##
@@ -1415,7 +1355,10 @@ function sort%(v: any, ...%) : any
 	zeek::Func* comp = nullptr;
 
 	if ( @ARG@.size() > 2 )
+		{
 		zeek::emit_builtin_error("sort() called with extraneous argument");
+		return rval;
+		}
 
 	if ( @ARG@.size() == 2 )
 		{
@@ -1427,36 +1370,27 @@ function sort%(v: any, ...%) : any
 			}
 
 		comp = comp_val->AsFunc();
-		}
-
-	if ( ! comp && ! IsIntegral(elt_type->Tag()) )
-		zeek::emit_builtin_error("comparison function required for sort() with non-integral types");
-
-	auto vv = v->As<zeek::VectorVal*>();
-
-	if ( comp )
-		{
 		const auto& comp_type = comp->GetType();
 
 		if ( comp_type->Yield()->Tag() != zeek::TYPE_INT ||
-		     ! comp_type->ParamList()->AllMatch(elt_type, 0) )
+		     ! comp_type->ParamList()->AllMatch(elt_type, 0) ||
+		     comp_type->ParamList()->GetTypes().size() != 2 )
 			{
 			zeek::emit_builtin_error("invalid comparison function in call to sort()");
 			return rval;
 			}
-
-		sort_function_comp = comp;
-
-		vv->Sort(sort_function);
 		}
-	else
+
+	if ( ! comp && ! IsIntegral(elt_type->Tag()) &&
+	     elt_type->InternalType() != TYPE_INTERNAL_DOUBLE )
 		{
-		if ( elt_type->InternalType() == zeek::TYPE_INTERNAL_UNSIGNED )
-			vv->Sort(unsigned_sort_function);
-		else
-			vv->Sort(signed_sort_function);
+		zeek::emit_builtin_error("comparison function required for sort() with non-numeric types");
+		return rval;
 		}
 
+	auto vv = v->As<zeek::VectorVal*>();
+	vv->Sort(comp);
+
 	return rval;
 	%}
 
@@ -1473,19 +1407,22 @@ function sort%(v: any, ...%) : any
 ## .. zeek:see:: sort
 function order%(v: any, ...%) : index_vec
 	%{
-	auto result_v = zeek::make_intrusive<zeek::VectorVal>(zeek::id::index_vec);
+	auto err_v = zeek::make_intrusive<zeek::VectorVal>(zeek::id::index_vec);
 
 	if ( v->GetType()->Tag() != zeek::TYPE_VECTOR )
 		{
 		zeek::emit_builtin_error("order() requires vector");
-		return result_v;
+		return err_v;
 		}
 
 	const auto& elt_type = v->GetType()->Yield();
 	zeek::Func* comp = nullptr;
 
 	if ( @ARG@.size() > 2 )
+		{
 		zeek::emit_builtin_error("order() called with extraneous argument");
+		return err_v;
+		}
 
 	if ( @ARG@.size() == 2 )
 		{
@@ -1497,58 +1434,27 @@ function order%(v: any, ...%) : index_vec
 			}
 
 		comp = comp_val->AsFunc();
-		}
 
-	if ( ! comp && ! IsIntegral(elt_type->Tag()) )
-		zeek::emit_builtin_error("comparison function required for order() with non-integral types");
-
-	auto vv = v->As<zeek::VectorVal*>();
-	auto n = vv->Size();
-
-	// Set up initial mapping of indices directly to corresponding
-	// elements.
-	vector<size_t> ind_vv(n);
-	index_map.reserve(n);
-	size_t i;
-	for ( i = 0; i < n; ++i )
-		{
-		ind_vv[i] = i;
-		auto tmp_until_later_commit = vv->At(i);
-		index_map.emplace_back(&tmp_until_later_commit);
-		}
-
-	if ( comp )
-		{
 		const auto& comp_type = comp->GetType();
 		if ( comp_type->Yield()->Tag() != zeek::TYPE_INT ||
-		     ! comp_type->ParamList()->AllMatch(elt_type, 0) )
+		     ! comp_type->ParamList()->AllMatch(elt_type, 0) ||
+		     comp_type->ParamList()->GetTypes().size() != 2 )
 			{
 			zeek::emit_builtin_error("invalid comparison function in call to order()");
 			return zeek::ValPtr{zeek::NewRef{}, v};
 			}
-
-		sort_function_comp = comp;
-
-		sort(ind_vv.begin(), ind_vv.end(), indirect_sort_function);
 		}
-	else
+
+	if ( ! comp && ! IsIntegral(elt_type->Tag()) &&
+	     elt_type->InternalType() != TYPE_INTERNAL_DOUBLE )
 		{
-		if ( elt_type->InternalType() == zeek::TYPE_INTERNAL_UNSIGNED )
-			sort(ind_vv.begin(), ind_vv.end(), indirect_unsigned_sort_function);
-		else
-			sort(ind_vv.begin(), ind_vv.end(), indirect_signed_sort_function);
+		zeek::emit_builtin_error("comparison function required for order() with non-numeric types");
+		return err_v;
 		}
 
-	index_map = {};
+	auto vv = v->As<zeek::VectorVal*>();
 
-	// Now spin through ind_vv to read out the rearrangement.
-	for ( i = 0; i < n; ++i )
-		{
-		int ind = ind_vv[i];
-		result_v->Assign(i, zeek::val_mgr->Count(ind));
-		}
-
-	return result_v;
+	return vv->Order(comp);
 	%}
 
 # ===========================================================================