moving sort()/order() functionality into VectorVal

This commit is contained in:
Vern Paxson 2021-02-24 18:09:40 -08:00
parent 0299ea0894
commit 13e7ba3a00
3 changed files with 194 additions and 126 deletions

View file

@ -3358,9 +3358,163 @@ ValPtr VectorVal::At(unsigned int index) const
return (*vector_val)[index].ToVal(t); return (*vector_val)[index].ToVal(t);
} }
void VectorVal::Sort(bool cmp_func(const ValPtr& a, const ValPtr& b)) static Func* sort_function_comp = nullptr;
// Used for indirect sorting to support order(): entry i points at element i
// of the vector being ordered, so that the indirect_* comparators can map
// an index back to the element it stands for.
static std::vector<const ZVal*> index_map;
// The yield type of the vector being sorted.
static TypePtr sort_type;
// Whether sort_type is a managed type, as reported by IsManagedType().
// sort_function() only checks for missing values when this is set.
static bool sort_type_is_managed = false;
// Comparator that defers to the script-level function stored in
// sort_function_comp.  Returns true if "a" should be ordered before "b",
// i.e., if that function's result, coerced to an integer, is negative.
//
// Relies on the file-static sort_type / sort_type_is_managed describing
// the elements being compared.
static bool sort_function(const ZVal& a, const ZVal& b)
	{
	// Missing values are only applicable for managed types.  A missing
	// "a" never precedes "b"; a present "a" always precedes a missing "b".
	if ( sort_type_is_managed )
		{
		if ( ! a.ManagedVal() )
			return false;

		if ( ! b.ManagedVal() )
			return true;
		}

	auto a_v = a.ToVal(sort_type);
	auto b_v = b.ToVal(sort_type);

	auto result = sort_function_comp->Invoke(a_v, b_v);

	// Test the sign on the value exactly as CoerceToInt() returns it.
	// Storing it in a plain "int" first could truncate a wider integer
	// and flip its sign (presumably CoerceToInt() yields Zeek's 64-bit
	// integer type - TODO confirm), which would corrupt the ordering.
	return result->CoerceToInt() < 0;
	}
// Default comparator for elements read as signed integers: "a" precedes
// "b" if a.AsInt() is the smaller of the two.
static bool signed_sort_function(const ZVal& a, const ZVal& b)
	{
	const auto lhs = a.AsInt();
	const auto rhs = b.AsInt();
	return lhs < rhs;
	}
// Default comparator for elements read as unsigned integers: "a" precedes
// "b" if a.AsCount() is the smaller of the two.
static bool unsigned_sort_function(const ZVal& a, const ZVal& b)
	{
	const auto lhs = a.AsCount();
	const auto rhs = b.AsCount();
	return lhs < rhs;
	}
// Default comparator for elements read as doubles: "a" precedes "b" if
// a.AsDouble() is the smaller of the two.
static bool double_sort_function(const ZVal& a, const ZVal& b)
	{
	const auto lhs = a.AsDouble();
	const auto rhs = b.AsDouble();
	return lhs < rhs;
	}
static bool indirect_sort_function(size_t a, size_t b)
{
return sort_function(*index_map[a], *index_map[b]);
}
static bool indirect_signed_sort_function(size_t a, size_t b)
{
return signed_sort_function(*index_map[a], *index_map[b]);
}
static bool indirect_unsigned_sort_function(size_t a, size_t b)
{
return unsigned_sort_function(*index_map[a], *index_map[b]);
}
static bool indirect_double_sort_function(size_t a, size_t b)
{
return double_sort_function(*index_map[a], *index_map[b]);
}
// Sorts the vector's elements in place.
//
// If cmp_func is non-null, ordering is delegated to it via sort_function().
// Otherwise a built-in comparator is picked from the yield type's internal
// type: signed, unsigned, or (asserted) double.
//
// Reports a runtime error for a vector-of-any.
void VectorVal::Sort(Func* cmp_func)
	{
	if ( yield_types )
		reporter->RuntimeError(GetLocationInfo(), "cannot sort a vector-of-any");

	// Publish the element type for the file-static comparators.
	sort_type = yield_type;
	sort_type_is_managed = IsManagedType(sort_type);

	using Comparator = bool (*)(const ZVal&, const ZVal&);
	Comparator less_than;

	if ( ! cmp_func )
		{
		const auto eti = sort_type->InternalType();

		switch ( eti )
			{
			case TYPE_INTERNAL_INT:
				less_than = signed_sort_function;
				break;

			case TYPE_INTERNAL_UNSIGNED:
				less_than = unsigned_sort_function;
				break;

			default:
				// Anything that is neither signed nor unsigned is
				// expected to be a double.
				ASSERT(eti == TYPE_INTERNAL_DOUBLE);
				less_than = double_sort_function;
				break;
			}
		}
	else
		{
		sort_function_comp = cmp_func;
		less_than = sort_function;
		}

	sort(vector_val->begin(), vector_val->end(), less_than);
	}
// Returns a new vector (of type zeek::id::index_vec) whose i'th entry is
// the index of the element of this vector that comes i'th in sorted order.
// The vector's own elements are not reordered.
//
// If cmp_func is non-null, ordering is delegated to it via
// indirect_sort_function().  Otherwise a built-in comparator is picked from
// the yield type's internal type: signed, unsigned, or (asserted) double.
//
// Reports a runtime error for a vector-of-any.
VectorValPtr VectorVal::Order(Func* cmp_func)
	{
	if ( yield_types )
		{
		reporter->RuntimeError(GetLocationInfo(), "cannot order a vector-of-any");
		return nullptr;
		}

	// Publish the element type for the file-static comparators.
	sort_type = yield_type;
	sort_type_is_managed = IsManagedType(sort_type);

	bool (*sort_func)(size_t, size_t);

	if ( cmp_func )
		{
		sort_function_comp = cmp_func;
		sort_func = indirect_sort_function;
		}
	else
		{
		auto eti = sort_type->InternalType();

		if ( eti == TYPE_INTERNAL_INT )
			sort_func = indirect_signed_sort_function;
		else if ( eti == TYPE_INTERNAL_UNSIGNED )
			sort_func = indirect_unsigned_sort_function;
		else
			{
			ASSERT(eti == TYPE_INTERNAL_DOUBLE);
			sort_func = indirect_double_sort_function;
			}
		}

	const size_t n = Size();

	// index_map is file-static, and the comparators address it with
	// indices starting at 0.  Reset it before filling it in: if an
	// earlier call left early (e.g., the comparison function raised an
	// error during the sort), its stale entries would otherwise still
	// occupy the front of the table.
	index_map.clear();
	index_map.reserve(n);

	// Set up initial mapping of indices directly to corresponding
	// elements.
	std::vector<size_t> ind_vv(n);

	for ( size_t i = 0; i < n; ++i )
		{
		ind_vv[i] = i;
		index_map.emplace_back(&(*vector_val)[i]);
		}

	std::sort(ind_vv.begin(), ind_vv.end(), sort_func);

	// Don't hold on to pointers into this vector's storage.
	index_map.clear();

	// Now spin through ind_vv to read out the rearrangement.
	auto result_v = make_intrusive<VectorVal>(zeek::id::index_vec);

	for ( size_t i = 0; i < n; ++i )
		result_v->Assign(i, zeek::val_mgr->Count(ind_vv[i]));

	return result_v;
	}
unsigned int VectorVal::Resize(unsigned int new_num_elements) unsigned int VectorVal::Resize(unsigned int new_num_elements)

View file

@ -1354,10 +1354,18 @@ public:
bool Remove(unsigned int index); bool Remove(unsigned int index);
/** /**
* Sorts the vector in place, using the given comparison function. * Sorts the vector in place, using the given optional
* comparison function.
* @param cmp_func Comparison function for vector elements. * @param cmp_func Comparison function for vector elements.
*/ */
void Sort(bool cmp_func(const ValPtr& a, const ValPtr& b)); void Sort(Func* cmp_func = nullptr);
/**
* Returns a "vector of count" holding the indices of this
* vector when sorted using the given (optional) comparison function.
* @param cmp_func Comparison function for vector elements.
*/
VectorValPtr Order(Func* cmp_func = nullptr);
protected: protected:
void ValDescribe(ODesc* d) const override; void ValDescribe(ODesc* d) const override;

View file

@ -1328,72 +1328,12 @@ function all_set%(v: any%) : bool
return zeek::val_mgr->True(); return zeek::val_mgr->True();
%} %}
%%{
// Script-level comparison function that sort_function() invokes; sort()
// and order() assign it before sorting with a user-supplied comparator.
static zeek::Func* sort_function_comp = nullptr;
// Each entry points at a vector element; the indirect_* comparators
// dereference index_map[i] to compare elements by index.
static std::vector<const zeek::ValPtr*> index_map; // used for indirect sorting to support order()
// Comparator that defers to the script-level function stored in
// sort_function_comp.  Returns true if "a" should be ordered before "b",
// i.e., if that function's result, coerced to an integer, is negative.
bool sort_function(const zeek::ValPtr& a, const zeek::ValPtr& b)
	{
	// Sort missing values as "high".
	if ( ! a )
		return false;

	if ( ! b )
		return true;

	auto result = sort_function_comp->Invoke(a, b);

	// Test the sign on the value exactly as CoerceToInt() returns it.
	// Storing it in a plain "int" first could truncate a wider integer
	// and flip its sign (presumably CoerceToInt() yields Zeek's 64-bit
	// integer type - TODO confirm), which would corrupt the ordering.
	return result->CoerceToInt() < 0;
	}
bool indirect_sort_function(size_t a, size_t b)
{
return sort_function(*index_map[a], *index_map[b]);
}
// Default comparator for signed elements: orders by CoerceToInt(), with
// missing (null) values sorting after present ones.
bool signed_sort_function(const zeek::ValPtr& a, const zeek::ValPtr& b)
	{
	// A missing "a" never precedes "b" ...
	if ( ! a )
		return false;

	// ... while a present "a" always precedes a missing "b".
	if ( ! b )
		return true;

	auto lhs = a->CoerceToInt();
	auto rhs = b->CoerceToInt();

	return lhs < rhs;
	}
// Default comparator for unsigned elements: orders by CoerceToUnsigned(),
// with missing (null) values sorting after present ones.
bool unsigned_sort_function(const zeek::ValPtr& a, const zeek::ValPtr& b)
	{
	// A missing "a" never precedes "b" ...
	if ( ! a )
		return false;

	// ... while a present "a" always precedes a missing "b".
	if ( ! b )
		return true;

	auto lhs = a->CoerceToUnsigned();
	auto rhs = b->CoerceToUnsigned();

	return lhs < rhs;
	}
bool indirect_signed_sort_function(size_t a, size_t b)
{
return signed_sort_function(*index_map[a], *index_map[b]);
}
bool indirect_unsigned_sort_function(size_t a, size_t b)
{
return unsigned_sort_function(*index_map[a], *index_map[b]);
}
%%}
## Sorts a vector in place. The second argument is a comparison function that ## Sorts a vector in place. The second argument is a comparison function that
## takes two arguments: if the vector type is ``vector of T``, then the ## takes two arguments: if the vector type is ``vector of T``, then the
## comparison function must be ``function(a: T, b: T): int``, which returns ## comparison function must be ``function(a: T, b: T): int``, which returns
## a value less than zero if ``a < b`` for some type-specific notion of the ## a value less than zero if ``a < b`` for some type-specific notion of the
## less-than operator. The comparison function is optional if the type ## less-than operator. The comparison function is optional if the type
## is an integral type (int, count, etc.). ## is a numeric type (int, count, double, time, etc.).
## ##
## v: The vector instance to sort. ## v: The vector instance to sort.
## ##
@ -1415,7 +1355,10 @@ function sort%(v: any, ...%) : any
zeek::Func* comp = nullptr; zeek::Func* comp = nullptr;
if ( @ARG@.size() > 2 ) if ( @ARG@.size() > 2 )
{
zeek::emit_builtin_error("sort() called with extraneous argument"); zeek::emit_builtin_error("sort() called with extraneous argument");
return rval;
}
if ( @ARG@.size() == 2 ) if ( @ARG@.size() == 2 )
{ {
@ -1427,36 +1370,27 @@ function sort%(v: any, ...%) : any
} }
comp = comp_val->AsFunc(); comp = comp_val->AsFunc();
}
if ( ! comp && ! IsIntegral(elt_type->Tag()) )
zeek::emit_builtin_error("comparison function required for sort() with non-integral types");
auto vv = v->As<zeek::VectorVal*>();
if ( comp )
{
const auto& comp_type = comp->GetType(); const auto& comp_type = comp->GetType();
if ( comp_type->Yield()->Tag() != zeek::TYPE_INT || if ( comp_type->Yield()->Tag() != zeek::TYPE_INT ||
! comp_type->ParamList()->AllMatch(elt_type, 0) ) ! comp_type->ParamList()->AllMatch(elt_type, 0) ||
comp_type->ParamList()->GetTypes().size() != 2 )
{ {
zeek::emit_builtin_error("invalid comparison function in call to sort()"); zeek::emit_builtin_error("invalid comparison function in call to sort()");
return rval; return rval;
} }
sort_function_comp = comp;
vv->Sort(sort_function);
} }
else
if ( ! comp && ! IsIntegral(elt_type->Tag()) &&
elt_type->InternalType() != TYPE_INTERNAL_DOUBLE )
{ {
if ( elt_type->InternalType() == zeek::TYPE_INTERNAL_UNSIGNED ) zeek::emit_builtin_error("comparison function required for sort() with non-numeric types");
vv->Sort(unsigned_sort_function); return rval;
else
vv->Sort(signed_sort_function);
} }
auto vv = v->As<zeek::VectorVal*>();
vv->Sort(comp);
return rval; return rval;
%} %}
@ -1473,19 +1407,22 @@ function sort%(v: any, ...%) : any
## .. zeek:see:: sort ## .. zeek:see:: sort
function order%(v: any, ...%) : index_vec function order%(v: any, ...%) : index_vec
%{ %{
auto result_v = zeek::make_intrusive<zeek::VectorVal>(zeek::id::index_vec); auto err_v = zeek::make_intrusive<zeek::VectorVal>(zeek::id::index_vec);
if ( v->GetType()->Tag() != zeek::TYPE_VECTOR ) if ( v->GetType()->Tag() != zeek::TYPE_VECTOR )
{ {
zeek::emit_builtin_error("order() requires vector"); zeek::emit_builtin_error("order() requires vector");
return result_v; return err_v;
} }
const auto& elt_type = v->GetType()->Yield(); const auto& elt_type = v->GetType()->Yield();
zeek::Func* comp = nullptr; zeek::Func* comp = nullptr;
if ( @ARG@.size() > 2 ) if ( @ARG@.size() > 2 )
{
zeek::emit_builtin_error("order() called with extraneous argument"); zeek::emit_builtin_error("order() called with extraneous argument");
return err_v;
}
if ( @ARG@.size() == 2 ) if ( @ARG@.size() == 2 )
{ {
@ -1497,58 +1434,27 @@ function order%(v: any, ...%) : index_vec
} }
comp = comp_val->AsFunc(); comp = comp_val->AsFunc();
}
if ( ! comp && ! IsIntegral(elt_type->Tag()) )
zeek::emit_builtin_error("comparison function required for order() with non-integral types");
auto vv = v->As<zeek::VectorVal*>();
auto n = vv->Size();
// Set up initial mapping of indices directly to corresponding
// elements.
vector<size_t> ind_vv(n);
index_map.reserve(n);
size_t i;
for ( i = 0; i < n; ++i )
{
ind_vv[i] = i;
auto tmp_until_later_commit = vv->At(i);
index_map.emplace_back(&tmp_until_later_commit);
}
if ( comp )
{
const auto& comp_type = comp->GetType(); const auto& comp_type = comp->GetType();
if ( comp_type->Yield()->Tag() != zeek::TYPE_INT || if ( comp_type->Yield()->Tag() != zeek::TYPE_INT ||
! comp_type->ParamList()->AllMatch(elt_type, 0) ) ! comp_type->ParamList()->AllMatch(elt_type, 0) ||
comp_type->ParamList()->GetTypes().size() != 2 )
{ {
zeek::emit_builtin_error("invalid comparison function in call to order()"); zeek::emit_builtin_error("invalid comparison function in call to order()");
return zeek::ValPtr{zeek::NewRef{}, v}; return zeek::ValPtr{zeek::NewRef{}, v};
} }
sort_function_comp = comp;
sort(ind_vv.begin(), ind_vv.end(), indirect_sort_function);
} }
else
if ( ! comp && ! IsIntegral(elt_type->Tag()) &&
elt_type->InternalType() != TYPE_INTERNAL_DOUBLE )
{ {
if ( elt_type->InternalType() == zeek::TYPE_INTERNAL_UNSIGNED ) zeek::emit_builtin_error("comparison function required for order() with non-numeric types");
sort(ind_vv.begin(), ind_vv.end(), indirect_unsigned_sort_function); return err_v;
else
sort(ind_vv.begin(), ind_vv.end(), indirect_signed_sort_function);
} }
index_map = {}; auto vv = v->As<zeek::VectorVal*>();
// Now spin through ind_vv to read out the rearrangement. return vv->Order(comp);
for ( i = 0; i < n; ++i )
{
int ind = ind_vv[i];
result_v->Assign(i, zeek::val_mgr->Count(ind));
}
return result_v;
%} %}
# =========================================================================== # ===========================================================================