mirror of
https://github.com/zeek/zeek.git
synced 2025-10-05 16:18:19 +00:00
moving sort()/order() functionality into VectorVal
This commit is contained in:
parent
0299ea0894
commit
13e7ba3a00
3 changed files with 194 additions and 126 deletions
158
src/Val.cc
158
src/Val.cc
|
@ -3358,9 +3358,163 @@ ValPtr VectorVal::At(unsigned int index) const
|
||||||
return (*vector_val)[index].ToVal(t);
|
return (*vector_val)[index].ToVal(t);
|
||||||
}
|
}
|
||||||
|
|
||||||
void VectorVal::Sort(bool cmp_func(const ValPtr& a, const ValPtr& b))
|
static Func* sort_function_comp = nullptr;
|
||||||
|
|
||||||
|
// Used for indirect sorting to support order().
|
||||||
|
static std::vector<const ZVal*> index_map;
|
||||||
|
|
||||||
|
// The yield type of the vector being sorted.
|
||||||
|
static TypePtr sort_type;
|
||||||
|
static bool sort_type_is_managed = false;
|
||||||
|
|
||||||
|
static bool sort_function(const ZVal& a, const ZVal& b)
|
||||||
{
|
{
|
||||||
// Placeholder - will be filled in by a later commit.
|
// Missing values are only applicable for managed types.
|
||||||
|
if ( sort_type_is_managed )
|
||||||
|
{
|
||||||
|
if ( ! a.ManagedVal() )
|
||||||
|
return 0;
|
||||||
|
if ( ! b.ManagedVal() )
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto a_v = a.ToVal(sort_type);
|
||||||
|
auto b_v = b.ToVal(sort_type);
|
||||||
|
|
||||||
|
auto result = sort_function_comp->Invoke(a_v, b_v);
|
||||||
|
int int_result = result->CoerceToInt();
|
||||||
|
|
||||||
|
return int_result < 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool signed_sort_function (const ZVal& a, const ZVal& b)
|
||||||
|
{
|
||||||
|
return a.AsInt() < b.AsInt();
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool unsigned_sort_function (const ZVal& a, const ZVal& b)
|
||||||
|
{
|
||||||
|
return a.AsCount() < b.AsCount();
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool double_sort_function (const ZVal& a, const ZVal& b)
|
||||||
|
{
|
||||||
|
return a.AsDouble() < b.AsDouble();
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool indirect_sort_function(size_t a, size_t b)
|
||||||
|
{
|
||||||
|
return sort_function(*index_map[a], *index_map[b]);
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool indirect_signed_sort_function(size_t a, size_t b)
|
||||||
|
{
|
||||||
|
return signed_sort_function(*index_map[a], *index_map[b]);
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool indirect_unsigned_sort_function(size_t a, size_t b)
|
||||||
|
{
|
||||||
|
return unsigned_sort_function(*index_map[a], *index_map[b]);
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool indirect_double_sort_function(size_t a, size_t b)
|
||||||
|
{
|
||||||
|
return double_sort_function(*index_map[a], *index_map[b]);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Sorts the vector's elements in place.
//
// cmp_func: optional script-level comparison function. When given, it is
//     invoked on pairs of elements and a negative result means the first
//     sorts before the second. When null, the elements are compared by
//     their internal numeric representation (signed, unsigned or double);
//     other element types are not supported without a comparison function.
//
// Reports a runtime error, leaving the vector untouched, if yield_types is
// set (a vector-of-any, per the error message).
void VectorVal::Sort(Func* cmp_func)
	{
	if ( yield_types )
		{
		reporter->RuntimeError(GetLocationInfo(), "cannot sort a vector-of-any");
		// Mirror Order(): never fall through to sorting after the error,
		// whether or not RuntimeError() returns.
		return;
		}

	// The comparison helpers are plain functions, so they pick up their
	// context from file-local state set here.
	sort_type = yield_type;
	sort_type_is_managed = IsManagedType(sort_type);

	bool (*sort_func)(const ZVal&, const ZVal&) = nullptr;

	if ( cmp_func )
		{
		sort_function_comp = cmp_func;
		sort_func = sort_function;
		}

	else
		{
		auto eti = sort_type->InternalType();

		if ( eti == TYPE_INTERNAL_INT )
			sort_func = signed_sort_function;
		else if ( eti == TYPE_INTERNAL_UNSIGNED )
			sort_func = unsigned_sort_function;
		else
			{
			// Callers are expected to supply a comparison function for
			// anything that is not numeric.
			ASSERT(eti == TYPE_INTERNAL_DOUBLE);
			sort_func = double_sort_function;
			}
		}

	std::sort(vector_val->begin(), vector_val->end(), sort_func);
	}
|
||||||
|
|
||||||
|
// Computes the order in which this vector's elements would appear if
// sorted, without modifying the vector itself.
//
// cmp_func: optional script-level comparison function. When given, it is
//     invoked on pairs of elements and a negative result means the first
//     sorts before the second. When null, the elements are compared by
//     their internal numeric representation (signed, unsigned or double);
//     other element types are not supported without a comparison function.
//
// Returns a new vector (of type zeek::id::index_vec) whose i'th entry is
// the index, in this vector, of the element that sorts into position i.
// Reports a runtime error and returns nullptr if yield_types is set (a
// vector-of-any, per the error message).
VectorValPtr VectorVal::Order(Func* cmp_func)
	{
	if ( yield_types )
		{
		reporter->RuntimeError(GetLocationInfo(), "cannot order a vector-of-any");
		return nullptr;
		}

	// The comparison helpers are plain functions, so they pick up their
	// context from file-local state set here.
	sort_type = yield_type;
	sort_type_is_managed = IsManagedType(sort_type);

	bool (*sort_func)(size_t, size_t) = nullptr;

	if ( cmp_func )
		{
		sort_function_comp = cmp_func;
		sort_func = indirect_sort_function;
		}

	else
		{
		auto eti = sort_type->InternalType();

		if ( eti == TYPE_INTERNAL_INT )
			sort_func = indirect_signed_sort_function;
		else if ( eti == TYPE_INTERNAL_UNSIGNED )
			sort_func = indirect_unsigned_sort_function;
		else
			{
			// Callers are expected to supply a comparison function for
			// anything that is not numeric.
			ASSERT(eti == TYPE_INTERNAL_DOUBLE);
			sort_func = indirect_double_sort_function;
			}
		}

	// Same unsigned type as the loop counters and indices below, avoiding
	// signed/unsigned comparisons.
	const size_t n = Size();

	// Set up initial mapping of indices directly to corresponding
	// elements. Start from an empty map: if an earlier call was cut short
	// before clearing it, leftover entries would shift every lookup.
	std::vector<size_t> ind_vv(n);
	index_map.clear();
	index_map.reserve(n);

	for ( size_t i = 0; i < n; ++i )
		{
		ind_vv[i] = i;
		index_map.emplace_back(&(*vector_val)[i]);
		}

	// Sort the indices; the comparators look the elements up via index_map.
	std::sort(ind_vv.begin(), ind_vv.end(), sort_func);

	index_map.clear();

	// Now spin through ind_vv to read out the rearrangement.
	auto result_v = make_intrusive<VectorVal>(zeek::id::index_vec);

	for ( size_t i = 0; i < n; ++i )
		// Pass the index through unnarrowed; going via "int" could
		// mangle large indices.
		result_v->Assign(i, zeek::val_mgr->Count(ind_vv[i]));

	return result_v;
	}
|
||||||
|
|
||||||
unsigned int VectorVal::Resize(unsigned int new_num_elements)
|
unsigned int VectorVal::Resize(unsigned int new_num_elements)
|
||||||
|
|
12
src/Val.h
12
src/Val.h
|
@ -1354,10 +1354,18 @@ public:
|
||||||
bool Remove(unsigned int index);
|
bool Remove(unsigned int index);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Sorts the vector in place, using the given comparison function.
|
* Sorts the vector in place, using the given optional
|
||||||
|
* comparison function.
|
||||||
* @param cmp_func Comparison function for vector elements.
|
* @param cmp_func Comparison function for vector elements.
|
||||||
*/
|
*/
|
||||||
void Sort(bool cmp_func(const ValPtr& a, const ValPtr& b));
|
void Sort(Func* cmp_func = nullptr);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns a "vector of count" holding the indices of this
|
||||||
|
* vector when sorted using the given (optional) comparison function.
|
||||||
|
* @param cmp_func Comparison function for vector elements.
|
||||||
|
*/
|
||||||
|
VectorValPtr Order(Func* cmp_func = nullptr);
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
void ValDescribe(ODesc* d) const override;
|
void ValDescribe(ODesc* d) const override;
|
||||||
|
|
150
src/zeek.bif
150
src/zeek.bif
|
@ -1328,72 +1328,12 @@ function all_set%(v: any%) : bool
|
||||||
return zeek::val_mgr->True();
|
return zeek::val_mgr->True();
|
||||||
%}
|
%}
|
||||||
|
|
||||||
%%{
|
|
||||||
static zeek::Func* sort_function_comp = nullptr;
|
|
||||||
static std::vector<const zeek::ValPtr*> index_map; // used for indirect sorting to support order()
|
|
||||||
|
|
||||||
bool sort_function(const zeek::ValPtr& a, const zeek::ValPtr& b)
|
|
||||||
{
|
|
||||||
// Sort missing values as "high".
|
|
||||||
if ( ! a )
|
|
||||||
return 0;
|
|
||||||
if ( ! b )
|
|
||||||
return 1;
|
|
||||||
|
|
||||||
auto result = sort_function_comp->Invoke(a, b);
|
|
||||||
int int_result = result->CoerceToInt();
|
|
||||||
|
|
||||||
return int_result < 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool indirect_sort_function(size_t a, size_t b)
|
|
||||||
{
|
|
||||||
return sort_function(*index_map[a], *index_map[b]);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool signed_sort_function (const zeek::ValPtr& a, const zeek::ValPtr& b)
|
|
||||||
{
|
|
||||||
if ( ! a )
|
|
||||||
return 0;
|
|
||||||
if ( ! b )
|
|
||||||
return 1;
|
|
||||||
|
|
||||||
auto ia = a->CoerceToInt();
|
|
||||||
auto ib = b->CoerceToInt();
|
|
||||||
|
|
||||||
return ia < ib;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool unsigned_sort_function (const zeek::ValPtr& a, const zeek::ValPtr& b)
|
|
||||||
{
|
|
||||||
if ( ! a )
|
|
||||||
return 0;
|
|
||||||
if ( ! b )
|
|
||||||
return 1;
|
|
||||||
|
|
||||||
auto ia = a->CoerceToUnsigned();
|
|
||||||
auto ib = b->CoerceToUnsigned();
|
|
||||||
|
|
||||||
return ia < ib;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool indirect_signed_sort_function(size_t a, size_t b)
|
|
||||||
{
|
|
||||||
return signed_sort_function(*index_map[a], *index_map[b]);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool indirect_unsigned_sort_function(size_t a, size_t b)
|
|
||||||
{
|
|
||||||
return unsigned_sort_function(*index_map[a], *index_map[b]);
|
|
||||||
}
|
|
||||||
%%}
|
|
||||||
|
|
||||||
## Sorts a vector in place. The second argument is a comparison function that
|
## Sorts a vector in place. The second argument is a comparison function that
|
||||||
## takes two arguments: if the vector type is ``vector of T``, then the
|
## takes two arguments: if the vector type is ``vector of T``, then the
|
||||||
## comparison function must be ``function(a: T, b: T): int``, which returns
|
## comparison function must be ``function(a: T, b: T): int``, which returns
|
||||||
## a value less than zero if ``a < b`` for some type-specific notion of the
|
## a value less than zero if ``a < b`` for some type-specific notion of the
|
||||||
## less-than operator. The comparison function is optional if the type
|
## less-than operator. The comparison function is optional if the type
|
||||||
## is an integral type (int, count, etc.).
|
## is a numeric type (int, count, double, time, etc.).
|
||||||
##
|
##
|
||||||
## v: The vector instance to sort.
|
## v: The vector instance to sort.
|
||||||
##
|
##
|
||||||
|
@ -1415,7 +1355,10 @@ function sort%(v: any, ...%) : any
|
||||||
zeek::Func* comp = nullptr;
|
zeek::Func* comp = nullptr;
|
||||||
|
|
||||||
if ( @ARG@.size() > 2 )
|
if ( @ARG@.size() > 2 )
|
||||||
|
{
|
||||||
zeek::emit_builtin_error("sort() called with extraneous argument");
|
zeek::emit_builtin_error("sort() called with extraneous argument");
|
||||||
|
return rval;
|
||||||
|
}
|
||||||
|
|
||||||
if ( @ARG@.size() == 2 )
|
if ( @ARG@.size() == 2 )
|
||||||
{
|
{
|
||||||
|
@ -1427,36 +1370,27 @@ function sort%(v: any, ...%) : any
|
||||||
}
|
}
|
||||||
|
|
||||||
comp = comp_val->AsFunc();
|
comp = comp_val->AsFunc();
|
||||||
}
|
|
||||||
|
|
||||||
if ( ! comp && ! IsIntegral(elt_type->Tag()) )
|
|
||||||
zeek::emit_builtin_error("comparison function required for sort() with non-integral types");
|
|
||||||
|
|
||||||
auto vv = v->As<zeek::VectorVal*>();
|
|
||||||
|
|
||||||
if ( comp )
|
|
||||||
{
|
|
||||||
const auto& comp_type = comp->GetType();
|
const auto& comp_type = comp->GetType();
|
||||||
|
|
||||||
if ( comp_type->Yield()->Tag() != zeek::TYPE_INT ||
|
if ( comp_type->Yield()->Tag() != zeek::TYPE_INT ||
|
||||||
! comp_type->ParamList()->AllMatch(elt_type, 0) )
|
! comp_type->ParamList()->AllMatch(elt_type, 0) ||
|
||||||
|
comp_type->ParamList()->GetTypes().size() != 2 )
|
||||||
{
|
{
|
||||||
zeek::emit_builtin_error("invalid comparison function in call to sort()");
|
zeek::emit_builtin_error("invalid comparison function in call to sort()");
|
||||||
return rval;
|
return rval;
|
||||||
}
|
}
|
||||||
|
|
||||||
sort_function_comp = comp;
|
|
||||||
|
|
||||||
vv->Sort(sort_function);
|
|
||||||
}
|
}
|
||||||
else
|
|
||||||
|
if ( ! comp && ! IsIntegral(elt_type->Tag()) &&
|
||||||
|
elt_type->InternalType() != TYPE_INTERNAL_DOUBLE )
|
||||||
{
|
{
|
||||||
if ( elt_type->InternalType() == zeek::TYPE_INTERNAL_UNSIGNED )
|
zeek::emit_builtin_error("comparison function required for sort() with non-numeric types");
|
||||||
vv->Sort(unsigned_sort_function);
|
return rval;
|
||||||
else
|
|
||||||
vv->Sort(signed_sort_function);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
auto vv = v->As<zeek::VectorVal*>();
|
||||||
|
vv->Sort(comp);
|
||||||
|
|
||||||
return rval;
|
return rval;
|
||||||
%}
|
%}
|
||||||
|
|
||||||
|
@ -1473,19 +1407,22 @@ function sort%(v: any, ...%) : any
|
||||||
## .. zeek:see:: sort
|
## .. zeek:see:: sort
|
||||||
function order%(v: any, ...%) : index_vec
|
function order%(v: any, ...%) : index_vec
|
||||||
%{
|
%{
|
||||||
auto result_v = zeek::make_intrusive<zeek::VectorVal>(zeek::id::index_vec);
|
auto err_v = zeek::make_intrusive<zeek::VectorVal>(zeek::id::index_vec);
|
||||||
|
|
||||||
if ( v->GetType()->Tag() != zeek::TYPE_VECTOR )
|
if ( v->GetType()->Tag() != zeek::TYPE_VECTOR )
|
||||||
{
|
{
|
||||||
zeek::emit_builtin_error("order() requires vector");
|
zeek::emit_builtin_error("order() requires vector");
|
||||||
return result_v;
|
return err_v;
|
||||||
}
|
}
|
||||||
|
|
||||||
const auto& elt_type = v->GetType()->Yield();
|
const auto& elt_type = v->GetType()->Yield();
|
||||||
zeek::Func* comp = nullptr;
|
zeek::Func* comp = nullptr;
|
||||||
|
|
||||||
if ( @ARG@.size() > 2 )
|
if ( @ARG@.size() > 2 )
|
||||||
|
{
|
||||||
zeek::emit_builtin_error("order() called with extraneous argument");
|
zeek::emit_builtin_error("order() called with extraneous argument");
|
||||||
|
return err_v;
|
||||||
|
}
|
||||||
|
|
||||||
if ( @ARG@.size() == 2 )
|
if ( @ARG@.size() == 2 )
|
||||||
{
|
{
|
||||||
|
@ -1497,58 +1434,27 @@ function order%(v: any, ...%) : index_vec
|
||||||
}
|
}
|
||||||
|
|
||||||
comp = comp_val->AsFunc();
|
comp = comp_val->AsFunc();
|
||||||
}
|
|
||||||
|
|
||||||
if ( ! comp && ! IsIntegral(elt_type->Tag()) )
|
|
||||||
zeek::emit_builtin_error("comparison function required for order() with non-integral types");
|
|
||||||
|
|
||||||
auto vv = v->As<zeek::VectorVal*>();
|
|
||||||
auto n = vv->Size();
|
|
||||||
|
|
||||||
// Set up initial mapping of indices directly to corresponding
|
|
||||||
// elements.
|
|
||||||
vector<size_t> ind_vv(n);
|
|
||||||
index_map.reserve(n);
|
|
||||||
size_t i;
|
|
||||||
for ( i = 0; i < n; ++i )
|
|
||||||
{
|
|
||||||
ind_vv[i] = i;
|
|
||||||
auto tmp_until_later_commit = vv->At(i);
|
|
||||||
index_map.emplace_back(&tmp_until_later_commit);
|
|
||||||
}
|
|
||||||
|
|
||||||
if ( comp )
|
|
||||||
{
|
|
||||||
const auto& comp_type = comp->GetType();
|
const auto& comp_type = comp->GetType();
|
||||||
if ( comp_type->Yield()->Tag() != zeek::TYPE_INT ||
|
if ( comp_type->Yield()->Tag() != zeek::TYPE_INT ||
|
||||||
! comp_type->ParamList()->AllMatch(elt_type, 0) )
|
! comp_type->ParamList()->AllMatch(elt_type, 0) ||
|
||||||
|
comp_type->ParamList()->GetTypes().size() != 2 )
|
||||||
{
|
{
|
||||||
zeek::emit_builtin_error("invalid comparison function in call to order()");
|
zeek::emit_builtin_error("invalid comparison function in call to order()");
|
||||||
return zeek::ValPtr{zeek::NewRef{}, v};
|
return zeek::ValPtr{zeek::NewRef{}, v};
|
||||||
}
|
}
|
||||||
|
|
||||||
sort_function_comp = comp;
|
|
||||||
|
|
||||||
sort(ind_vv.begin(), ind_vv.end(), indirect_sort_function);
|
|
||||||
}
|
}
|
||||||
else
|
|
||||||
|
if ( ! comp && ! IsIntegral(elt_type->Tag()) &&
|
||||||
|
elt_type->InternalType() != TYPE_INTERNAL_DOUBLE )
|
||||||
{
|
{
|
||||||
if ( elt_type->InternalType() == zeek::TYPE_INTERNAL_UNSIGNED )
|
zeek::emit_builtin_error("comparison function required for order() with non-numeric types");
|
||||||
sort(ind_vv.begin(), ind_vv.end(), indirect_unsigned_sort_function);
|
return err_v;
|
||||||
else
|
|
||||||
sort(ind_vv.begin(), ind_vv.end(), indirect_signed_sort_function);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
index_map = {};
|
auto vv = v->As<zeek::VectorVal*>();
|
||||||
|
|
||||||
// Now spin through ind_vv to read out the rearrangement.
|
return vv->Order(comp);
|
||||||
for ( i = 0; i < n; ++i )
|
|
||||||
{
|
|
||||||
int ind = ind_vv[i];
|
|
||||||
result_v->Assign(i, zeek::val_mgr->Count(ind));
|
|
||||||
}
|
|
||||||
|
|
||||||
return result_v;
|
|
||||||
%}
|
%}
|
||||||
|
|
||||||
# ===========================================================================
|
# ===========================================================================
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue