mirror of
https://github.com/zeek/zeek.git
synced 2025-10-06 00:28:21 +00:00
moving sort()/order() functionality into VectorVal
This commit is contained in:
parent
0299ea0894
commit
13e7ba3a00
3 changed files with 194 additions and 126 deletions
158
src/Val.cc
158
src/Val.cc
|
@ -3358,9 +3358,163 @@ ValPtr VectorVal::At(unsigned int index) const
|
|||
return (*vector_val)[index].ToVal(t);
|
||||
}
|
||||
|
||||
void VectorVal::Sort(bool cmp_func(const ValPtr& a, const ValPtr& b))
|
||||
// Script-level comparison function consulted by sort_function(). It is
// assigned by VectorVal::Sort() / VectorVal::Order() when the caller
// supplies a comparator.
static Func* sort_function_comp = nullptr;
|
||||
|
||||
// Used for indirect sorting to support order().
// Holds one pointer per element of the vector being ordered, appended in
// element order by VectorVal::Order() and dereferenced by the indirect_*
// comparators.
|
||||
static std::vector<const ZVal*> index_map;
|
||||
|
||||
// The yield type of the vector being sorted.
// Set by Sort()/Order() before sorting; sort_function() uses it to convert
// ZVal elements via ToVal().
|
||||
static TypePtr sort_type;
|
||||
// Result of IsManagedType(sort_type); sort_function() only checks for
// missing elements when this is true.
static bool sort_type_is_managed = false;
|
||||
|
||||
static bool sort_function(const ZVal& a, const ZVal& b)
|
||||
{
|
||||
// Placeholder - will be filled in by a later commit.
|
||||
// Missing values are only applicable for managed types.
|
||||
if ( sort_type_is_managed )
|
||||
{
|
||||
if ( ! a.ManagedVal() )
|
||||
return 0;
|
||||
if ( ! b.ManagedVal() )
|
||||
return 1;
|
||||
}
|
||||
|
||||
auto a_v = a.ToVal(sort_type);
|
||||
auto b_v = b.ToVal(sort_type);
|
||||
|
||||
auto result = sort_function_comp->Invoke(a_v, b_v);
|
||||
int int_result = result->CoerceToInt();
|
||||
|
||||
return int_result < 0;
|
||||
}
|
||||
|
||||
static bool signed_sort_function (const ZVal& a, const ZVal& b)
|
||||
{
|
||||
return a.AsInt() < b.AsInt();
|
||||
}
|
||||
|
||||
static bool unsigned_sort_function (const ZVal& a, const ZVal& b)
|
||||
{
|
||||
return a.AsCount() < b.AsCount();
|
||||
}
|
||||
|
||||
static bool double_sort_function (const ZVal& a, const ZVal& b)
|
||||
{
|
||||
return a.AsDouble() < b.AsDouble();
|
||||
}
|
||||
|
||||
static bool indirect_sort_function(size_t a, size_t b)
|
||||
{
|
||||
return sort_function(*index_map[a], *index_map[b]);
|
||||
}
|
||||
|
||||
static bool indirect_signed_sort_function(size_t a, size_t b)
|
||||
{
|
||||
return signed_sort_function(*index_map[a], *index_map[b]);
|
||||
}
|
||||
|
||||
static bool indirect_unsigned_sort_function(size_t a, size_t b)
|
||||
{
|
||||
return unsigned_sort_function(*index_map[a], *index_map[b]);
|
||||
}
|
||||
|
||||
static bool indirect_double_sort_function(size_t a, size_t b)
|
||||
{
|
||||
return double_sort_function(*index_map[a], *index_map[b]);
|
||||
}
|
||||
|
||||
// Sorts the vector in place.
//
// If cmp_func is non-null, elements are ordered by invoking it (via
// sort_function()); otherwise a built-in "<" comparison is chosen from the
// yield type's internal type (signed, unsigned, or double).
//
// Reports a runtime error and leaves the vector untouched if this is a
// vector-of-any.
void VectorVal::Sort(Func* cmp_func)
	{
	if ( yield_types )
		{
		reporter->RuntimeError(GetLocationInfo(), "cannot sort a vector-of-any");
		// Presumably RuntimeError() does not return; the explicit return
		// mirrors Order() and guarantees we never sort in that case.
		return;
		}

	// The comparators are plain function pointers, so the element type is
	// handed to them through file-level state.
	sort_type = yield_type;
	sort_type_is_managed = IsManagedType(sort_type);

	bool (*sort_func)(const ZVal&, const ZVal&) = nullptr;

	if ( cmp_func )
		{
		sort_function_comp = cmp_func;
		sort_func = sort_function;
		}

	else
		{
		auto eti = sort_type->InternalType();

		if ( eti == TYPE_INTERNAL_INT )
			sort_func = signed_sort_function;
		else if ( eti == TYPE_INTERNAL_UNSIGNED )
			sort_func = unsigned_sort_function;
		else
			{
			// Without a comparator only numeric element types are
			// supported.
			ASSERT(eti == TYPE_INTERNAL_DOUBLE);
			sort_func = double_sort_function;
			}
		}

	std::sort(vector_val->begin(), vector_val->end(), sort_func);
	}
|
||||
|
||||
// Returns a new "index_vec" vector holding the indices of this vector's
// elements in sorted order; the vector itself is not modified.
//
// If cmp_func is non-null, elements are compared by invoking it (via
// indirect_sort_function()); otherwise a built-in "<" comparison is chosen
// from the yield type's internal type (signed, unsigned, or double).
//
// Reports a runtime error and returns nullptr if this is a vector-of-any.
VectorValPtr VectorVal::Order(Func* cmp_func)
	{
	if ( yield_types )
		{
		reporter->RuntimeError(GetLocationInfo(), "cannot order a vector-of-any");
		return nullptr;
		}

	// The comparators are plain function pointers, so the element type is
	// handed to them through file-level state.
	sort_type = yield_type;
	sort_type_is_managed = IsManagedType(sort_type);

	bool (*sort_func)(size_t, size_t) = nullptr;

	if ( cmp_func )
		{
		sort_function_comp = cmp_func;
		sort_func = indirect_sort_function;
		}

	else
		{
		auto eti = sort_type->InternalType();

		if ( eti == TYPE_INTERNAL_INT )
			sort_func = indirect_signed_sort_function;
		else if ( eti == TYPE_INTERNAL_UNSIGNED )
			sort_func = indirect_unsigned_sort_function;
		else
			{
			// Without a comparator only numeric element types are
			// supported.
			ASSERT(eti == TYPE_INTERNAL_DOUBLE);
			sort_func = indirect_double_sort_function;
			}
		}

	const size_t n = Size();

	// The indirect comparators look elements up by position in index_map,
	// so it must hold exactly this vector's elements. Start from empty in
	// case an earlier call left entries behind (presumably possible if the
	// comparator unwound past the clear() below).
	index_map.clear();
	index_map.reserve(n);

	// Set up initial mapping of indices directly to corresponding
	// elements.
	std::vector<size_t> ind_vv(n);

	for ( size_t i = 0; i < n; ++i )
		{
		ind_vv[i] = i;
		index_map.emplace_back(&(*vector_val)[i]);
		}

	std::sort(ind_vv.begin(), ind_vv.end(), sort_func);

	// Don't keep pointers into vector_val around after the sort.
	index_map.clear();

	// Now spin through ind_vv to read out the rearrangement.
	auto result_v = make_intrusive<VectorVal>(zeek::id::index_vec);

	for ( size_t i = 0; i < n; ++i )
		result_v->Assign(i, zeek::val_mgr->Count(ind_vv[i]));

	return result_v;
	}
|
||||
|
||||
unsigned int VectorVal::Resize(unsigned int new_num_elements)
|
||||
|
|
12
src/Val.h
12
src/Val.h
|
@ -1354,10 +1354,18 @@ public:
|
|||
bool Remove(unsigned int index);
|
||||
|
||||
/**
|
||||
* Sorts the vector in place, using the given comparison function.
|
||||
* Sorts the vector in place, using the given optional
|
||||
* comparison function.
|
||||
* @param cmp_func Comparison function for vector elements.
|
||||
*/
|
||||
void Sort(bool cmp_func(const ValPtr& a, const ValPtr& b));
|
||||
void Sort(Func* cmp_func = nullptr);
|
||||
|
||||
/**
|
||||
* Returns a "vector of count" holding the indices of this
|
||||
* vector when sorted using the given (optional) comparison function.
|
||||
* @param cmp_func Comparison function for vector elements.
|
||||
*/
|
||||
VectorValPtr Order(Func* cmp_func = nullptr);
|
||||
|
||||
protected:
|
||||
void ValDescribe(ODesc* d) const override;
|
||||
|
|
150
src/zeek.bif
150
src/zeek.bif
|
@ -1328,72 +1328,12 @@ function all_set%(v: any%) : bool
|
|||
return zeek::val_mgr->True();
|
||||
%}
|
||||
|
||||
%%{
|
||||
static zeek::Func* sort_function_comp = nullptr;
|
||||
static std::vector<const zeek::ValPtr*> index_map; // used for indirect sorting to support order()
|
||||
|
||||
bool sort_function(const zeek::ValPtr& a, const zeek::ValPtr& b)
|
||||
{
|
||||
// Sort missing values as "high".
|
||||
if ( ! a )
|
||||
return 0;
|
||||
if ( ! b )
|
||||
return 1;
|
||||
|
||||
auto result = sort_function_comp->Invoke(a, b);
|
||||
int int_result = result->CoerceToInt();
|
||||
|
||||
return int_result < 0;
|
||||
}
|
||||
|
||||
bool indirect_sort_function(size_t a, size_t b)
|
||||
{
|
||||
return sort_function(*index_map[a], *index_map[b]);
|
||||
}
|
||||
|
||||
bool signed_sort_function (const zeek::ValPtr& a, const zeek::ValPtr& b)
|
||||
{
|
||||
if ( ! a )
|
||||
return 0;
|
||||
if ( ! b )
|
||||
return 1;
|
||||
|
||||
auto ia = a->CoerceToInt();
|
||||
auto ib = b->CoerceToInt();
|
||||
|
||||
return ia < ib;
|
||||
}
|
||||
|
||||
bool unsigned_sort_function (const zeek::ValPtr& a, const zeek::ValPtr& b)
|
||||
{
|
||||
if ( ! a )
|
||||
return 0;
|
||||
if ( ! b )
|
||||
return 1;
|
||||
|
||||
auto ia = a->CoerceToUnsigned();
|
||||
auto ib = b->CoerceToUnsigned();
|
||||
|
||||
return ia < ib;
|
||||
}
|
||||
|
||||
bool indirect_signed_sort_function(size_t a, size_t b)
|
||||
{
|
||||
return signed_sort_function(*index_map[a], *index_map[b]);
|
||||
}
|
||||
|
||||
bool indirect_unsigned_sort_function(size_t a, size_t b)
|
||||
{
|
||||
return unsigned_sort_function(*index_map[a], *index_map[b]);
|
||||
}
|
||||
%%}
|
||||
|
||||
## Sorts a vector in place. The second argument is a comparison function that
|
||||
## takes two arguments: if the vector type is ``vector of T``, then the
|
||||
## comparison function must be ``function(a: T, b: T): int``, which returns
|
||||
## a value less than zero if ``a < b`` for some type-specific notion of the
|
||||
## less-than operator. The comparison function is optional if the type
|
||||
## is an integral type (int, count, etc.).
|
||||
## is a numeric type (int, count, double, time, etc.).
|
||||
##
|
||||
## v: The vector instance to sort.
|
||||
##
|
||||
|
@ -1415,7 +1355,10 @@ function sort%(v: any, ...%) : any
|
|||
zeek::Func* comp = nullptr;
|
||||
|
||||
if ( @ARG@.size() > 2 )
|
||||
{
|
||||
zeek::emit_builtin_error("sort() called with extraneous argument");
|
||||
return rval;
|
||||
}
|
||||
|
||||
if ( @ARG@.size() == 2 )
|
||||
{
|
||||
|
@ -1427,36 +1370,27 @@ function sort%(v: any, ...%) : any
|
|||
}
|
||||
|
||||
comp = comp_val->AsFunc();
|
||||
}
|
||||
|
||||
if ( ! comp && ! IsIntegral(elt_type->Tag()) )
|
||||
zeek::emit_builtin_error("comparison function required for sort() with non-integral types");
|
||||
|
||||
auto vv = v->As<zeek::VectorVal*>();
|
||||
|
||||
if ( comp )
|
||||
{
|
||||
const auto& comp_type = comp->GetType();
|
||||
|
||||
if ( comp_type->Yield()->Tag() != zeek::TYPE_INT ||
|
||||
! comp_type->ParamList()->AllMatch(elt_type, 0) )
|
||||
! comp_type->ParamList()->AllMatch(elt_type, 0) ||
|
||||
comp_type->ParamList()->GetTypes().size() != 2 )
|
||||
{
|
||||
zeek::emit_builtin_error("invalid comparison function in call to sort()");
|
||||
return rval;
|
||||
}
|
||||
|
||||
sort_function_comp = comp;
|
||||
|
||||
vv->Sort(sort_function);
|
||||
}
|
||||
else
|
||||
|
||||
if ( ! comp && ! IsIntegral(elt_type->Tag()) &&
|
||||
elt_type->InternalType() != TYPE_INTERNAL_DOUBLE )
|
||||
{
|
||||
if ( elt_type->InternalType() == zeek::TYPE_INTERNAL_UNSIGNED )
|
||||
vv->Sort(unsigned_sort_function);
|
||||
else
|
||||
vv->Sort(signed_sort_function);
|
||||
zeek::emit_builtin_error("comparison function required for sort() with non-numeric types");
|
||||
return rval;
|
||||
}
|
||||
|
||||
auto vv = v->As<zeek::VectorVal*>();
|
||||
vv->Sort(comp);
|
||||
|
||||
return rval;
|
||||
%}
|
||||
|
||||
|
@ -1473,19 +1407,22 @@ function sort%(v: any, ...%) : any
|
|||
## .. zeek:see:: sort
|
||||
function order%(v: any, ...%) : index_vec
|
||||
%{
|
||||
auto result_v = zeek::make_intrusive<zeek::VectorVal>(zeek::id::index_vec);
|
||||
auto err_v = zeek::make_intrusive<zeek::VectorVal>(zeek::id::index_vec);
|
||||
|
||||
if ( v->GetType()->Tag() != zeek::TYPE_VECTOR )
|
||||
{
|
||||
zeek::emit_builtin_error("order() requires vector");
|
||||
return result_v;
|
||||
return err_v;
|
||||
}
|
||||
|
||||
const auto& elt_type = v->GetType()->Yield();
|
||||
zeek::Func* comp = nullptr;
|
||||
|
||||
if ( @ARG@.size() > 2 )
|
||||
{
|
||||
zeek::emit_builtin_error("order() called with extraneous argument");
|
||||
return err_v;
|
||||
}
|
||||
|
||||
if ( @ARG@.size() == 2 )
|
||||
{
|
||||
|
@ -1497,58 +1434,27 @@ function order%(v: any, ...%) : index_vec
|
|||
}
|
||||
|
||||
comp = comp_val->AsFunc();
|
||||
}
|
||||
|
||||
if ( ! comp && ! IsIntegral(elt_type->Tag()) )
|
||||
zeek::emit_builtin_error("comparison function required for order() with non-integral types");
|
||||
|
||||
auto vv = v->As<zeek::VectorVal*>();
|
||||
auto n = vv->Size();
|
||||
|
||||
// Set up initial mapping of indices directly to corresponding
|
||||
// elements.
|
||||
vector<size_t> ind_vv(n);
|
||||
index_map.reserve(n);
|
||||
size_t i;
|
||||
for ( i = 0; i < n; ++i )
|
||||
{
|
||||
ind_vv[i] = i;
|
||||
auto tmp_until_later_commit = vv->At(i);
|
||||
index_map.emplace_back(&tmp_until_later_commit);
|
||||
}
|
||||
|
||||
if ( comp )
|
||||
{
|
||||
const auto& comp_type = comp->GetType();
|
||||
if ( comp_type->Yield()->Tag() != zeek::TYPE_INT ||
|
||||
! comp_type->ParamList()->AllMatch(elt_type, 0) )
|
||||
! comp_type->ParamList()->AllMatch(elt_type, 0) ||
|
||||
comp_type->ParamList()->GetTypes().size() != 2 )
|
||||
{
|
||||
zeek::emit_builtin_error("invalid comparison function in call to order()");
|
||||
return zeek::ValPtr{zeek::NewRef{}, v};
|
||||
}
|
||||
|
||||
sort_function_comp = comp;
|
||||
|
||||
sort(ind_vv.begin(), ind_vv.end(), indirect_sort_function);
|
||||
}
|
||||
else
|
||||
|
||||
if ( ! comp && ! IsIntegral(elt_type->Tag()) &&
|
||||
elt_type->InternalType() != TYPE_INTERNAL_DOUBLE )
|
||||
{
|
||||
if ( elt_type->InternalType() == zeek::TYPE_INTERNAL_UNSIGNED )
|
||||
sort(ind_vv.begin(), ind_vv.end(), indirect_unsigned_sort_function);
|
||||
else
|
||||
sort(ind_vv.begin(), ind_vv.end(), indirect_signed_sort_function);
|
||||
zeek::emit_builtin_error("comparison function required for order() with non-numeric types");
|
||||
return err_v;
|
||||
}
|
||||
|
||||
index_map = {};
|
||||
auto vv = v->As<zeek::VectorVal*>();
|
||||
|
||||
// Now spin through ind_vv to read out the rearrangement.
|
||||
for ( i = 0; i < n; ++i )
|
||||
{
|
||||
int ind = ind_vv[i];
|
||||
result_v->Assign(i, zeek::val_mgr->Count(ind));
|
||||
}
|
||||
|
||||
return result_v;
|
||||
return vv->Order(comp);
|
||||
%}
|
||||
|
||||
# ===========================================================================
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue