mirror of
https://github.com/zeek/zeek.git
synced 2025-10-15 21:18:20 +00:00
Merge remote-tracking branch 'origin/topic/bernhard/topk' into topic/robin/topk-merge
* origin/topic/bernhard/topk: update documentation, rename get* to Get* and make hasher persistent Conflicts: src/probabilistic/Topk.cc src/probabilistic/Topk.h src/probabilistic/top-k.bif
This commit is contained in:
commit
f6e5de91fa
4 changed files with 178 additions and 86 deletions
|
@ -38,51 +38,101 @@ declare(PDict, Element);
|
|||
class TopkVal : public OpaqueVal {
|
||||
|
||||
public:
|
||||
// Initialize a TopkVal. Size specifies how many total elements are
|
||||
// tracked
|
||||
/**
|
||||
* Construct a TopkVal.
|
||||
*
|
||||
* @param size specifies how many total elements are tracked
|
||||
*
|
||||
* @return A newly initialized TopkVal
|
||||
*/
|
||||
TopkVal(uint64 size);
|
||||
|
||||
/**
|
||||
* Destructor.
|
||||
*/
|
||||
~TopkVal();
|
||||
|
||||
// Call this, when a new value is encountered. Note that on the first call,
|
||||
// the Bro-Type of the value types that are counted is set. All following calls
|
||||
// to encountered have to specify the same type
|
||||
void Encountered(Val* value);
|
||||
/**
|
||||
* Call this when a new value is encountered. Note that on the first call,
|
||||
* the Bro-Type of the value types that are counted is set. All following calls
|
||||
* to Encountered() have to specify the same type.
|
||||
*
|
||||
* @param value The encountered element
|
||||
*/
|
||||
void Encountered(Val* value);
|
||||
|
||||
// Return the first k elements of the result vector. At the moment, this does
|
||||
// not check if it is in the right order or if we can prove that these are
|
||||
// the correct top-k. Use count and epsilon for this.
|
||||
VectorVal* getTopK(int k) const; // returns vector
|
||||
/**
|
||||
* Get the first k elements of the result vector. At the moment, this does
|
||||
* not check if it is in the right order or if we can prove that these are
|
||||
* the correct top-k. Use GetCount() and GetEpsilon() for this.
|
||||
*
|
||||
* @param k Number of top-elements to return
|
||||
*
|
||||
* @returns The top-k encountered elements
|
||||
*/
|
||||
VectorVal* GetTopK(int k) const;
|
||||
|
||||
// Get the current count tracked in the top-k data structure for a certain val.
|
||||
// Returns 0 if the val is unknown (and logs the error to reporter)
|
||||
uint64_t getCount(Val* value) const;
|
||||
/**
|
||||
* Get the current count tracked in the top-k data structure for a certain val.
|
||||
* Returns 0 if the val is unknown (and logs the error to reporter)
|
||||
*
|
||||
* @param value Bro value to get counts for
|
||||
*
|
||||
* @returns internal count for val, 0 if unknown
|
||||
*/
|
||||
uint64_t GetCount(Val* value) const;
|
||||
|
||||
// Get the current epsilon tracked in the top-k data structure for a certain val.
|
||||
// Returns 0 if the val is unknown (and logs the error to reporter)
|
||||
uint64_t getEpsilon(Val* value) const;
|
||||
/**
|
||||
* Get the current epsilon tracked in the top-k data structure for a certain val.
|
||||
*
|
||||
* @param value Bro value to get epsilons for
|
||||
*
|
||||
* @returns the epsilon. Returns 0 if the val is unknown (and logs the error to reporter)
|
||||
*/
|
||||
uint64_t GetEpsilon(Val* value) const;
|
||||
|
||||
// Get the size set in the constructor
|
||||
uint64_t getSize() const { return size; }
|
||||
/**
|
||||
* Get the size set in the constructor
|
||||
*
|
||||
* @returns size of the top-k structure
|
||||
*/
|
||||
uint64_t GetSize() const { return size; }
|
||||
|
||||
// Get the sum of all counts of all tracked elements. This is equal to the number
|
||||
// of total observations up to this moment, if no elements were pruned from the data
|
||||
// structure.
|
||||
uint64_t getSum() const;
|
||||
/**
|
||||
* Get the sum of all counts of all tracked elements. This is equal to the number
|
||||
* of total observations up to this moment, if no elements were pruned from the data
|
||||
* structure.
|
||||
*
|
||||
* @returns sum of all counts
|
||||
*/
|
||||
uint64_t GetSum() const;
|
||||
|
||||
// Merge another top-k data structure in this one.
|
||||
// doPrune specifies if the total count of elements is limited to size after
|
||||
// merging.
|
||||
// Please note, that pruning will invalidate the results of getSum.
|
||||
/**
|
||||
* Merge another top-k data structure into this one.
|
||||
* doPrune specifies if the total count of elements is limited to size after
|
||||
* merging.
|
||||
* Please note that pruning will invalidate the results of GetSum().
|
||||
*
|
||||
* @param value TopkVal to merge into this TopkVal
|
||||
*
|
||||
* @param doPrune prune resulting TopkVal to size after merging
|
||||
*/
|
||||
void Merge(const TopkVal* value, bool doPrune=false);
|
||||
|
||||
protected:
|
||||
TopkVal(); // for deserialize
|
||||
/**
|
||||
* Construct an empty TopkVal.
|
||||
* Only used for deserialization
|
||||
*/
|
||||
TopkVal();
|
||||
|
||||
private:
|
||||
void IncrementCounter(Element* e, unsigned int count = 1);
|
||||
HashKey* GetHash(Val*) const; // this probably should go somewhere else.
|
||||
|
||||
void Typify(BroType*);
|
||||
|
||||
BroType* type;
|
||||
CompositeHash* hash;
|
||||
std::list<Bucket*> buckets;
|
||||
PDict(Element)* elementDict;
|
||||
uint64 size; // how many elements are we tracking?
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue