mirror of
https://github.com/zeek/zeek.git
synced 2025-10-14 20:48:21 +00:00
113 lines
2.3 KiB
C++
113 lines
2.3 KiB
C++
#include <math.h>
|
|
#include <stdint.h>
|
|
#include "HyperLogLog.h"
|
|
#include <iostream>
|
|
|
|
using namespace std;
|
|
|
|
// Picks the bucket-index width b (the counter later uses 2^b buckets) for a
// requested relative error.
//
// error: target error margin; it is passed straight to log().
// Returns the smallest integer b above floor(2*log2(1.04/error)) for which
// erf(2^((b - exact)/2) / sqrt(2)) is no longer below `conf`.
//
// NOTE(review): `conf` is defined outside this function; presumably it is the
// required confidence level -- confirm against the header.
int CardinalityCounter::optimalB(double error){
    // Real-valued solution of error = 1.04 / sqrt(2^b) for b.
    const double exact_bits = 2*(log(1.04)-log(error))/log(2);
    int bits = static_cast<int>(floor(exact_bits));

    // Always step at least once past the floored value, then keep widening
    // until the erf() term reaches `conf`.
    for (;;) {
        ++bits;
        const double k = pow(2, (bits - exact_bits)/2);
        if (!(erf(k/sqrt(2)) < conf))
            break;
    }

    return bits;
}
|
|
|
|
|
|
// Builds an empty counter sized for the requested error margin.
//
// error_margin: forwarded to optimalB() to obtain the index width b; the
// counter then allocates m = 2^b one-byte buckets, all starting at zero.
CardinalityCounter :: CardinalityCounter(double error_margin){
    const int b = optimalB(error_margin);
    m = static_cast<uint64_t>(pow(2, b));

    // Bias-correction constant: fixed values for the three smallest tabulated
    // sizes, closed-form expression for everything else.
    switch (m) {
    case 16:
        alpha_m = 0.673;
        break;
    case 32:
        alpha_m = 0.697;
        break;
    case 64:
        alpha_m = 0.709;
        break;
    default:
        alpha_m = 0.7213/(1+1.079/m);
        break;
    }

    // The trailing () value-initialises the array, so every bucket is 0.
    buckets = new uint8_t[m]();

    // Every bucket is still zero at this point.
    V = m;
}
|
|
|
|
// Releases the bucket array allocated by the constructor.
//
// Only `buckets` comes from new[]. m, V and alpha_m are plain data members
// stored inside the object itself, so they must not be passed to delete:
// doing so is undefined behaviour (typically heap corruption or a crash).
CardinalityCounter :: ~CardinalityCounter(){
    delete [] buckets;
}
|
|
|
|
// Returns the 1-based position of the lowest set bit in hash_modified / m,
// i.e. (number of trailing zero bits) + 1.
//
// hash_modified: hash value whose bucket-index part is divided away here.
//
// If hash_modified / m is zero there is no set bit to find. The previous
// loop-only implementation never terminated in that case (0 is always even),
// which addElement() hits for any hash smaller than m. The usual HyperLogLog
// convention is used instead: an all-zero word of w bits has rank w + 1,
// where w is the bit width of the largest possible quotient.
uint8_t CardinalityCounter :: rank(uint64_t hash_modified){
    uint64_t remaining = hash_modified / m;

    if (remaining == 0) {
        // Count how many bits a quotient can occupy at most.
        uint8_t width = 0;
        for (uint64_t span = ~static_cast<uint64_t>(0) / m; span != 0; span /= 2)
            width++;
        return static_cast<uint8_t>(width + 1);
    }

    // remaining is non-zero, so a set bit exists and the loop terminates.
    // Working on the quotient directly also avoids the former "*= 2", which
    // could overflow and drop the top bit when m == 1.
    uint8_t answer = 1;
    while (remaining % 2 == 0) {
        remaining /= 2;
        answer++;
    }
    return answer;
}
|
|
|
|
|
|
|
|
void CardinalityCounter::addElement(uint64_t hash){
|
|
uint64_t index = hash % m;
|
|
hash = hash-index;
|
|
|
|
if(buckets[index] == 0)
|
|
V--;
|
|
uint8_t temp = rank(hash);
|
|
if(temp > buckets[index]){
|
|
buckets[index] = temp;
|
|
}
|
|
}
|
|
|
|
double CardinalityCounter::size(){
|
|
double answer = 0;
|
|
for(int i = 0; i < m; i++){
|
|
answer += pow(2, -(int)buckets[i]);
|
|
}
|
|
answer = 1/answer;
|
|
answer = alpha_m*m*m*answer;
|
|
|
|
if(answer <= 5*(double)(m/2)){
|
|
return m*log((double) m/V);
|
|
}
|
|
else if(answer <= pow(2,64)/30){
|
|
return answer;
|
|
}
|
|
else{
|
|
return -pow(2,64)*log(1-answer/pow(2,64));
|
|
}
|
|
}
|
|
|
|
// Folds another counter into this one by taking the bucket-wise maximum, and
// recounts V as the number of buckets that are still zero afterwards.
//
// c: counter to merge from; it is only read. The merge is skipped (this
// counter is left untouched) when c is null or has a different number of
// buckets: indexing c's array with this counter's m would otherwise read
// past its end. The function returns void, so a skipped merge cannot be
// reported to the caller.
void CardinalityCounter::merge(CardinalityCounter* c){
    if (!c || c->getM() != m)
        return;

    uint8_t* other = c->getBuckets();

    V = 0;
    // Unsigned index to match the type of m.
    for (uint64_t i = 0; i < m; i++) {
        if (other[i] > buckets[i])
            buckets[i] = other[i];

        if (buckets[i] == 0)
            V += 1;
    }
}
|
|
|
|
uint8_t* CardinalityCounter::getBuckets(){
|
|
return buckets;
|
|
}
|
|
|
|
uint64_t CardinalityCounter::getM(){
|
|
return m;
|
|
}
|