mirror of
https://github.com/zeek/zeek.git
synced 2025-10-05 08:08:19 +00:00
Change Hashing from H3 to Siphash.
This commit mostly changes the hash function that is used for internal hashing of data < 36 bytes from H3 to Siphash. This change is motivated by the fact that H3 apparently does not deliver a very good source of data uniqueness; running HLL with H3 as the hashing function gives quite poor results (up to 75% off in my tests). In contrast, running HLL with Siphash (or HMAC-MD5) reduces this error to ~2%. This also fixes a long-standing bug in Hash.h which truncated our hash values to 32 bits on most machines. Furthermore, it once again fixes a problem with the Rank function in HLL.
This commit is contained in:
parent
c15f48661d
commit
e1218cc7fa
10 changed files with 257 additions and 25 deletions
|
@ -117,7 +117,9 @@ CardinalityCounter::~CardinalityCounter()
|
|||
// Computes a rank value from a 64-bit hash: the hash is shifted right by p,
// and the result is 64 - p - (last-set-bit index of the shifted hash) + 1.
// `p` is not declared in this hunk; presumably it is a member of
// CardinalityCounter -- confirm against the class definition.
//
// NOTE(review): this is a diff hunk rendered without +/- markers, so two
// declarations of `answer` appear below. Presumably the fls() line is the
// one removed by the commit and the flsll() line is its replacement --
// confirm against the commit before treating this text as compilable code.
uint8_t CardinalityCounter::Rank(uint64_t hash_modified) const
|
||||
{
|
||||
// Discard the low p bits of the hash.
hash_modified = hash_modified >> p;
|
||||
int answer = 64 - p - fls(hash_modified) + 1;
|
||||
// flsll() yields the 1-based index of the highest set bit, or 0 for a zero
// input, so an all-zero remainder gives the largest value, 64 - p + 1.
int answer = 64 - p - CardinalityCounter::flsll(hash_modified) + 1;
|
||||
// Sanity check: the computed value must lie strictly between 0 and 64.
assert(answer > 0 && answer < 64);
|
||||
|
||||
// The int result is implicitly converted to the uint8_t return type.
return answer;
|
||||
}
|
||||
|
||||
|
@ -238,3 +240,50 @@ CardinalityCounter* CardinalityCounter::Unserialize(UnserialInfo* info)
|
|||
|
||||
return c;
|
||||
}
|
||||
|
||||
/* The following function is copied from libc/string/flsll.c from the FreeBSD source
|
||||
* tree. Original copyright message follows
|
||||
*/
|
||||
/*-
|
||||
* Copyright (c) 1990, 1993
|
||||
* The Regents of the University of California. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. Neither the name of the University nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Find Last Set bit
|
||||
*/
|
||||
int
|
||||
CardinalityCounter::flsll(uint64_t mask)
|
||||
{
|
||||
int bit;
|
||||
|
||||
if (mask == 0)
|
||||
return (0);
|
||||
for (bit = 1; mask != 1; bit++)
|
||||
mask = (uint64_t)mask >> 1;
|
||||
return (bit);
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue