Replace siphash with Google implementation

This adds the entirety of the highwayhash implementation of Google.
This includes siphash as well as several highwayhash variants - which
are faster.

This first commit only switches out the siphash implementation. All
hashes that are generated are exactly the same as before. However, this
does make all other hashes available to be used by us.

I did some performance tests vs the previous siphash implementation by
running the 2009-M57-day11-18 trace 100x through both cases. The average
runtime was virtually the same (within 0.014 seconds of each other).

Note that the way that I included the highwayhash implementation in our
cmake setup is... well, let's say hacky. This definitely needs to be
changed a bit before including this in a real build.
This commit is contained in:
Johanna Amann 2020-04-22 19:08:20 -07:00
parent 6e2cd3ae44
commit 3937fff57f
10 changed files with 46 additions and 164 deletions

3
.gitmodules vendored
View file

@ -37,3 +37,6 @@
[submodule "aux/libkqueue"]
path = aux/libkqueue
url = https://github.com/zeek/libkqueue
[submodule "aux/highwayhash"]
path = aux/highwayhash
url = https://github.com/google/highwayhash

View file

@ -500,31 +500,25 @@ POSSIBILITY OF SUCH DAMAGE.
==============================================================================
%%% siphash24.c
%%% aux/highwayhash
==============================================================================
Taken from https://github.com/majek/csiphash with MIT License:
Taken from https://github.com/google/highwayhash with Apache 2 License:
Copyright (c) 2013 Marek Majkowski <marek@popcount.org>
Copyright 2017 Google Inc. All Rights Reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
http://www.apache.org/licenses/LICENSE-2.0
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================

1
aux/highwayhash Submodule

@ -0,0 +1 @@
Subproject commit 0aaf66bb8a1634ceee4b778df51a652bdf4e1f17

View file

@ -135,6 +135,24 @@ list(APPEND BINPAC_OUTPUTS "${BINPAC_OUTPUT_CC}")
binpac_target(binpac_bro-lib.pac)
list(APPEND BINPAC_OUTPUTS "${BINPAC_OUTPUT_CC}")
########################################################################
## Highwayhash
# Well, this is dirty - but it works. Copy highwayhash over to the build
# directory and build it there using make.
#
# The libhighwayhash target runs `make` inside the copied tree and declares
# the static library lib/libhighwayhash.a as its byproduct. It depends on the
# copied directory, which is the output of the custom command further down.
add_custom_target(
libhighwayhash
BYPRODUCTS ${CMAKE_BINARY_DIR}/aux/highwayhash/lib/libhighwayhash.a
DEPENDS ${CMAKE_BINARY_DIR}/aux/highwayhash
COMMAND make
WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/aux/highwayhash
)
# Produces the build-tree copy of aux/highwayhash that the target above
# builds in, by copying the whole directory out of the source tree.
add_custom_command(OUTPUT ${CMAKE_BINARY_DIR}/aux/highwayhash
COMMAND ${CMAKE_COMMAND} -E copy_directory
${CMAKE_SOURCE_DIR}/aux/highwayhash ${CMAKE_BINARY_DIR}/aux/highwayhash)
# Put the copied tree at the front of the include path.
# NOTE(review): presumably this is what lets "highwayhash/sip_hash.h" resolve
# from the copy - confirm against the submodule's directory layout.
include_directories(BEFORE ${CMAKE_BINARY_DIR}/aux/highwayhash)
########################################################################
## Including subdirectories.
########################################################################
@ -290,7 +308,6 @@ set(MAIN_SRCS
PacketDumper.cc
strsep.c
modp_numtoa.c
siphash24.c
supervisor/Supervisor.cc
@ -339,7 +356,7 @@ set(bro_SRCS
collect_headers(bro_HEADERS ${bro_SRCS})
add_executable(zeek ${bro_SRCS} ${bro_HEADERS} ${bro_SUBDIR_LIBS} ${bro_PLUGIN_LIBS})
target_link_libraries(zeek ${zeekdeps} ${CMAKE_THREAD_LIBS_INIT} ${CMAKE_DL_LIBS})
target_link_libraries(zeek ${zeekdeps} ${CMAKE_THREAD_LIBS_INIT} ${CMAKE_DL_LIBS} ${CMAKE_BINARY_DIR}/aux/highwayhash/lib/libhighwayhash.a)
if ( NOT "${bro_LINKER_FLAGS}" STREQUAL "" )
set_target_properties(zeek PROPERTIES LINK_FLAGS "${bro_LINKER_FLAGS}")

View file

@ -21,7 +21,7 @@
#include "Reporter.h"
#include "BroString.h"
#include "siphash24.h"
#include "highwayhash/sip_hash.h"
void init_hash_function()
{
@ -158,8 +158,7 @@ hash_t HashKey::HashBytes(const void* bytes, int size)
{
if ( size <= UHASH_KEY_SIZE )
{
hash_t digest;
siphash(&digest, (const uint8_t *)bytes, size, shared_siphash_key);
hash_t digest = highwayhash::SipHash(shared_siphash_key, reinterpret_cast<const char *>(bytes), size);
return digest;
}

View file

@ -8,7 +8,7 @@
#include "NetVar.h"
#include "digest.h"
#include "siphash24.h"
#include "highwayhash/sip_hash.h"
#include <broker/data.hh>
@ -106,14 +106,10 @@ UHF::UHF(Hasher::seed_t arg_seed)
// times.
Hasher::digest UHF::hash(const void* x, size_t n) const
{
assert(sizeof(Hasher::seed_t) == SIPHASH_KEYLEN);
assert(sizeof(Hasher::seed_t) == 16); // siphash always needs a 128 bit seed
if ( n <= UHASH_KEY_SIZE )
{
hash_t outdigest;
siphash(&outdigest, reinterpret_cast<const uint8_t*>(x), n, reinterpret_cast<const uint8_t*>(&seed));
return outdigest;
}
return highwayhash::SipHash(*(reinterpret_cast<const highwayhash::SipHashState::Key*>(&seed)), reinterpret_cast<const char*>(x), n);
union {
unsigned char d[16];

View file

@ -1,114 +0,0 @@
/* <MIT License>
Copyright (c) 2013 Marek Majkowski <marek@popcount.org>
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
</MIT License>
Original location:
https://github.com/majek/csiphash/
Solution inspired by code from:
Samuel Neves (supercop/crypto_auth/siphash24/little)
djb (supercop/crypto_auth/siphash24/little2)
Jean-Philippe Aumasson (https://131002.net/siphash/siphash24.c)
*/
#include <stdint.h>
#if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \
	__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#  define _le64toh(x) ((uint64_t)(x))
#elif defined(_WIN32)
/* Windows is always little endian, unless you're on xbox360
   http://msdn.microsoft.com/en-us/library/b0084kay(v=vs.80).aspx */
#  define _le64toh(x) ((uint64_t)(x))
#elif defined(__APPLE__)
#  include <libkern/OSByteOrder.h>
#  define _le64toh(x) OSSwapLittleToHostInt64(x)
#else
/* See: http://sourceforge.net/p/predef/wiki/Endianness/ */
#  if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__)
#    include <sys/endian.h>
#  else
#    include <endian.h>
#  endif
#  if defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && \
	__BYTE_ORDER == __LITTLE_ENDIAN
#    define _le64toh(x) ((uint64_t)(x))
#  else
#    define _le64toh(x) le64toh(x)
#  endif
#endif

/* Rotate the 64-bit value x left by b bits (0 < b < 64). */
#define ROTATE(x, b) (uint64_t)( ((x) << (b)) | ( (x) >> (64 - (b))) )

/* One half of a SipRound; operates on its arguments in place. */
#define HALF_ROUND(a,b,c,d,s,t) \
	a += b; c += d; \
	b = ROTATE(b, s) ^ a; \
	d = ROTATE(d, t) ^ c; \
	a = ROTATE(a, 32);

/* Two full SipRounds (four half rounds) on the state v0..v3. */
#define DOUBLE_ROUND(v0,v1,v2,v3) \
	HALF_ROUND(v0,v1,v2,v3,13,16); \
	HALF_ROUND(v2,v1,v0,v3,17,21); \
	HALF_ROUND(v0,v1,v2,v3,13,16); \
	HALF_ROUND(v2,v1,v0,v3,17,21);

/* Assemble a 64-bit word from 8 bytes stored in little-endian order.
 *
 * Reading byte by byte keeps the access valid for any alignment of p and
 * yields the same value on every host byte order, so no byte swap is
 * needed afterwards. */
static uint64_t siphash_load_le64(const uint8_t *p) {
	return  (uint64_t)p[0]        |
	       ((uint64_t)p[1] <<  8) |
	       ((uint64_t)p[2] << 16) |
	       ((uint64_t)p[3] << 24) |
	       ((uint64_t)p[4] << 32) |
	       ((uint64_t)p[5] << 40) |
	       ((uint64_t)p[6] << 48) |
	       ((uint64_t)p[7] << 56);
}

/* Compute the 64-bit SipHash-2-4 digest of a byte buffer.
 *
 * src     pointer to the message bytes; no alignment is required. It is
 *         not dereferenced when src_sz is 0.
 * src_sz  number of message bytes.
 * key     pointer to two 64-bit key words, each interpreted as
 *         little-endian.
 *
 * Returns the 64-bit digest.
 */
uint64_t siphash24(const void *src, unsigned long src_sz, const uint64_t* key) {
	uint64_t k0 = _le64toh(key[0]);
	uint64_t k1 = _le64toh(key[1]);

	/* The final block carries the message length (mod 256) in its top byte. */
	uint64_t b = (uint64_t)src_sz << 56;
	const uint8_t *in = (const uint8_t *)src;

	uint64_t v0 = k0 ^ 0x736f6d6570736575ULL;
	uint64_t v1 = k1 ^ 0x646f72616e646f6dULL;
	uint64_t v2 = k0 ^ 0x6c7967656e657261ULL;
	uint64_t v3 = k1 ^ 0x7465646279746573ULL;

	/* Absorb every complete 8-byte block. */
	while (src_sz >= 8) {
		uint64_t mi = siphash_load_le64(in);
		in += 8; src_sz -= 8;
		v3 ^= mi;
		DOUBLE_ROUND(v0,v1,v2,v3);
		v0 ^= mi;
	}

	/* Fold the 0..7 remaining bytes into the low bytes of the final
	 * block, least significant byte first. */
	uint64_t t = 0;
	for (unsigned long i = 0; i < src_sz; ++i)
		t |= (uint64_t)in[i] << (8 * i);

	b |= t;

	v3 ^= b;
	DOUBLE_ROUND(v0,v1,v2,v3);
	v0 ^= b; v2 ^= 0xff;

	/* Finalization: four more SipRounds. */
	DOUBLE_ROUND(v0,v1,v2,v3);
	DOUBLE_ROUND(v0,v1,v2,v3);
	return (v0 ^ v1) ^ (v2 ^ v3);
}

View file

@ -1,15 +0,0 @@
#pragma once
#include <stdint.h>
#define SIPHASH_KEYLEN 16
extern "C" {
uint64_t siphash24(const void* src, unsigned long src_sz, const uint64_t* key);
}
// [Bro] Wrapper for better type-safety.
//
// Hashes the inlen bytes at in with siphash24(), using the 16 key bytes at
// key, and stores the 64-bit result in *digest.
inline void siphash(uint64_t* digest, const uint8_t* in, uint64_t inlen, const uint8_t* key)
	{
	// siphash24() reads the key through a uint64_t pointer. The caller
	// hands us a plain byte buffer with no alignment guarantee, so copy it
	// bytewise into properly aligned storage instead of casting the
	// pointer. The in-memory byte order is preserved, so the digest is the
	// same as with a direct cast.
	uint64_t key_words[2];
	uint8_t* key_bytes = reinterpret_cast<uint8_t*>(key_words);

	for ( unsigned int i = 0; i < sizeof(key_words); ++i )
		key_bytes[i] = key[i];

	*digest = siphash24(in, inlen, key_words);
	}

View file

@ -1001,7 +1001,7 @@ bool hmac_key_set = false;
uint8_t shared_hmac_md5_key[16];
bool siphash_key_set = false;
uint8_t shared_siphash_key[SIPHASH_KEYLEN];
highwayhash::SipHashState::Key shared_siphash_key;
void hmac_md5(size_t size, const unsigned char* bytes, unsigned char digest[16])
{
@ -1181,8 +1181,9 @@ void init_random_seed(const char* read_file, const char* write_file)
if ( ! siphash_key_set )
{
assert(sizeof(buf) - 64 == SIPHASH_KEYLEN);
memcpy(shared_siphash_key, reinterpret_cast<const char*>(buf) + 64, SIPHASH_KEYLEN);
assert(sizeof(buf) - 64 == 16); // siphash key length is always 128 bits, independent of implementation
assert(sizeof(shared_siphash_key) == 16);
memcpy(shared_siphash_key, reinterpret_cast<const char*>(buf) + 64, 16);
siphash_key_set = true;
}

View file

@ -25,9 +25,9 @@
#include <stdarg.h>
#include <libgen.h>
#include <memory> // std::unique_ptr
#include "highwayhash/sip_hash.h"
#include "zeek-config.h"
#include "siphash24.h"
#ifdef DEBUG
@ -203,7 +203,7 @@ extern std::string strstrip(std::string s);
extern bool hmac_key_set;
extern uint8_t shared_hmac_md5_key[16];
extern bool siphash_key_set;
extern uint8_t shared_siphash_key[SIPHASH_KEYLEN];
extern highwayhash::SipHashState::Key shared_siphash_key;
extern void hmac_md5(size_t size, const unsigned char* bytes,
unsigned char digest[16]);