Improvements in FNV1A hash functions

- Add the FNV1a64 hash function
- Make the hash functions type-aware
- Add unit tests
This commit is contained in:
Martin van Hensbergen 2024-10-09 21:19:14 +02:00
parent 4656faed6c
commit ee84c0c59a
9 changed files with 154 additions and 50 deletions

View file

@ -252,6 +252,7 @@ static std::unordered_map<std::string, unsigned int> func_attrs = {
{"fmt", ATTR_FOLDABLE},
{"fmt_ftp_port", ATTR_IDEMPOTENT}, // can error
{"fnv1a32", ATTR_FOLDABLE},
{"fnv1a64", ATTR_FOLDABLE},
{"generate_all_events", ATTR_NO_SCRIPT_SIDE_EFFECTS},
{"get_broker_stats", ATTR_NO_ZEEK_SIDE_EFFECTS},
{"get_conn_stats", ATTR_NO_ZEEK_SIDE_EFFECTS},

View file

@ -922,6 +922,37 @@ function paraglob_equals%(p_one: opaque of paraglob, p_two: opaque of paraglob%)
);
%}
%%{
// Computes an FNV-1a style digest over the bytes of a Zeek value.
//
// A string value is hashed over its own bytes; any other value is hashed over
// the binary rendering that Describe() writes into an ODesc. The same textual
// content can therefore hash differently depending on the value's type.
//
// input:  value to hash. It is dereferenced unconditionally, so it must not
//         be null.
// offset: initial hash state.
// prime:  multiplier applied after each input byte has been XORed in.
//
// Returns the hash state after all bytes have been consumed; for empty input
// that is `offset` unchanged.
//
// NOTE(review): IntType is presumably always an unsigned integer type so that
// the multiplication wraps instead of overflowing -- the callers visible in
// this file pass uint32_t and uint64_t.
template<typename IntType>
IntType fnva(zeek::Val* input, IntType offset, IntType prime) {
    // Declared at function scope on purpose: in the non-string branch `bytes`
    // is obtained from `desc`, so `desc` has to stay alive until the loop
    // below has finished reading.
    zeek::ODesc desc(zeek::DESC_BINARY);
    size_t length = 0;
    const u_char* bytes = nullptr;

    if ( input->GetType()->Tag() == zeek::TYPE_STRING ) {
        // Strings are hashed directly, without going through Describe().
        auto* str = static_cast<zeek::StringVal*>(input);
        length = str->Len();
        bytes = str->Bytes();
    }
    else {
        input->Describe(&desc);
        bytes = desc.Bytes();
        length = desc.Len();
    }

    IntType rval = offset;
    for ( size_t i = 0; i < length; ++i ) {
        // FNV-1a order: XOR the byte in first, then multiply.
        rval ^= bytes[i];
        rval *= prime;
    }

    return rval;
}
%%}
## Returns a 32-bit digest of arbitrary input values using the FNV-1a hash algorithm.
## See `<https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function>`_.
##
@ -932,21 +963,26 @@ function paraglob_equals%(p_one: opaque of paraglob, p_two: opaque of paraglob%)
## .. zeek:see:: hrw_weight
function fnv1a32%(input: any%): count
%{
// NOTE(review): this span reads like a diff whose +/- markers were stripped,
// so lines of two different implementations appear interleaved (two sets of
// constants, and a byte loop after the closing %}). Confirm against the real
// file before relying on the statement order shown here.
//
// Render the input in binary form into `desc` and take a pointer to its bytes.
zeek::ODesc desc(DESC_BINARY);
input->Describe(&desc);
auto bytes = desc.Bytes();
// Starting value and multiplier that are passed to fnva<uint32_t>() below.
uint32_t offset = 2166136261;
uint32_t prime = 16777619;
// The same two values under different names; these are the ones used by the
// hand-written byte loop further down.
uint32_t offset32 = 2166136261;
uint32_t prime32 = 16777619;
uint32_t rval = offset32;
// Hash via the shared helper and hand the result back as a count.
auto hash = fnva<uint32_t>(input, offset, prime);
return zeek::val_mgr->Count(hash);
%}
for ( auto i = 0; i < desc.Len(); ++i )
{
// For each byte: XOR it into the running hash, then multiply by the prime.
rval ^= (uint32_t) bytes[i];
rval *= prime32;
}
## Returns a 64-bit digest of arbitrary input values using the FNV-1a hash algorithm.
## See `<https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function>`_.
##
## input: The desired input value to hash.
##
## Returns: The hashed value.
function fnv1a64%(input: any%): count
%{
// 64-bit starting value and multiplier that are passed to fnva() below.
uint64_t offset = 0xCBF29CE484222325;
uint64_t prime = 0x100000001B3;
// NOTE(review): `rval` is not declared anywhere in this body. This line looks
// like a leftover from a different implementation that the diff rendering
// interleaved here -- confirm against the real file.
return zeek::val_mgr->Count(rval);
// No explicit template argument: IntType is deduced as uint64_t from the
// types of `offset` and `prime`.
uint64_t hash = fnva(input, offset, prime);
return zeek::val_mgr->Count(hash);
%}
## Calculates a weight value for use in a Rendezvous Hashing algorithm.

View file

@ -0,0 +1,34 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
Test vector: a (string)
fnv1a32: 0xe40c292c
fnv1a64: 0xaf63dc4c8601ec8c
Test vector: foobar (string)
fnv1a32: 0xbf9cf968
fnv1a64: 0x85944171f73967e8
Test vector: a very very long input sort of (string)
fnv1a32: 0xd14bf21b
fnv1a64: 0xb2c1be81250455fb
Test vector: 123 (string)
fnv1a32: 0x7238631b
fnv1a64: 0x456fc2181822c4db
Test vector: 123 (count)
fnv1a32: 0xc5b442fd
fnv1a64: 0xb03db9ae8d1a129d
Test vector: 123.0 (double)
fnv1a32: 0x15d2016f
fnv1a64: 0xaac1b659ecb8cfcf
Test vector: T (string)
fnv1a32: 0xd10c0b43
fnv1a64: 0xaf64094c86023903
Test vector: T (bool)
fnv1a32: 0x6c1151a5
fnv1a64: 0x7e1ef442cd317605
Test vector: F (bool)
fnv1a32: 0xeaff284
fnv1a64: 0x5f242d39c2422be4
Test vector: foobar (string)
fnv1a32: 0xbf9cf968
fnv1a64: 0x85944171f73967e8
Test vector: \x01\x02\x03\x04 (string)
fnv1a32: 0x5734a87d
fnv1a64: 0xbe7a5e775165785d

View file

@ -3,19 +3,19 @@
hrw, 0, zeek/cluster/node/proxy-1/
hrw (custom pool), 0, zeek/cluster/node/proxy-2/
hrw, 1, zeek/cluster/node/proxy-1/
hrw (custom pool), 1, zeek/cluster/node/proxy-2/
hrw (custom pool), 1, zeek/cluster/node/proxy-1/
hrw, 2, zeek/cluster/node/proxy-1/
hrw (custom pool), 2, zeek/cluster/node/proxy-1/
hrw (custom pool), 2, zeek/cluster/node/proxy-2/
hrw, 3, zeek/cluster/node/proxy-1/
hrw (custom pool), 3, zeek/cluster/node/proxy-2/
hrw, 13, zeek/cluster/node/proxy-1/
hrw (custom pool), 13, zeek/cluster/node/proxy-2/
hrw (custom pool), 13, zeek/cluster/node/proxy-1/
hrw, 37, zeek/cluster/node/proxy-1/
hrw (custom pool), 37, zeek/cluster/node/proxy-2/
hrw (custom pool), 37, zeek/cluster/node/proxy-1/
hrw, 42, zeek/cluster/node/proxy-1/
hrw (custom pool), 42, zeek/cluster/node/proxy-1/
hrw, 101, zeek/cluster/node/proxy-1/
hrw (custom pool), 101, zeek/cluster/node/proxy-2/
hrw (custom pool), 101, zeek/cluster/node/proxy-1/
rr, zeek/cluster/node/proxy-1/
rr (custom pool), zeek/cluster/node/proxy-1/
rr, zeek/cluster/node/proxy-1/
@ -35,19 +35,19 @@ rr (custom pool), zeek/cluster/node/proxy-2/
hrw, 0, zeek/cluster/node/proxy-1/
hrw (custom pool), 0, zeek/cluster/node/proxy-2/
hrw, 1, zeek/cluster/node/proxy-1/
hrw (custom pool), 1, zeek/cluster/node/proxy-2/
hrw (custom pool), 1, zeek/cluster/node/proxy-1/
hrw, 2, zeek/cluster/node/proxy-1/
hrw (custom pool), 2, zeek/cluster/node/proxy-1/
hrw (custom pool), 2, zeek/cluster/node/proxy-2/
hrw, 3, zeek/cluster/node/proxy-1/
hrw (custom pool), 3, zeek/cluster/node/proxy-2/
hrw, 13, zeek/cluster/node/proxy-1/
hrw (custom pool), 13, zeek/cluster/node/proxy-2/
hrw (custom pool), 13, zeek/cluster/node/proxy-1/
hrw, 37, zeek/cluster/node/proxy-1/
hrw (custom pool), 37, zeek/cluster/node/proxy-2/
hrw (custom pool), 37, zeek/cluster/node/proxy-1/
hrw, 42, zeek/cluster/node/proxy-1/
hrw (custom pool), 42, zeek/cluster/node/proxy-1/
hrw, 101, zeek/cluster/node/proxy-1/
hrw (custom pool), 101, zeek/cluster/node/proxy-2/
hrw (custom pool), 101, zeek/cluster/node/proxy-1/
2nd stuff
hrw, 0,
hrw (custom pool), 0, zeek/cluster/node/proxy-2/

View file

@ -1,13 +1,13 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
1st stuff
hrw, 0, zeek/cluster/node/proxy-2/
hrw, 1, zeek/cluster/node/proxy-2/
hrw, 2, zeek/cluster/node/proxy-1/
hrw, 1, zeek/cluster/node/proxy-1/
hrw, 2, zeek/cluster/node/proxy-2/
hrw, 3, zeek/cluster/node/proxy-2/
hrw, 13, zeek/cluster/node/proxy-2/
hrw, 37, zeek/cluster/node/proxy-2/
hrw, 13, zeek/cluster/node/proxy-1/
hrw, 37, zeek/cluster/node/proxy-1/
hrw, 42, zeek/cluster/node/proxy-1/
hrw, 101, zeek/cluster/node/proxy-2/
hrw, 101, zeek/cluster/node/proxy-1/
rr, zeek/cluster/node/proxy-1/
rr, zeek/cluster/node/proxy-2/
rr, zeek/cluster/node/proxy-1/
@ -17,13 +17,13 @@ rr, zeek/cluster/node/proxy-2/
rr, zeek/cluster/node/proxy-1/
rr, zeek/cluster/node/proxy-2/
hrw, 0, zeek/cluster/node/proxy-2/
hrw, 1, zeek/cluster/node/proxy-2/
hrw, 2, zeek/cluster/node/proxy-1/
hrw, 1, zeek/cluster/node/proxy-1/
hrw, 2, zeek/cluster/node/proxy-2/
hrw, 3, zeek/cluster/node/proxy-2/
hrw, 13, zeek/cluster/node/proxy-2/
hrw, 37, zeek/cluster/node/proxy-2/
hrw, 13, zeek/cluster/node/proxy-1/
hrw, 37, zeek/cluster/node/proxy-1/
hrw, 42, zeek/cluster/node/proxy-1/
hrw, 101, zeek/cluster/node/proxy-2/
hrw, 101, zeek/cluster/node/proxy-1/
2nd stuff
hrw, 0, zeek/cluster/node/proxy-2/
hrw, 1, zeek/cluster/node/proxy-2/

View file

@ -1,6 +1,9 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
got distributed event hrw, 2
got distributed event hrw, 1
got distributed event hrw, 13
got distributed event hrw, 37
got distributed event hrw, 42
got distributed event hrw, 101
got distributed event rr, 0
got distributed event rr, 2
got distributed event rr, 13

View file

@ -1,10 +1,7 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
got distributed event hrw, 0
got distributed event hrw, 1
got distributed event hrw, 2
got distributed event hrw, 3
got distributed event hrw, 13
got distributed event hrw, 37
got distributed event hrw, 101
got distributed event rr, 1
got distributed event rr, 3
got distributed event rr, 37

View file

@ -9,21 +9,21 @@ T
F
[id=0, user_data=alice]
[id=3, user_data=dave]
[id=4, user_data=eve]
[id=4, user_data=eve]
[id=4, user_data=eve]
[id=0, user_data=alice]
[id=1, user_data=bob]
[id=1, user_data=bob]
[id=1, user_data=bob]
[id=3, user_data=dave]
[id=3, user_data=dave]
[id=1, user_data=bob]
[id=0, user_data=alice]
[id=1, user_data=bob]
T
[id=4, user_data=eve]
[id=1, user_data=bob]
[id=3, user_data=dave]
[id=1, user_data=bob]
[id=1, user_data=bob]
[id=1, user_data=bob]
[id=3, user_data=dave]
[id=4, user_data=eve]
[id=4, user_data=eve]
[id=4, user_data=eve]
[id=4, user_data=eve]
[id=3, user_data=dave]
[id=1, user_data=bob]
[id=4, user_data=eve]
@ -31,10 +31,10 @@ T
T
[id=0, user_data=alice]
[id=3, user_data=dave]
[id=4, user_data=eve]
[id=4, user_data=eve]
[id=4, user_data=eve]
[id=0, user_data=alice]
[id=1, user_data=bob]
[id=1, user_data=bob]
[id=1, user_data=bob]
[id=3, user_data=dave]
[id=3, user_data=dave]
[id=1, user_data=bob]
[id=0, user_data=alice]

View file

@ -0,0 +1,33 @@
# @TEST-EXEC: zeek -b %INPUT >out 2>&1
# @TEST-EXEC: btest-diff out
# Prints one test vector (value and type name), followed by its fnv1a32 and
# fnv1a64 digests in hexadecimal, one item per line.
function output_hashes(val: any)
	{
	local digest32 = fnv1a32(val);
	local digest64 = fnv1a64(val);

	print fmt("Test vector: %s (%s)", val, type_name(val));
	print fmt("fnv1a32: 0x%x", digest32);
	print fmt("fnv1a64: 0x%x", digest64);
	}
# Feeds a fixed list of test vectors through output_hashes(); the printed
# result is compared against the recorded baseline.
event zeek_init()
	{
	# Plain string inputs.
	output_hashes("a");
	output_hashes("foobar");
	output_hashes("a very very long input sort of");

	# Same textual content as string, count and double.
	output_hashes("123");
	output_hashes(123);
	output_hashes(123.0);

	# The string "T" next to the two boolean values.
	output_hashes("T");
	output_hashes(T);
	output_hashes(F);

	# Strings built from hex: first the bytes of "foobar", then four raw bytes.
	local foobar_bytes: string = hexstr_to_bytestring("666f6f626172");
	output_hashes(foobar_bytes);

	local raw_bytes: string = hexstr_to_bytestring("01020304");
	output_hashes(raw_bytes);
	}