zeek/src/OpaqueVal.cc
Robin Sommer 32a403cdaf Merge branch 'topic/robin/bloom-filter-merge'
* topic/robin/bloom-filter-merge:
  Using a real hash function for hashing a BitVector's internal state.
  Support UHF hashing for >= UHASH_KEY_SIZE bytes.
  Changing the Bloom filter hashing so that it's independent of CompositeHash.
  Add new BiF for low-level Bloom filter initialization.
  Introduce global_hash_seed script variable.

Conflicts:
	testing/btest/Baseline/bifs.bloomfilter/output
2013-08-01 10:52:08 -07:00

675 lines
12 KiB
C++

// See the file "COPYING" in the main distribution directory for copyright.
#include "OpaqueVal.h"
#include "NetVar.h"
#include "Reporter.h"
#include "Serializer.h"
bool HashVal::IsValid() const
{
return valid;
}
bool HashVal::Init()
{
if ( valid )
return false;
valid = DoInit();
return valid;
}
StringVal* HashVal::Get()
{
if ( ! valid )
return new StringVal("");
StringVal* result = DoGet();
valid = false;
return result;
}
bool HashVal::Feed(const void* data, size_t size)
{
if ( valid )
return DoFeed(data, size);
reporter->InternalError("invalid opaque hash value");
return false;
}
bool HashVal::DoInit()
{
assert(! "missing implementation of DoInit()");
return false;
}
bool HashVal::DoFeed(const void*, size_t)
{
assert(! "missing implementation of DoFeed()");
return false;
}
StringVal* HashVal::DoGet()
{
assert(! "missing implementation of DoGet()");
return new StringVal("");
}
HashVal::HashVal(OpaqueType* t) : OpaqueVal(t)
{
valid = false;
}
IMPLEMENT_SERIAL(HashVal, SER_HASH_VAL);
bool HashVal::DoSerialize(SerialInfo* info) const
{
DO_SERIALIZE(SER_HASH_VAL, OpaqueVal);
return SERIALIZE(valid);
}
bool HashVal::DoUnserialize(UnserialInfo* info)
{
DO_UNSERIALIZE(OpaqueVal);
return UNSERIALIZE(&valid);
}
MD5Val::MD5Val() : HashVal(md5_type)
{
}
void MD5Val::digest(val_list& vlist, u_char result[MD5_DIGEST_LENGTH])
{
MD5_CTX h;
md5_init(&h);
loop_over_list(vlist, i)
{
Val* v = vlist[i];
if ( v->Type()->Tag() == TYPE_STRING )
{
const BroString* str = v->AsString();
md5_update(&h, str->Bytes(), str->Len());
}
else
{
ODesc d(DESC_BINARY);
v->Describe(&d);
md5_update(&h, (const u_char *) d.Bytes(), d.Len());
}
}
md5_final(&h, result);
}
void MD5Val::hmac(val_list& vlist,
u_char key[MD5_DIGEST_LENGTH],
u_char result[MD5_DIGEST_LENGTH])
{
digest(vlist, result);
for ( int i = 0; i < MD5_DIGEST_LENGTH; ++i )
result[i] ^= key[i];
MD5(result, MD5_DIGEST_LENGTH, result);
}
bool MD5Val::DoInit()
{
assert(! IsValid());
md5_init(&ctx);
return true;
}
bool MD5Val::DoFeed(const void* data, size_t size)
{
if ( ! IsValid() )
return false;
md5_update(&ctx, data, size);
return true;
}
StringVal* MD5Val::DoGet()
{
if ( ! IsValid() )
return new StringVal("");
u_char digest[MD5_DIGEST_LENGTH];
md5_final(&ctx, digest);
return new StringVal(md5_digest_print(digest));
}
IMPLEMENT_SERIAL(MD5Val, SER_MD5_VAL);
bool MD5Val::DoSerialize(SerialInfo* info) const
{
DO_SERIALIZE(SER_MD5_VAL, HashVal);
if ( ! IsValid() )
return true;
if ( ! (SERIALIZE(ctx.A) &&
SERIALIZE(ctx.B) &&
SERIALIZE(ctx.C) &&
SERIALIZE(ctx.D) &&
SERIALIZE(ctx.Nl) &&
SERIALIZE(ctx.Nh)) )
return false;
for ( int i = 0; i < MD5_LBLOCK; ++i )
{
if ( ! SERIALIZE(ctx.data[i]) )
return false;
}
if ( ! SERIALIZE(ctx.num) )
return false;
return true;
}
bool MD5Val::DoUnserialize(UnserialInfo* info)
{
DO_UNSERIALIZE(HashVal);
if ( ! IsValid() )
return true;
if ( ! (UNSERIALIZE(&ctx.A) &&
UNSERIALIZE(&ctx.B) &&
UNSERIALIZE(&ctx.C) &&
UNSERIALIZE(&ctx.D) &&
UNSERIALIZE(&ctx.Nl) &&
UNSERIALIZE(&ctx.Nh)) )
return false;
for ( int i = 0; i < MD5_LBLOCK; ++i )
{
if ( ! UNSERIALIZE(&ctx.data[i]) )
return false;
}
if ( ! UNSERIALIZE(&ctx.num) )
return false;
return true;
}
SHA1Val::SHA1Val() : HashVal(sha1_type)
{
}
void SHA1Val::digest(val_list& vlist, u_char result[SHA_DIGEST_LENGTH])
{
SHA_CTX h;
sha1_init(&h);
loop_over_list(vlist, i)
{
Val* v = vlist[i];
if ( v->Type()->Tag() == TYPE_STRING )
{
const BroString* str = v->AsString();
sha1_update(&h, str->Bytes(), str->Len());
}
else
{
ODesc d(DESC_BINARY);
v->Describe(&d);
sha1_update(&h, (const u_char *) d.Bytes(), d.Len());
}
}
sha1_final(&h, result);
}
bool SHA1Val::DoInit()
{
assert(! IsValid());
sha1_init(&ctx);
return true;
}
bool SHA1Val::DoFeed(const void* data, size_t size)
{
if ( ! IsValid() )
return false;
sha1_update(&ctx, data, size);
return true;
}
StringVal* SHA1Val::DoGet()
{
if ( ! IsValid() )
return new StringVal("");
u_char digest[SHA_DIGEST_LENGTH];
sha1_final(&ctx, digest);
return new StringVal(sha1_digest_print(digest));
}
IMPLEMENT_SERIAL(SHA1Val, SER_SHA1_VAL);
bool SHA1Val::DoSerialize(SerialInfo* info) const
{
DO_SERIALIZE(SER_SHA1_VAL, HashVal);
if ( ! IsValid() )
return true;
if ( ! (SERIALIZE(ctx.h0) &&
SERIALIZE(ctx.h1) &&
SERIALIZE(ctx.h2) &&
SERIALIZE(ctx.h3) &&
SERIALIZE(ctx.h4) &&
SERIALIZE(ctx.Nl) &&
SERIALIZE(ctx.Nh)) )
return false;
for ( int i = 0; i < SHA_LBLOCK; ++i )
{
if ( ! SERIALIZE(ctx.data[i]) )
return false;
}
if ( ! SERIALIZE(ctx.num) )
return false;
return true;
}
bool SHA1Val::DoUnserialize(UnserialInfo* info)
{
DO_UNSERIALIZE(HashVal);
if ( ! IsValid() )
return true;
if ( ! (UNSERIALIZE(&ctx.h0) &&
UNSERIALIZE(&ctx.h1) &&
UNSERIALIZE(&ctx.h2) &&
UNSERIALIZE(&ctx.h3) &&
UNSERIALIZE(&ctx.h4) &&
UNSERIALIZE(&ctx.Nl) &&
UNSERIALIZE(&ctx.Nh)) )
return false;
for ( int i = 0; i < SHA_LBLOCK; ++i )
{
if ( ! UNSERIALIZE(&ctx.data[i]) )
return false;
}
if ( ! UNSERIALIZE(&ctx.num) )
return false;
return true;
}
SHA256Val::SHA256Val() : HashVal(sha256_type)
{
}
void SHA256Val::digest(val_list& vlist, u_char result[SHA256_DIGEST_LENGTH])
{
SHA256_CTX h;
sha256_init(&h);
loop_over_list(vlist, i)
{
Val* v = vlist[i];
if ( v->Type()->Tag() == TYPE_STRING )
{
const BroString* str = v->AsString();
sha256_update(&h, str->Bytes(), str->Len());
}
else
{
ODesc d(DESC_BINARY);
v->Describe(&d);
sha256_update(&h, (const u_char *) d.Bytes(), d.Len());
}
}
sha256_final(&h, result);
}
bool SHA256Val::DoInit()
{
assert( ! IsValid() );
sha256_init(&ctx);
return true;
}
bool SHA256Val::DoFeed(const void* data, size_t size)
{
if ( ! IsValid() )
return false;
sha256_update(&ctx, data, size);
return true;
}
StringVal* SHA256Val::DoGet()
{
if ( ! IsValid() )
return new StringVal("");
u_char digest[SHA256_DIGEST_LENGTH];
sha256_final(&ctx, digest);
return new StringVal(sha256_digest_print(digest));
}
IMPLEMENT_SERIAL(SHA256Val, SER_SHA256_VAL);
bool SHA256Val::DoSerialize(SerialInfo* info) const
{
DO_SERIALIZE(SER_SHA256_VAL, HashVal);
if ( ! IsValid() )
return true;
for ( int i = 0; i < 8; ++i )
{
if ( ! SERIALIZE(ctx.h[i]) )
return false;
}
if ( ! (SERIALIZE(ctx.Nl) &&
SERIALIZE(ctx.Nh)) )
return false;
for ( int i = 0; i < SHA_LBLOCK; ++i )
{
if ( ! SERIALIZE(ctx.data[i]) )
return false;
}
if ( ! (SERIALIZE(ctx.num) &&
SERIALIZE(ctx.md_len)) )
return false;
return true;
}
bool SHA256Val::DoUnserialize(UnserialInfo* info)
{
DO_UNSERIALIZE(HashVal);
if ( ! IsValid() )
return true;
for ( int i = 0; i < 8; ++i )
{
if ( ! UNSERIALIZE(&ctx.h[i]) )
return false;
}
if ( ! (UNSERIALIZE(&ctx.Nl) &&
UNSERIALIZE(&ctx.Nh)) )
return false;
for ( int i = 0; i < SHA_LBLOCK; ++i )
{
if ( ! UNSERIALIZE(&ctx.data[i]) )
return false;
}
if ( ! (UNSERIALIZE(&ctx.num) &&
UNSERIALIZE(&ctx.md_len)) )
return false;
return true;
}
EntropyVal::EntropyVal() : OpaqueVal(entropy_type)
{
}
bool EntropyVal::Feed(const void* data, size_t size)
{
state.add(data, size);
return true;
}
bool EntropyVal::Get(double *r_ent, double *r_chisq, double *r_mean,
double *r_montepicalc, double *r_scc)
{
state.end(r_ent, r_chisq, r_mean, r_montepicalc, r_scc);
return true;
}
IMPLEMENT_SERIAL(EntropyVal, SER_ENTROPY_VAL);
bool EntropyVal::DoSerialize(SerialInfo* info) const
{
DO_SERIALIZE(SER_ENTROPY_VAL, OpaqueVal);
for ( int i = 0; i < 256; ++i )
{
if ( ! SERIALIZE(state.ccount[i]) )
return false;
}
if ( ! (SERIALIZE(state.totalc) &&
SERIALIZE(state.mp) &&
SERIALIZE(state.sccfirst)) )
return false;
for ( int i = 0; i < RT_MONTEN; ++i )
{
if ( ! SERIALIZE(state.monte[i]) )
return false;
}
if ( ! (SERIALIZE(state.inmont) &&
SERIALIZE(state.mcount) &&
SERIALIZE(state.cexp) &&
SERIALIZE(state.montex) &&
SERIALIZE(state.montey) &&
SERIALIZE(state.montepi) &&
SERIALIZE(state.sccu0) &&
SERIALIZE(state.scclast) &&
SERIALIZE(state.scct1) &&
SERIALIZE(state.scct2) &&
SERIALIZE(state.scct3)) )
return false;
return true;
}
bool EntropyVal::DoUnserialize(UnserialInfo* info)
{
DO_UNSERIALIZE(OpaqueVal);
for ( int i = 0; i < 256; ++i )
{
if ( ! UNSERIALIZE(&state.ccount[i]) )
return false;
}
if ( ! (UNSERIALIZE(&state.totalc) &&
UNSERIALIZE(&state.mp) &&
UNSERIALIZE(&state.sccfirst)) )
return false;
for ( int i = 0; i < RT_MONTEN; ++i )
{
if ( ! UNSERIALIZE(&state.monte[i]) )
return false;
}
if ( ! (UNSERIALIZE(&state.inmont) &&
UNSERIALIZE(&state.mcount) &&
UNSERIALIZE(&state.cexp) &&
UNSERIALIZE(&state.montex) &&
UNSERIALIZE(&state.montey) &&
UNSERIALIZE(&state.montepi) &&
UNSERIALIZE(&state.sccu0) &&
UNSERIALIZE(&state.scclast) &&
UNSERIALIZE(&state.scct1) &&
UNSERIALIZE(&state.scct2) &&
UNSERIALIZE(&state.scct3)) )
return false;
return true;
}
BloomFilterVal::BloomFilterVal()
: OpaqueVal(bloomfilter_type)
{
type = 0;
hash = 0;
bloom_filter = 0;
}
BloomFilterVal::BloomFilterVal(OpaqueType* t)
: OpaqueVal(t)
{
type = 0;
hash = 0;
bloom_filter = 0;
}
BloomFilterVal::BloomFilterVal(probabilistic::BloomFilter* bf)
: OpaqueVal(bloomfilter_type)
{
type = 0;
hash = 0;
bloom_filter = bf;
}
bool BloomFilterVal::Typify(BroType* arg_type)
{
if ( type )
return false;
type = arg_type;
type->Ref();
TypeList* tl = new TypeList(type);
tl->Append(type);
hash = new CompositeHash(tl);
Unref(tl);
return true;
}
BroType* BloomFilterVal::Type() const
{
return type;
}
void BloomFilterVal::Add(const Val* val)
{
HashKey* key = hash->ComputeHash(val, 1);
bloom_filter->Add(key);
delete key;
}
size_t BloomFilterVal::Count(const Val* val) const
{
HashKey* key = hash->ComputeHash(val, 1);
size_t cnt = bloom_filter->Count(key);
delete key;
return cnt;
}
void BloomFilterVal::Clear()
{
bloom_filter->Clear();
}
bool BloomFilterVal::Empty() const
{
return bloom_filter->Empty();
}
string BloomFilterVal::InternalState() const
{
return bloom_filter->InternalState();
}
BloomFilterVal* BloomFilterVal::Merge(const BloomFilterVal* x,
const BloomFilterVal* y)
{
if ( x->Type() && // any one 0 is ok here
y->Type() &&
! same_type(x->Type(), y->Type()) )
{
reporter->Error("cannot merge Bloom filters with different types");
return 0;
}
if ( typeid(*x->bloom_filter) != typeid(*y->bloom_filter) )
{
reporter->Error("cannot merge different Bloom filter types");
return 0;
}
probabilistic::BloomFilter* copy = x->bloom_filter->Clone();
if ( ! copy->Merge(y->bloom_filter) )
{
reporter->Error("failed to merge Bloom filter");
return 0;
}
BloomFilterVal* merged = new BloomFilterVal(copy);
if ( x->Type() && ! merged->Typify(x->Type()) )
{
reporter->Error("failed to set type on merged Bloom filter");
return 0;
}
return merged;
}
BloomFilterVal::~BloomFilterVal()
{
Unref(type);
delete hash;
delete bloom_filter;
}
IMPLEMENT_SERIAL(BloomFilterVal, SER_BLOOMFILTER_VAL);
bool BloomFilterVal::DoSerialize(SerialInfo* info) const
{
DO_SERIALIZE(SER_BLOOMFILTER_VAL, OpaqueVal);
bool is_typed = (type != 0);
if ( ! SERIALIZE(is_typed) )
return false;
if ( is_typed && ! type->Serialize(info) )
return false;
return bloom_filter->Serialize(info);
}
bool BloomFilterVal::DoUnserialize(UnserialInfo* info)
{
DO_UNSERIALIZE(OpaqueVal);
bool is_typed;
if ( ! UNSERIALIZE(&is_typed) )
return false;
if ( is_typed )
{
BroType* t = BroType::Unserialize(info);
if ( ! Typify(t) )
return false;
Unref(t);
}
bloom_filter = probabilistic::BloomFilter::Unserialize(info);
return bloom_filter != 0;
}