From 01e662b3e0f130543aa698e9c57be00b965d1924 Mon Sep 17 00:00:00 2001 From: Robin Sommer Date: Sat, 15 Jun 2019 21:19:21 +0000 Subject: [PATCH] Reimplement serialization infrastructure for OpaqueVals. We need this to send them through Broker, and we also leverage it for cloning opaques. The serialization methods now produce Broker data instances directly, and no longer go through the binary formatter. Summary of the new API for types derived from OpaqueVal: - Add DECLARE_OPAQUE_VALUE() to the class declaration - Add IMPLEMENT_OPAQUE_VALUE() to the class' implementation file - Implement these two methods (which are declared by the 1st macro): - broker::data DoSerialize() const - bool DoUnserialize(const broker::data& data) This machinery should work correctly from dynamic plugins as well. OpaqueVal provides a default implementation of DoClone() as well that goes through serialization. Derived classes can provide a more efficient version if they want. The declaration of the "OpaqueVal" class has moved into the header file "OpaqueVal.h", along with the new serialization infrastructure. This is breaking existing code that relies on the location, but because the API is changing anyway that seems fine. This adds an internal BiF "Broker::__opaque_clone_through_serialization" that does what the name says: deep-copying an opaque by serializing, then deserializing. That can be used to test the new functionality from btests. Not quite done yet. TODO: - Not all tests pass yet: [ 0%] language.named-set-ctors ... failed [ 16%] language.copy-all-opaques ... failed [ 33%] language.set-type-checking ... failed [ 50%] language.table-init-container-ctors ... failed [ 66%] coverage.sphinx-zeekygen-docs ... failed [ 83%] scripts.base.frameworks.sumstats.basic-cluster ... failed (Some of the serialization may still be buggy.) - Clean up the code a bit more.
--- src/OpaqueVal.cc | 530 +++++++++++++++++++ src/OpaqueVal.h | 148 +++++- src/Val.cc | 20 +- src/Val.h | 21 +- src/broker/Data.cc | 142 ++++- src/broker/Data.h | 20 +- src/broker/Store.cc | 14 + src/broker/Store.h | 4 +- src/broker/data.bif | 14 + src/file_analysis/analyzer/x509/OCSP.cc | 28 + src/file_analysis/analyzer/x509/OCSP.h | 4 +- src/file_analysis/analyzer/x509/X509.cc | 29 + src/file_analysis/analyzer/x509/X509.h | 3 +- src/probabilistic/BitVector.cc | 41 ++ src/probabilistic/BitVector.h | 5 + src/probabilistic/BloomFilter.cc | 83 +++ src/probabilistic/BloomFilter.h | 25 + src/probabilistic/CardinalityCounter.cc | 41 ++ src/probabilistic/CardinalityCounter.h | 3 + src/probabilistic/CounterVector.cc | 26 + src/probabilistic/CounterVector.h | 5 + src/probabilistic/Hasher.cc | 41 ++ src/probabilistic/Hasher.h | 19 + src/probabilistic/Topk.cc | 125 ++++- src/probabilistic/Topk.h | 2 + testing/btest/Baseline/broker.opaque/.stderr | 1 + testing/btest/Baseline/broker.opaque/out | 53 ++ testing/btest/broker/opaque.zeek | 161 ++++++ 28 files changed, 1556 insertions(+), 52 deletions(-) create mode 100644 testing/btest/Baseline/broker.opaque/.stderr create mode 100644 testing/btest/Baseline/broker.opaque/out create mode 100644 testing/btest/broker/opaque.zeek diff --git a/src/OpaqueVal.cc b/src/OpaqueVal.cc index 7e65cfe35c..360041ef09 100644 --- a/src/OpaqueVal.cc +++ b/src/OpaqueVal.cc @@ -6,6 +6,147 @@ #include "probabilistic/BloomFilter.h" #include "probabilistic/CardinalityCounter.h" +// Helper to retrieve a broker value out of a broker::vector at a specified +// index, and casted to the expected destination type. +template +inline bool get_vector_idx(const V& v, unsigned int i, D* dst) + { + if ( i >= v.size() ) + return false; + + auto x = caf::get_if(&v[i]); + if ( ! 
x ) + return false; + + *dst = static_cast(*x); + return true; + } + +OpaqueMgr* OpaqueMgr::mgr() + { + static OpaqueMgr mgr; + return &mgr; + } + +OpaqueVal::OpaqueVal(OpaqueType* t) : Val(t) + { + } + +OpaqueVal::~OpaqueVal() + { + } + +const std::string& OpaqueMgr::TypeID(const OpaqueVal* v) const + { + auto x = _types.find(v->OpaqueName()); + + if ( x == _types.end() ) + reporter->InternalError(fmt("OpaqueMgr::TypeID: opaque type %s not registered", + v->OpaqueName())); + + return x->first; + } + +OpaqueVal* OpaqueMgr::Instantiate(const std::string& id) const + { + auto x = _types.find(id); + return x != _types.end() ? (*x->second)() : nullptr; + } + +broker::expected OpaqueVal::Serialize() const + { + auto type = OpaqueMgr::mgr()->TypeID(this); + + auto d = DoSerialize(); + if ( d == broker::none() ) + return broker::ec::invalid_data; // Cannot serialize + + return {broker::vector{std::move(type), std::move(d)}}; + } + +OpaqueVal* OpaqueVal::Unserialize(const broker::data& data) + { + auto v = caf::get_if(&data); + + if ( ! (v && v->size() == 2) ) + return nullptr; + + auto type = caf::get_if(&(*v)[0]); + if ( ! type ) + return nullptr; + + auto val = OpaqueMgr::mgr()->Instantiate(*type); + if ( ! val ) + return nullptr; + + if ( ! val->DoUnserialize((*v)[1]) ) + { + Unref(val); + return nullptr; + } + + return val; + } + +broker::data OpaqueVal::SerializeType(BroType* t) + { + if ( t->InternalType() == TYPE_INTERNAL_ERROR ) + return broker::none(); + + if ( t->InternalType() == TYPE_INTERNAL_OTHER ) + { + // Serialize by name. + assert(t->GetName().size()); + return broker::vector{true, t->GetName()}; + } + + // A base type. + return broker::vector{false, static_cast(t->Tag())}; + } + +BroType* OpaqueVal::UnserializeType(const broker::data& data) + { + auto v = caf::get_if(&data); + if ( ! (v && v->size() == 2) ) + return nullptr; + + auto by_name = caf::get_if(&(*v)[0]); + if ( ! 
by_name ) + return nullptr; + + if ( *by_name ) + { + auto name = caf::get_if(&(*v)[1]); + if ( ! name ) + return nullptr; + + ID* id = global_scope()->Lookup(name->c_str()); + if ( ! id ) + return nullptr; + + BroType* t = id->AsType(); + if ( ! t ) + return nullptr; + + return t->Ref(); + } + + auto tag = caf::get_if(&(*v)[1]); + if ( ! tag ) + return nullptr; + + return base_type(static_cast(*tag)); + } + +Val* OpaqueVal::DoClone(CloneState* state) + { + auto d = OpaqueVal::Serialize(); + if ( ! d ) + return nullptr; + + return OpaqueVal::Unserialize(std::move(*d)); + } + bool HashVal::IsValid() const { return valid; @@ -145,6 +286,75 @@ StringVal* MD5Val::DoGet() return new StringVal(md5_digest_print(digest)); } +IMPLEMENT_OPAQUE_VALUE(MD5Val) + +broker::data MD5Val::DoSerialize() const + { + if ( ! IsValid() ) + return broker::vector{false}; + + MD5_CTX* md = (MD5_CTX*) EVP_MD_CTX_md_data(ctx); + + broker::vector d = { + true, + static_cast(md->A), + static_cast(md->B), + static_cast(md->C), + static_cast(md->D), + static_cast(md->Nl), + static_cast(md->Nh), + static_cast(md->num) + }; + + for ( int i = 0; i < MD5_LBLOCK; ++i ) + d.emplace_back(static_cast(md->data[i])); + + return d; + } + +bool MD5Val::DoUnserialize(const broker::data& data) + { + auto d = caf::get_if(&data); + if ( ! d ) + return false; + + auto valid = caf::get_if(&(*d)[0]); + if ( ! valid ) + return false; + + if ( ! *valid ) + { + assert(! IsValid()); // default set by ctor + return true; + } + + Init(); + MD5_CTX* md = (MD5_CTX*) EVP_MD_CTX_md_data(ctx); + + if ( ! get_vector_idx(*d, 1, &md->A) ) + return false; + if ( ! get_vector_idx(*d, 2, &md->B) ) + return false; + if ( ! get_vector_idx(*d, 3, &md->C) ) + return false; + if ( ! get_vector_idx(*d, 4, &md->D) ) + return false; + if ( ! get_vector_idx(*d, 5, &md->Nl) ) + return false; + if ( ! get_vector_idx(*d, 6, &md->Nh) ) + return false; + if ( ! 
get_vector_idx(*d, 7, &md->num) ) + return false; + + for ( int i = 0; i < MD5_LBLOCK; ++i ) + { + if ( ! get_vector_idx(*d, 8 + i, &md->data[i]) ) + return false; + } + + return true; + } + SHA1Val::SHA1Val() : HashVal(sha1_type) { } @@ -217,6 +427,78 @@ StringVal* SHA1Val::DoGet() return new StringVal(sha1_digest_print(digest)); } +IMPLEMENT_OPAQUE_VALUE(SHA1Val) + +broker::data SHA1Val::DoSerialize() const + { + if ( ! IsValid() ) + return broker::vector{false}; + + SHA_CTX* md = (SHA_CTX*) EVP_MD_CTX_md_data(ctx); + + broker::vector d = { + true, + static_cast(md->h0), + static_cast(md->h1), + static_cast(md->h2), + static_cast(md->h3), + static_cast(md->h4), + static_cast(md->Nl), + static_cast(md->Nh), + static_cast(md->num) + }; + + for ( int i = 0; i < SHA_LBLOCK; ++i ) + d.emplace_back(static_cast(md->data[i])); + + return d; + } + +bool SHA1Val::DoUnserialize(const broker::data& data) + { + auto d = caf::get_if(&data); + if ( ! d ) + return false; + + auto valid = caf::get_if(&(*d)[0]); + if ( ! valid ) + return false; + + if ( ! *valid ) + { + assert(! IsValid()); // default set by ctor + return true; + } + + Init(); + SHA_CTX* md = (SHA_CTX*) EVP_MD_CTX_md_data(ctx); + + if ( ! get_vector_idx(*d, 1, &md->h0) ) + return false; + if ( ! get_vector_idx(*d, 2, &md->h1) ) + return false; + if ( ! get_vector_idx(*d, 3, &md->h2) ) + return false; + if ( ! get_vector_idx(*d, 4, &md->h3) ) + return false; + if ( ! get_vector_idx(*d, 5, &md->h4) ) + return false; + if ( ! get_vector_idx(*d, 6, &md->Nl) ) + return false; + if ( ! get_vector_idx(*d, 7, &md->Nh) ) + return false; + if ( ! get_vector_idx(*d, 8, &md->num) ) + return false; + + for ( int i = 0; i < SHA_LBLOCK; ++i ) + { + if ( ! 
get_vector_idx(*d, 9 + i, &md->data[i]) ) + return false; + } + + return true; + } + SHA256Val::SHA256Val() : HashVal(sha256_type) { } @@ -289,6 +571,75 @@ StringVal* SHA256Val::DoGet() return new StringVal(sha256_digest_print(digest)); } +IMPLEMENT_OPAQUE_VALUE(SHA256Val) + +broker::data SHA256Val::DoSerialize() const + { + if ( ! IsValid() ) + return broker::vector{false}; + + SHA256_CTX* md = (SHA256_CTX*) EVP_MD_CTX_md_data(ctx); + + broker::vector d = { + true, + static_cast(md->Nl), + static_cast(md->Nh), + static_cast(md->num), + static_cast(md->md_len) + }; + + for ( int i = 0; i < 8; ++i ) + d.emplace_back(static_cast(md->h[i])); + + for ( int i = 0; i < SHA_LBLOCK; ++i ) + d.emplace_back(static_cast(md->data[i])); + + return d; + } + +bool SHA256Val::DoUnserialize(const broker::data& data) + { + auto d = caf::get_if(&data); + if ( ! d ) + return false; + + auto valid = caf::get_if(&(*d)[0]); + if ( ! valid ) + return false; + + if ( ! *valid ) + { + assert(! IsValid()); // default set by ctor + return true; + } + + Init(); + SHA256_CTX* md = (SHA256_CTX*) EVP_MD_CTX_md_data(ctx); + + if ( ! get_vector_idx(*d, 1, &md->Nl) ) + return false; + if ( ! get_vector_idx(*d, 2, &md->Nh) ) + return false; + if ( ! get_vector_idx(*d, 3, &md->num) ) + return false; + if ( ! get_vector_idx(*d, 4, &md->md_len) ) + return false; + + for ( int i = 0; i < 8; ++i ) + { + if ( ! get_vector_idx(*d, 5 + i, &md->h[i]) ) + return false; + } + + for ( int i = 0; i < SHA_LBLOCK; ++i ) + { + if ( ! 
get_vector_idx(*d, 13 + i, &md->data[i]) ) + return false; + } + + return true; + } + EntropyVal::EntropyVal() : OpaqueVal(entropy_type) { } @@ -312,6 +663,89 @@ bool EntropyVal::Get(double *r_ent, double *r_chisq, double *r_mean, return true; } +IMPLEMENT_OPAQUE_VALUE(EntropyVal) + +broker::data EntropyVal::DoSerialize() const + { + broker::vector d = + { + static_cast(state.totalc), + static_cast(state.mp), + static_cast(state.sccfirst), + static_cast(state.inmont), + static_cast(state.mcount), + static_cast(state.cexp), + static_cast(state.montex), + static_cast(state.montey), + static_cast(state.montepi), + static_cast(state.sccu0), + static_cast(state.scclast), + static_cast(state.scct1), + static_cast(state.scct2), + static_cast(state.scct3), + }; + + d.reserve(256 + 3 + RT_MONTEN + 11); + + for ( int i = 0; i < 256; ++i ) + d.emplace_back(static_cast(state.ccount[i])); + + for ( int i = 0; i < RT_MONTEN; ++i ) + d.emplace_back(static_cast(state.monte[i])); + + return d; + } + +bool EntropyVal::DoUnserialize(const broker::data& data) + { + auto d = caf::get_if(&data); + if ( ! d ) + return false; + + if ( ! get_vector_idx(*d, 0, &state.totalc) ) + return false; + if ( ! get_vector_idx(*d, 1, &state.mp) ) + return false; + if ( ! get_vector_idx(*d, 2, &state.sccfirst) ) + return false; + if ( ! get_vector_idx(*d, 3, &state.inmont) ) + return false; + if ( ! get_vector_idx(*d, 4, &state.mcount) ) + return false; + if ( ! get_vector_idx(*d, 5, &state.cexp) ) + return false; + if ( ! get_vector_idx(*d, 6, &state.montex) ) + return false; + if ( ! get_vector_idx(*d, 7, &state.montey) ) + return false; + if ( ! get_vector_idx(*d, 8, &state.montepi) ) + return false; + if ( ! get_vector_idx(*d, 9, &state.sccu0) ) + return false; + if ( ! get_vector_idx(*d, 10, &state.scclast) ) + return false; + if ( ! get_vector_idx(*d, 11, &state.scct1) ) + return false; + if ( ! get_vector_idx(*d, 12, &state.scct2) ) + return false; + if ( ! 
get_vector_idx(*d, 13, &state.scct3) ) + return false; + + for ( int i = 0; i < 256; ++i ) + { + if ( ! get_vector_idx(*d, 14 + i, &state.ccount[i]) ) + return false; + } + + for ( int i = 0; i < RT_MONTEN; ++i ) + { + if ( ! get_vector_idx(*d, 14 + 256 + i, &state.monte[i]) ) + return false; + } + + return true; + } + BloomFilterVal::BloomFilterVal() : OpaqueVal(bloomfilter_type) { @@ -442,6 +876,54 @@ BloomFilterVal::~BloomFilterVal() delete bloom_filter; } +IMPLEMENT_OPAQUE_VALUE(BloomFilterVal) + +broker::data BloomFilterVal::DoSerialize() const + { + broker::vector d; + + if ( type ) + { + auto t = SerializeType(type); + if ( t == broker::none() ) + return broker::none(); + + d.emplace_back(t); + } + else + d.emplace_back(broker::none()); + + auto bf = bloom_filter->Serialize(); + if ( ! bf ) + return broker::none(); + + d.emplace_back(*bf); + return d; + } + +bool BloomFilterVal::DoUnserialize(const broker::data& data) + { + auto v = caf::get_if(&data); + + if ( ! (v && v->size() == 2) ) + return false; + + auto no_type = caf::get_if(&(*v)[0]); + if ( ! no_type ) + { + BroType* t = UnserializeType((*v)[0]); + if ( ! (t && Typify(t)) ) + return false; + } + + auto bf = probabilistic::BloomFilter::Unserialize((*v)[1]); + if ( ! bf ) + return false; + + bloom_filter = bf.release(); + return true; + } + CardinalityVal::CardinalityVal() : OpaqueVal(cardinality_type) { c = 0; @@ -496,3 +978,51 @@ void CardinalityVal::Add(const Val* val) c->AddElement(key->Hash()); delete key; } + +IMPLEMENT_OPAQUE_VALUE(CardinalityVal) + +broker::data CardinalityVal::DoSerialize() const + { + broker::vector d; + + if ( type ) + { + auto t = SerializeType(type); + if ( t == broker::none() ) + return broker::none(); + + d.emplace_back(t); + } + else + d.emplace_back(broker::none()); + + auto cs = c->Serialize(); + if ( ! 
cs ) + return broker::none(); + + d.emplace_back(*cs); + return d; + } + +bool CardinalityVal::DoUnserialize(const broker::data& data) + { + auto v = caf::get_if(&data); + + if ( ! (v && v->size() == 2) ) + return false; + + auto no_type = caf::get_if(&(*v)[0]); + if ( ! no_type ) + { + BroType* t = UnserializeType((*v)[0]); + if ( ! (t && Typify(t)) ) + return false; + } + + auto cu = probabilistic::CardinalityCounter::Unserialize((*v)[1]); + if ( ! cu ) + return false; + + c = cu.release(); + return true; + } diff --git a/src/OpaqueVal.h b/src/OpaqueVal.h index c2fd1cf129..805cd1dad6 100644 --- a/src/OpaqueVal.h +++ b/src/OpaqueVal.h @@ -9,6 +9,145 @@ #include "Val.h" #include "digest.h" +class OpaqueVal; + +/** + * Singleton that registers all available types of opaque + * values. This facilitates their serialization into Broker values. + */ +class OpaqueMgr { +public: + using Factory = OpaqueVal* (); + + /** + * Returns a unique ID for the type of an opaque value. + + * @param v opaque value to return type for; its class must have been + * registered with the manager, otherwise this method will abort + * execution. + * + * @return type ID, which can be used with *Instantiate()* to create a + * new + * instance of the same type. + */ + const std::string& TypeID(const OpaqueVal* v) const; + + /** + * Instantiates a new opaque value of a specific opaque type. + * + * @param id unique type ID for the class to instantiate; this will + * normally have been returned earlier by *TypeID()*. + * + * @return A freshly instantiated value of the OpaqueVal-derived + * class that *id* specifies, with reference count at +1. If *id* + * is unknown, this will return null. + * + */ + OpaqueVal* Instantiate(const std::string& id) const; + + /** Returns the global manager singleton object. */ + static OpaqueMgr* mgr(); + + /** + * Internal helper class to register an OpaqueVal-derived class + * with the manager.
+ */ + template + class Register { + public: + Register(const char* id) + { OpaqueMgr::mgr()->_types.emplace(id, &T::OpaqueInstantiate); } + }; + +private: + std::unordered_map _types; +}; + +/** Macro to insert into an OpaqueVal-derived class's declaration. */ +#define DECLARE_OPAQUE_VALUE(T) \ + friend class OpaqueMgr::Register; \ + broker::data DoSerialize() const; \ + bool DoUnserialize(const broker::data& data); \ + const char* OpaqueName() const override { return #T; } \ + static OpaqueVal* OpaqueInstantiate() { return new T(); } + +#define __OPAQUE_MERGE(a, b) a ## b +#define __OPAQUE_ID(x) __OPAQUE_MERGE(_opaque, x) + +/** Macro to insert into an OpaqueVal-derived class's implementation file. */ +#define IMPLEMENT_OPAQUE_VALUE(T) static OpaqueMgr::Register __OPAQUE_ID(__LINE__)(#T); + +/** + * Base class for all opaque values. Opaque values are types that are managed + * completely internally, with no further script-level operators provided + * (other than bif functions). See OpaqueVal.h for derived classes. + */ +class OpaqueVal : public Val { +public: + explicit OpaqueVal(OpaqueType* t); + ~OpaqueVal() override; + + /** + * Serializes the value into a Broker representation. + * + * @return the broker representation, or an error if serialization + * isn't supported or failed. + */ + broker::expected Serialize() const; + + /** + * Reinstantiates a value from its serialized Broker representation. + * + * @param data Broker representation as returned by *Serialize()*. + * @return unserialized instance with reference count at +1 + */ + static OpaqueVal* Unserialize(const broker::data& data); + +protected: + friend class Val; + friend class OpaqueMgr; + OpaqueVal() { } + + /** + * Must be overridden to provide a serialized version of the derived + * class' state. Returns 'broker::none()' if serialization fails, or + * is not supported.
+ */ + virtual broker::data DoSerialize() const = 0; + + /** + * Must be overridden to recreate the derived class' state from a + * serialization. Returns true if successful. + */ + virtual bool DoUnserialize(const broker::data& data) = 0; + + /** + * Internal helper for the serialization machinery. Automatically + * overridden by the `DECLARE_OPAQUE_VALUE` macro. + */ + virtual const char* OpaqueName() const = 0; + + /** + * Provides an implementation of *Val::DoClone()* that leverages the + * serialization methods to deep-copy an instance. Derived classes + * may also override this with a more efficient custom clone + * implementation of their own. + */ + Val* DoClone(CloneState* state) override; + + /** + * Helper function for derived classes that need to record a type + * during serialization. + */ + static broker::data SerializeType(BroType* t); + + /** + * Helper function for derived classes that need to restore a type + * during unserialization. Returns the type at reference count +1.
+ */ + static BroType* UnserializeType(const broker::data& data); +}; + namespace probabilistic { class BloomFilter; class CardinalityCounter; @@ -22,7 +161,7 @@ public: virtual StringVal* Get(); protected: - HashVal() { }; + HashVal() { valid = false; } explicit HashVal(OpaqueType* t); virtual bool DoInit(); @@ -54,6 +193,7 @@ protected: bool DoFeed(const void* data, size_t size) override; StringVal* DoGet() override; + DECLARE_OPAQUE_VALUE(MD5Val) private: EVP_MD_CTX* ctx; }; @@ -74,6 +214,7 @@ protected: bool DoFeed(const void* data, size_t size) override; StringVal* DoGet() override; + DECLARE_OPAQUE_VALUE(SHA1Val) private: EVP_MD_CTX* ctx; }; @@ -94,6 +235,7 @@ protected: bool DoFeed(const void* data, size_t size) override; StringVal* DoGet() override; + DECLARE_OPAQUE_VALUE(SHA256Val) private: EVP_MD_CTX* ctx; }; @@ -111,6 +253,7 @@ public: protected: friend class Val; + DECLARE_OPAQUE_VALUE(EntropyVal) private: RandTest state; }; @@ -139,6 +282,7 @@ protected: BloomFilterVal(); explicit BloomFilterVal(OpaqueType* t); + DECLARE_OPAQUE_VALUE(BloomFilterVal) private: // Disable. BloomFilterVal(const BloomFilterVal&); @@ -162,12 +306,12 @@ public: BroType* Type() const; bool Typify(BroType* type); - probabilistic::CardinalityCounter* Get() { return c; }; protected: CardinalityVal(); + DECLARE_OPAQUE_VALUE(CardinalityVal) private: BroType* type; CompositeHash* hash; diff --git a/src/Val.cc b/src/Val.cc index f0c0c031ed..1171375649 100644 --- a/src/Val.cc +++ b/src/Val.cc @@ -73,7 +73,11 @@ Val::~Val() Val* Val::Clone() { Val::CloneState state; - return Clone(&state); + auto v = Clone(&state); + if ( ! 
v ) + reporter->RuntimeError(GetLocationInfo(), "cannot clone value"); + + return v; } Val* Val::Clone(CloneState* state) @@ -2775,20 +2779,6 @@ void VectorVal::ValDescribe(ODesc* d) const d->Add("]"); } -OpaqueVal::OpaqueVal(OpaqueType* t) : Val(t) - { - } - -OpaqueVal::~OpaqueVal() - { - } - -Val* OpaqueVal::DoClone(CloneState* state) - { - reporter->InternalError("cloning opaque type without clone implementation"); - return nullptr; - } - Val* check_and_promote(Val* v, const BroType* t, int is_init) { if ( ! v ) diff --git a/src/Val.h b/src/Val.h index 59fef19ac0..265fd07ab6 100644 --- a/src/Val.h +++ b/src/Val.h @@ -8,6 +8,9 @@ #include #include #include +#include + +#include #include "net_util.h" #include "Type.h" @@ -50,6 +53,7 @@ class ListVal; class StringVal; class EnumVal; class MutableVal; +class OpaqueVal; class StateAccess; @@ -304,6 +308,7 @@ public: CONVERTER(TYPE_STRING, StringVal*, AsStringVal) CONVERTER(TYPE_VECTOR, VectorVal*, AsVectorVal) CONVERTER(TYPE_ENUM, EnumVal*, AsEnumVal) + CONVERTER(TYPE_OPAQUE, OpaqueVal*, AsOpaqueVal) #define CONST_CONVERTER(tag, ctype, name) \ const ctype name() const \ @@ -321,6 +326,7 @@ public: CONST_CONVERTER(TYPE_LIST, ListVal*, AsListVal) CONST_CONVERTER(TYPE_STRING, StringVal*, AsStringVal) CONST_CONVERTER(TYPE_VECTOR, VectorVal*, AsVectorVal) + CONST_CONVERTER(TYPE_OPAQUE, OpaqueVal*, AsOpaqueVal) bool IsMutableVal() const { @@ -1154,21 +1160,6 @@ protected: VectorType* vector_type; }; -// Base class for values with types that are managed completely internally, -// with no further script-level operators provided (other than bif -// functions). See OpaqueVal.h for derived classes. -class OpaqueVal : public Val { -public: - explicit OpaqueVal(OpaqueType* t); - ~OpaqueVal() override; - -protected: - friend class Val; - OpaqueVal() { } - - Val* DoClone(CloneState* state) override; -}; - // Checks the given value for consistency with the given type. If an // exact match, returns it. 
If promotable, returns the promoted version, // Unref()'ing the original. If not a match, generates an error message diff --git a/src/broker/Data.cc b/src/broker/Data.cc index 63efd58d44..93986306ec 100644 --- a/src/broker/Data.cc +++ b/src/broker/Data.cc @@ -126,11 +126,6 @@ struct val_converter { return rval->Ref(); } - case TYPE_OPAQUE: - { - // Fixme: Johanna - return nullptr; - } default: return nullptr; } @@ -430,6 +425,8 @@ struct val_converter { auto rval = new PatternVal(re); return rval; } + else if ( type->Tag() == TYPE_OPAQUE ) + return OpaqueVal::Unserialize(a); return nullptr; } @@ -504,12 +501,6 @@ struct type_checker { return true; } - case TYPE_OPAQUE: - { - // TODO - // Fixme: johanna - return false; - } default: return false; } @@ -756,6 +747,11 @@ struct type_checker { return true; } + else if ( type->Tag() == TYPE_OPAQUE ) + // Correct if successful. TODO: Should avoid do the + // full unserialization here, and just check the + // type. + return OpaqueVal::Unserialize(a); return false; } @@ -970,10 +966,17 @@ broker::expected bro_broker::val_to_data(Val* v) broker::vector rval = {p->PatternText(), p->AnywherePatternText()}; return {std::move(rval)}; } - // Fixme: johanna - // case TYPE_OPAQUE: - // { - // } + case TYPE_OPAQUE: + { + auto c = v->AsOpaqueVal()->Serialize(); + if ( ! 
c ) + { + reporter->Error("unsupported opaque type for serialization"); + break; + } + + return {c}; + } default: reporter->Error("unsupported Broker::Data type: %s", type_name(v->Type()->Tag())); @@ -1109,6 +1112,115 @@ Val* bro_broker::DataVal::castTo(BroType* t) return data_to_val(data, t); } +IMPLEMENT_OPAQUE_VALUE(bro_broker::DataVal) + +broker::data bro_broker::DataVal::DoSerialize() const + { + return data; + } + +bool bro_broker::DataVal::DoUnserialize(const broker::data& data_) + { + data = data_; + return true; + } + +IMPLEMENT_OPAQUE_VALUE(bro_broker::SetIterator) + +broker::data bro_broker::SetIterator::DoSerialize() const + { + return broker::vector{dat, *it}; + } + +bool bro_broker::SetIterator::DoUnserialize(const broker::data& data) + { + auto v = caf::get_if(&data); + if ( ! (v && v->size() == 2) ) + return false; + + auto x = caf::get_if(&(*v)[0]); + + // We set the iterator by finding the element it used to point to. + // This is not perfect, as there's no guarantee that the restored + // container will list the elements in the same order. But it's as + // good as we can do, and it should generally work out. + if( x->find((*v)[1]) == x->end() ) + return false; + + dat = *x; + it = dat.find((*v)[1]); + return true; + } + +IMPLEMENT_OPAQUE_VALUE(bro_broker::TableIterator) + +broker::data bro_broker::TableIterator::DoSerialize() const + { + return broker::vector{dat, it->first}; + } + +bool bro_broker::TableIterator::DoUnserialize(const broker::data& data) + { + auto v = caf::get_if(&data); + if ( ! (v && v->size() == 2) ) + return false; + + auto x = caf::get_if(&(*v)[0]); + + // We set the iterator by finding the element it used to point to. + // This is not perfect, as there's no guarantee that the restored + // container will list the elements in the same order. But it's as + // good as we can do, and it should generally work out. 
+ if( x->find((*v)[1]) == x->end() ) + return false; + + dat = *x; + it = dat.find((*v)[1]); + return true; + } + +IMPLEMENT_OPAQUE_VALUE(bro_broker::VectorIterator) + +broker::data bro_broker::VectorIterator::DoSerialize() const + { + return broker::vector{dat, it - dat.begin()}; + } + +bool bro_broker::VectorIterator::DoUnserialize(const broker::data& data) + { + auto v = caf::get_if(&data); + if ( ! (v && v->size() == 2) ) + return false; + + auto x = caf::get_if(&(*v)[0]); + auto y = caf::get_if(&(*v)[1]); + + dat = *x; + it = dat.begin() + *y; + return true; + } + +IMPLEMENT_OPAQUE_VALUE(bro_broker::RecordIterator) + +broker::data bro_broker::RecordIterator::DoSerialize() const + { + return broker::vector{dat, it - dat.begin()}; + } + +bool bro_broker::RecordIterator::DoUnserialize(const broker::data& data) + { + auto v = caf::get_if(&data); + if ( ! (v && v->size() == 2) ) + return false; + + auto x = caf::get_if(&(*v)[0]); + auto y = caf::get_if(&(*v)[1]); + + dat = *x; + it = dat.begin() + *y; + return true; + } + broker::data bro_broker::threading_field_to_data(const threading::Field* f) { auto name = f->name; diff --git a/src/broker/Data.h b/src/broker/Data.h index bf7bdd8cad..eda8f6550c 100644 --- a/src/broker/Data.h +++ b/src/broker/Data.h @@ -2,7 +2,7 @@ #define BRO_COMM_DATA_H #include -#include "Val.h" +#include "OpaqueVal.h" #include "Reporter.h" #include "Frame.h" #include "Expr.h" @@ -126,6 +126,8 @@ protected: DataVal() {} + DECLARE_OPAQUE_VALUE(bro_broker::DataVal) + static BroType* script_data_type; }; @@ -240,6 +242,10 @@ public: broker::set dat; broker::set::iterator it; + +protected: + SetIterator() {} + DECLARE_OPAQUE_VALUE(bro_broker::SetIterator) }; class TableIterator : public OpaqueVal { @@ -253,6 +259,10 @@ public: broker::table dat; broker::table::iterator it; + +protected: + TableIterator() {} + DECLARE_OPAQUE_VALUE(bro_broker::TableIterator) }; class VectorIterator : public OpaqueVal { @@ -266,6 +276,10 @@ public: broker::vector 
dat; broker::vector::iterator it; + +protected: + VectorIterator() {} + DECLARE_OPAQUE_VALUE(bro_broker::VectorIterator) }; class RecordIterator : public OpaqueVal { @@ -279,6 +293,10 @@ public: broker::vector dat; broker::vector::iterator it; + +protected: + RecordIterator() {} + DECLARE_OPAQUE_VALUE(bro_broker::RecordIterator) }; } // namespace bro_broker diff --git a/src/broker/Store.cc b/src/broker/Store.cc index f4db09f030..7462485bc3 100644 --- a/src/broker/Store.cc +++ b/src/broker/Store.cc @@ -49,6 +49,20 @@ void StoreHandleVal::ValDescribe(ODesc* d) const d->Add("}"); } +IMPLEMENT_OPAQUE_VALUE(StoreHandleVal) + +broker::data StoreHandleVal::DoSerialize() const + { + // Cannot serialize. + return broker::none(); + } + +bool StoreHandleVal::DoUnserialize(const broker::data& data) + { + // Cannot unserialize. + return false; + } + broker::backend to_backend_type(BifEnum::Broker::BackendType type) { switch ( type ) { diff --git a/src/broker/Store.h b/src/broker/Store.h index 190417d71d..7e5b2bde07 100644 --- a/src/broker/Store.h +++ b/src/broker/Store.h @@ -5,7 +5,7 @@ #include "broker/data.bif.h" #include "Reporter.h" #include "Type.h" -#include "Val.h" +#include "OpaqueVal.h" #include "Trigger.h" #include @@ -121,6 +121,8 @@ public: protected: StoreHandleVal() = default; + + DECLARE_OPAQUE_VALUE(StoreHandleVal) }; // Helper function to construct a broker backend type from script land. diff --git a/src/broker/data.bif b/src/broker/data.bif index 53ce5d506c..b6b7c08f72 100644 --- a/src/broker/data.bif +++ b/src/broker/data.bif @@ -41,6 +41,20 @@ function Broker::__data_type%(d: Broker::Data%): Broker::DataType return bro_broker::get_data_type(d->AsRecordVal(), frame); %} +# For testing only. +function Broker::__opaque_clone_through_serialization%(d: any%): any + %{ + auto x = d->AsOpaqueVal()->Serialize(); + + if ( ! 
x ) + { + builtin_error("cannot serialize object to clone"); + return val_mgr->GetFalse(); + } + + return OpaqueVal::Unserialize(std::move(*x)); + %} + function Broker::__set_create%(%): Broker::Data %{ return bro_broker::make_data_val(broker::set()); diff --git a/src/file_analysis/analyzer/x509/OCSP.cc b/src/file_analysis/analyzer/x509/OCSP.cc index 9512d43260..269126c9d9 100644 --- a/src/file_analysis/analyzer/x509/OCSP.cc +++ b/src/file_analysis/analyzer/x509/OCSP.cc @@ -711,3 +711,31 @@ OCSP_RESPONSE* OCSP_RESPVal::GetResp() const return ocsp_resp; } +IMPLEMENT_OPAQUE_VALUE(OCSP_RESPVal) + +broker::data OCSP_RESPVal::DoSerialize() const + { + unsigned char *buf = NULL; + int length = i2d_OCSP_RESPONSE(ocsp_resp, &buf); + if ( length < 0 ) + return broker::none(); + + auto d = std::string(reinterpret_cast(buf), length); + OPENSSL_free(buf); + + return d; + } + +bool OCSP_RESPVal::DoUnserialize(const broker::data& data) + { + if ( caf::get_if(&data) ) + return false; + + auto s = caf::get_if(&data); + if ( ! 
s ) + return false; + + auto opensslbuf = reinterpret_cast(s->data()); + ocsp_resp = d2i_OCSP_RESPONSE(NULL, &opensslbuf, s->size()); + return (ocsp_resp != nullptr); + } diff --git a/src/file_analysis/analyzer/x509/OCSP.h b/src/file_analysis/analyzer/x509/OCSP.h index 9bb7b5712f..4f706b8f64 100644 --- a/src/file_analysis/analyzer/x509/OCSP.h +++ b/src/file_analysis/analyzer/x509/OCSP.h @@ -5,7 +5,7 @@ #include -#include "Val.h" +#include "OpaqueVal.h" #include "../File.h" #include "Analyzer.h" #include "X509Common.h" @@ -44,6 +44,8 @@ public: OCSP_RESPONSE *GetResp() const; protected: OCSP_RESPVal(); + + DECLARE_OPAQUE_VALUE(OCSP_RESPVal) private: OCSP_RESPONSE *ocsp_resp; }; diff --git a/src/file_analysis/analyzer/x509/X509.cc b/src/file_analysis/analyzer/x509/X509.cc index a06b4681f5..bd6d796a6b 100644 --- a/src/file_analysis/analyzer/x509/X509.cc +++ b/src/file_analysis/analyzer/x509/X509.cc @@ -489,3 +489,32 @@ Val* X509Val::DoClone(CloneState* state) return certificate; } +IMPLEMENT_OPAQUE_VALUE(X509Val) + +broker::data X509Val::DoSerialize() const + { + unsigned char *buf = NULL; + int length = i2d_X509(certificate, &buf); + + if ( length < 0 ) + return broker::none(); + + auto d = std::string(reinterpret_cast(buf), length); + OPENSSL_free(buf); + + return d; + } + +bool X509Val::DoUnserialize(const broker::data& data) + { + if ( caf::get_if(&data) ) + return false; + + auto s = caf::get_if(&data); + if ( ! 
s ) + return false; + + auto opensslbuf = reinterpret_cast(s->data()); + certificate = d2i_X509(NULL, &opensslbuf, s->size()); + return (certificate != nullptr); + } diff --git a/src/file_analysis/analyzer/x509/X509.h b/src/file_analysis/analyzer/x509/X509.h index b8a4e8d6e5..f20712cab2 100644 --- a/src/file_analysis/analyzer/x509/X509.h +++ b/src/file_analysis/analyzer/x509/X509.h @@ -5,7 +5,7 @@ #include -#include "Val.h" +#include "OpaqueVal.h" #include "X509Common.h" #if ( OPENSSL_VERSION_NUMBER < 0x10002000L ) || defined(LIBRESSL_VERSION_NUMBER) @@ -151,6 +151,7 @@ protected: */ X509Val(); + DECLARE_OPAQUE_VALUE(X509Val) private: ::X509* certificate; // the wrapped certificate }; diff --git a/src/probabilistic/BitVector.cc b/src/probabilistic/BitVector.cc index 6e09c370a4..a7d6bf4f11 100644 --- a/src/probabilistic/BitVector.cc +++ b/src/probabilistic/BitVector.cc @@ -505,6 +505,47 @@ uint64 BitVector::Hash() const return digest; } +broker::expected BitVector::Serialize() const + { + broker::vector v = {static_cast(num_bits), static_cast(bits.size())}; + v.reserve(2 + bits.size()); + + for ( size_t i = 0; i < bits.size(); ++i ) + v.emplace_back(static_cast(bits[i])); + + return {v}; + } + +std::unique_ptr BitVector::Unserialize(const broker::data& data) + { + auto v = caf::get_if(&data); + if ( ! (v && v->size() >= 2) ) + return nullptr; + + auto num_bits = caf::get_if(&(*v)[0]); + auto size = caf::get_if(&(*v)[1]); + + if ( ! (num_bits && size) ) + return nullptr; + + if ( v->size() != 2 + *size ) + return nullptr; + + auto bv = std::unique_ptr(new BitVector()); + bv->num_bits = *num_bits; + + for ( size_t i = 0; i < *size; ++i ) + { + auto x = caf::get_if(&(*v)[2 + i]); + if ( ! 
x ) + return nullptr; + + bv->bits.push_back(*x); + } + + return std::move(bv); + } + BitVector::size_type BitVector::lowest_bit(block_type block) { block_type x = block - (block & (block - 1)); diff --git a/src/probabilistic/BitVector.h b/src/probabilistic/BitVector.h index a87b27e55b..12d628cacf 100644 --- a/src/probabilistic/BitVector.h +++ b/src/probabilistic/BitVector.h @@ -6,6 +6,8 @@ #include #include +#include + namespace probabilistic { /** @@ -281,6 +283,9 @@ public: */ uint64_t Hash() const; + broker::expected Serialize() const; + static std::unique_ptr Unserialize(const broker::data& data); + private: /** * Computes the number of excess/unused bits in the bit vector. diff --git a/src/probabilistic/BloomFilter.cc b/src/probabilistic/BloomFilter.cc index e12de2f049..2148fac556 100644 --- a/src/probabilistic/BloomFilter.cc +++ b/src/probabilistic/BloomFilter.cc @@ -28,6 +28,51 @@ BloomFilter::~BloomFilter() delete hasher; } +broker::expected BloomFilter::Serialize() const + { + auto h = hasher->Serialize(); + auto d = DoSerialize(); + + if ( (! h) || d == broker::none() ) + return broker::ec::invalid_data; // Cannot serialize + + return broker::vector{static_cast(Type()), std::move(*h), std::move(d)}; + } + +std::unique_ptr BloomFilter::Unserialize(const broker::data& data) + { + auto v = caf::get_if(&data); + + if ( ! (v && v->size() == 3) ) + return nullptr; + + auto type = caf::get_if(&(*v)[0]); + if ( ! type ) + return nullptr; + + auto hasher_ = Hasher::Unserialize((*v)[1]); + if ( ! hasher_ ) + return nullptr; + + std::unique_ptr bf; + + switch ( *type ) { + case Basic: + bf = std::unique_ptr(new BasicBloomFilter()); + break; + + case Counting: + bf = std::unique_ptr(new CountingBloomFilter()); + break; + } + + if ( ! 
bf->DoUnserialize((*v)[2]) ) + return nullptr; + + bf->hasher = hasher_.release(); + return std::move(bf); + } + size_t BasicBloomFilter::M(double fp, size_t capacity) { double ln2 = std::log(2); @@ -126,6 +171,25 @@ size_t BasicBloomFilter::Count(const HashKey* key) const return 1; } +broker::data BasicBloomFilter::DoSerialize() const + { + auto b = bits->Serialize(); + if ( ! b ) + return broker::none(); + + return *b; + } + +bool BasicBloomFilter::DoUnserialize(const broker::data& data) + { + auto b = BitVector::Unserialize(data); + if ( ! b ) + return false; + + bits = b.release(); + return true; + } + CountingBloomFilter::CountingBloomFilter() { cells = 0; @@ -217,3 +281,22 @@ size_t CountingBloomFilter::Count(const HashKey* key) const return min; } + +broker::data CountingBloomFilter::DoSerialize() const + { + auto c = cells->Serialize(); + if ( ! c ) + return broker::none(); + + return *c; + } + +bool CountingBloomFilter::DoUnserialize(const broker::data& data) + { + auto c = CounterVector::Unserialize(data); + if ( ! c ) + return false; + + cells = c.release(); + return true; + } diff --git a/src/probabilistic/BloomFilter.h b/src/probabilistic/BloomFilter.h index 03acfd17d6..556ffd4bc0 100644 --- a/src/probabilistic/BloomFilter.h +++ b/src/probabilistic/BloomFilter.h @@ -4,6 +4,9 @@ #define PROBABILISTIC_BLOOMFILTER_H #include + +#include + #include "BitVector.h" #include "Hasher.h" @@ -11,6 +14,9 @@ namespace probabilistic { class CounterVector; +/** Types of derived BloomFilter classes. */ +enum BloomFilterType { Basic, Counting }; + /** * The abstract base class for Bloom filters. */ @@ -71,6 +77,9 @@ public: */ virtual string InternalState() const = 0; + broker::expected Serialize() const; + static std::unique_ptr Unserialize(const broker::data& data); + protected: /** * Default constructor. 
@@ -84,6 +93,10 @@ protected: */ explicit BloomFilter(const Hasher* hasher); + virtual broker::data DoSerialize() const = 0; + virtual bool DoUnserialize(const broker::data& data) = 0; + virtual BloomFilterType Type() const = 0; + const Hasher* hasher; }; @@ -144,6 +157,8 @@ public: string InternalState() const override; protected: + friend class BloomFilter; + /** * Default constructor. */ @@ -152,6 +167,10 @@ protected: // Overridden from BloomFilter. void Add(const HashKey* key) override; size_t Count(const HashKey* key) const override; + broker::data DoSerialize() const override; + bool DoUnserialize(const broker::data& data) override; + BloomFilterType Type() const override + { return BloomFilterType::Basic; } private: BitVector* bits; @@ -187,6 +206,8 @@ public: string InternalState() const override; protected: + friend class BloomFilter; + /** * Default constructor. */ @@ -195,6 +216,10 @@ protected: // Overridden from BloomFilter. void Add(const HashKey* key) override; size_t Count(const HashKey* key) const override; + broker::data DoSerialize() const override; + bool DoUnserialize(const broker::data& data) override; + BloomFilterType Type() const override + { return BloomFilterType::Counting; } private: CounterVector* cells; diff --git a/src/probabilistic/CardinalityCounter.cc b/src/probabilistic/CardinalityCounter.cc index 17caec3e0e..bcb9579ec2 100644 --- a/src/probabilistic/CardinalityCounter.cc +++ b/src/probabilistic/CardinalityCounter.cc @@ -196,6 +196,47 @@ uint64_t CardinalityCounter::GetM() const return m; } +broker::expected CardinalityCounter::Serialize() const + { + broker::vector v = {m, V, alpha_m}; + v.reserve(3 + m); + + for ( size_t i = 0; i < m; ++i ) + v.emplace_back(static_cast(buckets[i])); + + return {v}; + } + +std::unique_ptr CardinalityCounter::Unserialize(const broker::data& data) + { + auto v = caf::get_if(&data); + if ( ! 
(v && v->size() >= 3) ) + return nullptr; + + auto m = caf::get_if(&(*v)[0]); + auto V = caf::get_if(&(*v)[1]); + auto alpha_m = caf::get_if(&(*v)[2]); + + if ( ! (m && V && alpha_m) ) + return nullptr; + + if ( v->size() != 3 + *m ) + return nullptr; + + auto cc = std::unique_ptr(new CardinalityCounter(*m, *V, *alpha_m)); + + for ( size_t i = 0; i < *m; ++i ) + { + auto x = caf::get_if(&(*v)[3 + i]); + if ( ! x ) + return nullptr; + + cc->buckets.push_back(*x); + } + + return std::move(cc); + } + /** * The following function is copied from libc/string/flsll.c from the FreeBSD source * tree. Original copyright message follows diff --git a/src/probabilistic/CardinalityCounter.h b/src/probabilistic/CardinalityCounter.h index 7d898b3c47..a2d69d0809 100644 --- a/src/probabilistic/CardinalityCounter.h +++ b/src/probabilistic/CardinalityCounter.h @@ -84,6 +84,9 @@ public: */ bool Merge(CardinalityCounter* c); + broker::expected Serialize() const; + static std::unique_ptr Unserialize(const broker::data& data); + protected: /** * Return the number of buckets. diff --git a/src/probabilistic/CounterVector.cc b/src/probabilistic/CounterVector.cc index e234d5c9d9..ec307f33f2 100644 --- a/src/probabilistic/CounterVector.cc +++ b/src/probabilistic/CounterVector.cc @@ -158,3 +158,29 @@ uint64_t CounterVector::Hash() const return bits->Hash(); } +broker::expected CounterVector::Serialize() const + { + auto b = bits->Serialize(); + if ( ! b ) + return broker::ec::invalid_data; // Cannot serialize + + return broker::vector{static_cast(width), std::move(*b)}; + } + +std::unique_ptr CounterVector::Unserialize(const broker::data& data) + { + auto v = caf::get_if(&data); + if ( ! 
(v && v->size() >= 2) ) + return nullptr; + + auto width = caf::get_if(&(*v)[0]); + auto bits = BitVector::Unserialize((*v)[1]); + + auto cv = std::unique_ptr(new CounterVector()); + cv->width = *width; + cv->bits = bits.release(); + return std::move(cv); + } + + + diff --git a/src/probabilistic/CounterVector.h b/src/probabilistic/CounterVector.h index 04394ebca2..41674efd11 100644 --- a/src/probabilistic/CounterVector.h +++ b/src/probabilistic/CounterVector.h @@ -6,6 +6,8 @@ #include #include +#include + namespace probabilistic { class BitVector; @@ -134,6 +136,9 @@ public: */ uint64_t Hash() const; + broker::expected Serialize() const; + static std::unique_ptr Unserialize(const broker::data& data); + protected: friend CounterVector operator|(const CounterVector& x, const CounterVector& y); diff --git a/src/probabilistic/Hasher.cc b/src/probabilistic/Hasher.cc index 8508cd01ad..ffa60dacfd 100644 --- a/src/probabilistic/Hasher.cc +++ b/src/probabilistic/Hasher.cc @@ -46,6 +46,47 @@ Hasher::Hasher(size_t arg_k, seed_t arg_seed) seed = arg_seed; } +broker::expected Hasher::Serialize() const + { + return broker::vector{ + static_cast(Type()), static_cast(k), + seed.h1, seed.h2 }; + } + +std::unique_ptr Hasher::Unserialize(const broker::data& data) + { + auto v = caf::get_if(&data); + + if ( ! (v && v->size() == 4) ) + return nullptr; + + auto type = caf::get_if(&(*v)[0]); + auto k = caf::get_if(&(*v)[1]); + auto h1 = caf::get_if(&(*v)[2]); + auto h2 = caf::get_if(&(*v)[3]); + + if ( ! (type && k && h1 && h2) ) + return nullptr; + + std::unique_ptr hasher; + + switch ( *type ) { + case Default: + hasher = std::unique_ptr(new DefaultHasher(*k, {*h1, *h2})); + break; + + case Double: + hasher = std::unique_ptr(new DoubleHasher(*k, {*h1, *h2})); + break; + } + + // Note that the derived classed don't hold any further state of + // their own. They reconstruct all their information from their + // constructors' arguments. 
+ + return std::move(hasher); + } + UHF::UHF() { memset(&seed, 0, sizeof(seed)); diff --git a/src/probabilistic/Hasher.h b/src/probabilistic/Hasher.h index baceb45fff..3218ec4d7a 100644 --- a/src/probabilistic/Hasher.h +++ b/src/probabilistic/Hasher.h @@ -3,10 +3,15 @@ #ifndef PROBABILISTIC_HASHER_H #define PROBABILISTIC_HASHER_H +#include + #include "Hash.h" namespace probabilistic { +/** Types of derived Hasher classes. */ +enum HasherType { Default, Double }; + /** * Abstract base class for hashers. A hasher creates a family of hash * functions to hash an element *k* times. @@ -98,6 +103,9 @@ public: */ seed_t Seed() const { return seed; } + broker::expected Serialize() const; + static std::unique_ptr Unserialize(const broker::data& data); + protected: Hasher() { } @@ -110,6 +118,8 @@ protected: */ Hasher(size_t arg_k, seed_t arg_seed); + virtual HasherType Type() const = 0; + private: size_t k; seed_t seed; @@ -175,6 +185,9 @@ public: return ! (x == y); } + broker::expected Serialize() const; + static UHF Unserialize(const broker::data& data); + private: static size_t compute_seed(Hasher::seed_t seed); @@ -205,6 +218,9 @@ public: private: DefaultHasher() { } + HasherType Type() const override + { return HasherType::Default; } + std::vector hash_functions; }; @@ -231,6 +247,9 @@ public: private: DoubleHasher() { } + HasherType Type() const override + { return HasherType::Double; } + UHF h1; UHF h2; }; diff --git a/src/probabilistic/Topk.cc b/src/probabilistic/Topk.cc index 42a2401a2f..b612e1a8a4 100644 --- a/src/probabilistic/Topk.cc +++ b/src/probabilistic/Topk.cc @@ -1,5 +1,6 @@ // See the file "COPYING" in the main distribution directory for copyright. 
+#include "broker/Data.h" #include "probabilistic/Topk.h" #include "CompHash.h" #include "Reporter.h" @@ -405,4 +406,126 @@ void TopkVal::IncrementCounter(Element* e, unsigned int count) } } -}; +IMPLEMENT_OPAQUE_VALUE(TopkVal) + +broker::data TopkVal::DoSerialize() const + { + broker::vector d = {size, numElements, pruned}; + + if ( type ) + { + auto t = SerializeType(type); + if ( t == broker::none() ) + return broker::none(); + + d.emplace_back(t); + } + else + d.emplace_back(broker::none()); + + uint64_t i = 0; + std::list::const_iterator it = buckets.begin(); + while ( it != buckets.end() ) + { + Bucket* b = *it; + uint32_t elements_count = b->elements.size(); + + d.emplace_back(static_cast(b->elements.size())); + d.emplace_back(b->count); + + std::list::const_iterator eit = b->elements.begin(); + while ( eit != b->elements.end() ) + { + Element* element = *eit; + d.emplace_back(element->epsilon); + auto v = bro_broker::val_to_data(element->value); + if ( ! v ) + return broker::none(); + + d.emplace_back(*v); + + eit++; + i++; + } + + it++; + } + + assert(i == numElements); + return d; + } + + +bool TopkVal::DoUnserialize(const broker::data& data) + { + auto v = caf::get_if(&data); + + if ( ! (v && v->size() >= 4) ) + return false; + + auto size_ = caf::get_if(&(*v)[0]); + auto numElements_ = caf::get_if(&(*v)[1]); + auto pruned_ = caf::get_if(&(*v)[2]); + + if ( ! (size_ && numElements_ && pruned_) ) + return false; + + size = *size_; + numElements = *numElements_; + pruned = *pruned_; + + auto no_type = caf::get_if(&(*v)[3]); + if ( ! no_type ) + { + BroType* t = UnserializeType((*v)[3]); + if ( ! t ) + return false; + + Typify(t); + Unref(t); + } + + uint64_t i = 0; + uint64_t idx = 4; + + while ( i < numElements ) + { + auto elements_count = idx < v->size() ? caf::get_if(&(*v)[idx++]) : nullptr; // nullptr once the record is exhausted, so a truncated input fails instead of reading out of bounds + auto count = idx < v->size() ? caf::get_if(&(*v)[idx++]) : nullptr; + + if ( ! 
(elements_count && count) ) + return false; + + Bucket* b = new Bucket(); // allocated only after validation so the early return above cannot leak it + b->count = *count; + b->bucketPos = buckets.insert(buckets.end(), b); + + for ( uint64_t j = 0; j < *elements_count; j++ ) + { + auto epsilon = idx < v->size() ? caf::get_if(&(*v)[idx++]) : nullptr; + Val* val = idx < v->size() ? bro_broker::data_to_val((*v)[idx++], type) : nullptr; + + if ( ! (epsilon && val) ) + return false; + + Element* e = new Element(); // created only once both of its fields are known to be valid + e->epsilon = *epsilon; + e->value = val; + e->parent = b; + + b->elements.insert(b->elements.end(), e); + + HashKey* key = GetHash(e->value); + assert (elementDict->Lookup(key) == 0); + + elementDict->Insert(key, e); + delete key; + + i++; + } + } + + assert(i == numElements); + return true; + } +} diff --git a/src/probabilistic/Topk.h b/src/probabilistic/Topk.h index 52f8a39034..24d05e12af 100644 --- a/src/probabilistic/Topk.h +++ b/src/probabilistic/Topk.h @@ -131,6 +131,8 @@ public: */ Val* DoClone(CloneState* state) override; + DECLARE_OPAQUE_VALUE(TopkVal) + protected: /** * Construct an empty TopkVal. 
Only used for deserialization diff --git a/testing/btest/Baseline/broker.opaque/.stderr b/testing/btest/Baseline/broker.opaque/.stderr new file mode 100644 index 0000000000..bf07a71a21 --- /dev/null +++ b/testing/btest/Baseline/broker.opaque/.stderr @@ -0,0 +1 @@ +error: incompatible Bloom filter types diff --git a/testing/btest/Baseline/broker.opaque/out b/testing/btest/Baseline/broker.opaque/out new file mode 100644 index 0000000000..f35f4e3284 --- /dev/null +++ b/testing/btest/Baseline/broker.opaque/out @@ -0,0 +1,53 @@ +============ Topk +[b, a, c] +[b, a, c] +============ HLL +3.000069 +3.000069 +============ Bloom +0 +1 +0 +1 +============ Hashes +5b9164ad6f496d9dee12ec7634ce253f +5b9164ad6f496d9dee12ec7634ce253f +30ae97492ce1da88d0e7117ace0a60a6f9e1e0bc +30ae97492ce1da88d0e7117ace0a60a6f9e1e0bc +25b6746d5172ed6352966a013d93ac846e1110d5a25e8f183b5931f4688842a1 +25b6746d5172ed6352966a013d93ac846e1110d5a25e8f183b5931f4688842a1 +============ X509 +[version=3, serial=040000000001154B5AC394, 
subject=CN=GlobalSign Root CA,OU=Root CA,O=GlobalSign nv-sa,C=BE, issuer=CN=GlobalSign Root CA,OU=Root CA,O=GlobalSign nv-sa,C=BE, cn=GlobalSign Root CA, not_valid_before=904651200.0, not_valid_after=1832673600.0, key_alg=rsaEncryption, sig_alg=sha1WithRSAEncryption, key_type=rsa, key_length=2048, exponent=65537, curve=] +[version=3, serial=040000000001154B5AC394, subject=CN=GlobalSign Root CA,OU=Root CA,O=GlobalSign nv-sa,C=BE, issuer=CN=GlobalSign Root CA,OU=Root CA,O=GlobalSign nv-sa,C=BE, cn=GlobalSign Root CA, not_valid_before=904651200.0, not_valid_after=1832673600.0, key_alg=rsaEncryption, sig_alg=sha1WithRSAEncryption, key_type=rsa, key_length=2048, exponent=65537, curve=] +============ OCSP_RESPVal +============ Entropy +[entropy=4.715374, chi_square=591.981818, mean=75.472727, monte_carlo_pi=4.0, serial_correlation=-0.11027] +[entropy=4.715374, chi_square=591.981818, mean=75.472727, monte_carlo_pi=4.0, serial_correlation=-0.11027] +============ broker::Data +broker::data{{hi, there}} +broker::data{{hi, there}} +T +============ broker::Set +| [data=broker::data{!}] | [data=broker::data{!}] + > [data=broker::data{hi}] | [data=broker::data{hi}] +| [data=broker::data{hi}] | [data=broker::data{hi}] + > [data=broker::data{there}] | [data=broker::data{there}] +| [data=broker::data{there}] | [data=broker::data{there}] +============ broker::Table +| [key=[data=broker::data{!}], val=[data=broker::data{30}]] | [key=[data=broker::data{!}], val=[data=broker::data{30}]] + > [key=[data=broker::data{hi}], val=[data=broker::data{10}]] | [key=[data=broker::data{hi}], val=[data=broker::data{10}]] +| [key=[data=broker::data{hi}], val=[data=broker::data{10}]] | [key=[data=broker::data{hi}], val=[data=broker::data{10}]] + > [key=[data=broker::data{there}], val=[data=broker::data{20}]] | [key=[data=broker::data{there}], val=[data=broker::data{20}]] +| [key=[data=broker::data{there}], val=[data=broker::data{20}]] | [key=[data=broker::data{there}], val=[data=broker::data{20}]] 
+============ broker::Vector +| [data=broker::data{hi}] | [data=broker::data{hi}] + > [data=broker::data{there}] | [data=broker::data{there}] +| [data=broker::data{there}] | [data=broker::data{there}] + > [data=broker::data{!}] | [data=broker::data{!}] +| [data=broker::data{!}] | [data=broker::data{!}] +============ broker::Record +| [data=broker::data{hi}] | [data=broker::data{hi}] + > [data=broker::data{there}] | [data=broker::data{there}] +| [data=broker::data{there}] | [data=broker::data{there}] + > [data=broker::data{!}] | [data=broker::data{!}] +| [data=broker::data{!}] | [data=broker::data{!}] diff --git a/testing/btest/broker/opaque.zeek b/testing/btest/broker/opaque.zeek new file mode 100644 index 0000000000..f65fc191dd --- /dev/null +++ b/testing/btest/broker/opaque.zeek @@ -0,0 +1,161 @@ +# @TEST-EXEC: zeek -b %INPUT >out +# @TEST-EXEC: btest-diff out +# @TEST-EXEC: btest-diff .stderr + +event zeek_init() + { + print "============ Topk"; + local k1: opaque of topk = topk_init(4); + topk_add(k1, "a"); + topk_add(k1, "b"); + topk_add(k1, "b"); + topk_add(k1, "c"); + local k2 = Broker::__opaque_clone_through_serialization(k1); + print topk_get_top(k1, 5); + topk_add(k1, "shoulnotshowup"); + print topk_get_top(k2, 5); + + print "============ HLL"; + local c1 = hll_cardinality_init(0.01, 0.95); + hll_cardinality_add(c1, 2001); + hll_cardinality_add(c1, 2002); + hll_cardinality_add(c1, 2003); + + print hll_cardinality_estimate(c1); + local c2 = Broker::__opaque_clone_through_serialization(c1); + hll_cardinality_add(c1, 2004); + print hll_cardinality_estimate(c2); + + print "============ Bloom"; + local bf_cnt = bloomfilter_basic_init(0.1, 1000); + bloomfilter_add(bf_cnt, 42); + bloomfilter_add(bf_cnt, 84); + bloomfilter_add(bf_cnt, 168); + print bloomfilter_lookup(bf_cnt, 0); + print bloomfilter_lookup(bf_cnt, 42); + local bf_copy = Broker::__opaque_clone_through_serialization(bf_cnt); + bloomfilter_add(bf_cnt, 0); + print bloomfilter_lookup(bf_copy, 0); + 
print bloomfilter_lookup(bf_copy, 42); + # check that typification transferred. + bloomfilter_add(bf_copy, 0.5); # causes stderr output "error: incompatible Bloom filter types" + + print "============ Hashes"; + local md5a = md5_hash_init(); + md5_hash_update(md5a, "one"); + local md5b = Broker::__opaque_clone_through_serialization(md5a); + md5_hash_update(md5a, "two"); + md5_hash_update(md5b, "two"); + print md5_hash_finish(md5a); + print md5_hash_finish(md5b); + + local sha1a = sha1_hash_init(); + sha1_hash_update(sha1a, "one"); + local sha1b = Broker::__opaque_clone_through_serialization(sha1a); + sha1_hash_update(sha1a, "two"); + sha1_hash_update(sha1b, "two"); + print sha1_hash_finish(sha1a); + print sha1_hash_finish(sha1b); + + local sha256a = sha256_hash_init(); + sha256_hash_update(sha256a, "one"); + local sha256b = Broker::__opaque_clone_through_serialization(sha256a); + sha256_hash_update(sha256a, "two"); + sha256_hash_update(sha256b, "two"); + print sha256_hash_finish(sha256a); + print sha256_hash_finish(sha256b); + + print "============ X509"; + local x509 = 
x509_from_der("\x30\x82\x03\x75\x30\x82\x02\x5D\xA0\x03\x02\x01\x02\x02\x0B\x04\x00\x00\x00\x00\x01\x15\x4B\x5A\xC3\x94\x30\x0D\x06\x09\x2A\x86\x48\x86\xF7\x0D\x01\x01\x05\x05\x00\x30\x57\x31\x0B\x30\x09\x06\x03\x55\x04\x06\x13\x02\x42\x45\x31\x19\x30\x17\x06\x03\x55\x04\x0A\x13\x10\x47\x6C\x6F\x62\x61\x6C\x53\x69\x67\x6E\x20\x6E\x76\x2D\x73\x61\x31\x10\x30\x0E\x06\x03\x55\x04\x0B\x13\x07\x52\x6F\x6F\x74\x20\x43\x41\x31\x1B\x30\x19\x06\x03\x55\x04\x03\x13\x12\x47\x6C\x6F\x62\x61\x6C\x53\x69\x67\x6E\x20\x52\x6F\x6F\x74\x20\x43\x41\x30\x1E\x17\x0D\x39\x38\x30\x39\x30\x31\x31\x32\x30\x30\x30\x30\x5A\x17\x0D\x32\x38\x30\x31\x32\x38\x31\x32\x30\x30\x30\x30\x5A\x30\x57\x31\x0B\x30\x09\x06\x03\x55\x04\x06\x13\x02\x42\x45\x31\x19\x30\x17\x06\x03\x55\x04\x0A\x13\x10\x47\x6C\x6F\x62\x61\x6C\x53\x69\x67\x6E\x20\x6E\x76\x2D\x73\x61\x31\x10\x30\x0E\x06\x03\x55\x04\x0B\x13\x07\x52\x6F\x6F\x74\x20\x43\x41\x31\x1B\x30\x19\x06\x03\x55\x04\x03\x13\x12\x47\x6C\x6F\x62\x61\x6C\x53\x69\x67\x6E\x20\x52\x6F\x6F\x74\x20\x43\x41\x30\x82\x01\x22\x30\x0D\x06\x09\x2A\x86\x48\x86\xF7\x0D\x01\x01\x01\x05\x00\x03\x82\x01\x0F\x00\x30\x82\x01\x0A\x02\x82\x01\x01\x00\xDA\x0E\xE6\x99\x8D\xCE\xA3\xE3\x4F\x8A\x7E\xFB\xF1\x8B\x83\x25\x6B\xEA\x48\x1F\xF1\x2A\xB0\xB9\x95\x11\x04\xBD\xF0\x63\xD1\xE2\x67\x66\xCF\x1C\xDD\xCF\x1B\x48\x2B\xEE\x8D\x89\x8E\x9A\xAF\x29\x80\x65\xAB\xE9\xC7\x2D\x12\xCB\xAB\x1C\x4C\x70\x07\xA1\x3D\x0A\x30\xCD\x15\x8D\x4F\xF8\xDD\xD4\x8C\x50\x15\x1C\xEF\x50\xEE\xC4\x2E\xF7\xFC\xE9\x52\xF2\x91\x7D\xE0\x6D\xD5\x35\x30\x8E\x5E\x43\x73\xF2\x41\xE9\xD5\x6A\xE3\xB2\x89\x3A\x56\x39\x38\x6F\x06\x3C\x88\x69\x5B\x2A\x4D\xC5\xA7\x54\xB8\x6C\x89\xCC\x9B\xF9\x3C\xCA\xE5\xFD\x89\xF5\x12\x3C\x92\x78\x96\xD6\xDC\x74\x6E\x93\x44\x61\xD1\x8D\xC7\x46\xB2\x75\x0E\x86\xE8\x19\x8A\xD5\x6D\x6C\xD5\x78\x16\x95\xA2\xE9\xC8\x0A\x38\xEB\xF2\x24\x13\x4F\x73\x54\x93\x13\x85\x3A\x1B\xBC\x1E\x34\xB5\x8B\x05\x8C\xB9\x77\x8B\xB1\xDB\x1F\x20\x91\xAB\x09\x53\x6E\x90\xCE\x7B\x37\x74\xB9\x70\x47\x91\x22\x51\x63\x16\x79\
xAE\xB1\xAE\x41\x26\x08\xC8\x19\x2B\xD1\x46\xAA\x48\xD6\x64\x2A\xD7\x83\x34\xFF\x2C\x2A\xC1\x6C\x19\x43\x4A\x07\x85\xE7\xD3\x7C\xF6\x21\x68\xEF\xEA\xF2\x52\x9F\x7F\x93\x90\xCF\x02\x03\x01\x00\x01\xA3\x42\x30\x40\x30\x0E\x06\x03\x55\x1D\x0F\x01\x01\xFF\x04\x04\x03\x02\x01\x06\x30\x0F\x06\x03\x55\x1D\x13\x01\x01\xFF\x04\x05\x30\x03\x01\x01\xFF\x30\x1D\x06\x03\x55\x1D\x0E\x04\x16\x04\x14\x60\x7B\x66\x1A\x45\x0D\x97\xCA\x89\x50\x2F\x7D\x04\xCD\x34\xA8\xFF\xFC\xFD\x4B\x30\x0D\x06\x09\x2A\x86\x48\x86\xF7\x0D\x01\x01\x05\x05\x00\x03\x82\x01\x01\x00\xD6\x73\xE7\x7C\x4F\x76\xD0\x8D\xBF\xEC\xBA\xA2\xBE\x34\xC5\x28\x32\xB5\x7C\xFC\x6C\x9C\x2C\x2B\xBD\x09\x9E\x53\xBF\x6B\x5E\xAA\x11\x48\xB6\xE5\x08\xA3\xB3\xCA\x3D\x61\x4D\xD3\x46\x09\xB3\x3E\xC3\xA0\xE3\x63\x55\x1B\xF2\xBA\xEF\xAD\x39\xE1\x43\xB9\x38\xA3\xE6\x2F\x8A\x26\x3B\xEF\xA0\x50\x56\xF9\xC6\x0A\xFD\x38\xCD\xC4\x0B\x70\x51\x94\x97\x98\x04\xDF\xC3\x5F\x94\xD5\x15\xC9\x14\x41\x9C\xC4\x5D\x75\x64\x15\x0D\xFF\x55\x30\xEC\x86\x8F\xFF\x0D\xEF\x2C\xB9\x63\x46\xF6\xAA\xFC\xDF\xBC\x69\xFD\x2E\x12\x48\x64\x9A\xE0\x95\xF0\xA6\xEF\x29\x8F\x01\xB1\x15\xB5\x0C\x1D\xA5\xFE\x69\x2C\x69\x24\x78\x1E\xB3\xA7\x1C\x71\x62\xEE\xCA\xC8\x97\xAC\x17\x5D\x8A\xC2\xF8\x47\x86\x6E\x2A\xC4\x56\x31\x95\xD0\x67\x89\x85\x2B\xF9\x6C\xA6\x5D\x46\x9D\x0C\xAA\x82\xE4\x99\x51\xDD\x70\xB7\xDB\x56\x3D\x61\xE4\x6A\xE1\x5C\xD6\xF6\xFE\x3D\xDE\x41\xCC\x07\xAE\x63\x52\xBF\x53\x53\xF4\x2B\xE9\xC7\xFD\xB6\xF7\x82\x5F\x85\xD2\x41\x18\xDB\x81\xB3\x04\x1C\xC5\x1F\xA4\x80\x6F\x15\x20\xC9\xDE\x0C\x88\x0A\x1D\xD6\x66\x55\xE2\xFC\x48\xC9\x29\x26\x69\xE0"); + local x5092 = Broker::__opaque_clone_through_serialization(x509); + print x509_parse(x509); + print x509_parse(x5092); + + print "============ OCSP_RESPVal"; + # TODO: Not sure how to test? 
+ + print "============ Entropy"; + local handle = entropy_test_init(); + entropy_test_add(handle, "dh3Hie02uh^s#Sdf9L3frd243h$d78r2G4cM6*Q05d(7rh46f!0|4-f"); + local handle2 = Broker::__opaque_clone_through_serialization(handle); + print entropy_test_finish(handle); + print entropy_test_finish(handle2); + + print "============ broker::Data"; + local s1: Broker::Data = Broker::set_create(); + Broker::set_insert(s1, "hi"); + Broker::set_insert(s1, "there"); + local d2 = Broker::__opaque_clone_through_serialization(s1$data); + print s1$data; + print d2; + print same_object(s1$data, d2) == F; + + print "============ broker::Set"; + local cs = Broker::set_create(); + Broker::set_insert(cs, "hi"); + Broker::set_insert(cs, "there"); + Broker::set_insert(cs, "!"); + + local i = Broker::set_iterator(cs); + while ( ! Broker::set_iterator_last(i) ) + { + local ci = Broker::__opaque_clone_through_serialization(i); + print fmt("| %s | %s", Broker::set_iterator_value(i), Broker::set_iterator_value(ci)); + Broker::set_iterator_next(i); + Broker::set_iterator_next(ci); + if ( ! Broker::set_iterator_last(i) ) + print fmt(" > %s | %s", Broker::set_iterator_value(i), Broker::set_iterator_value(ci)); + } + + print "============ broker::Table"; + local ct = Broker::table_create(); + Broker::table_insert(ct, "hi", 10); + Broker::table_insert(ct, "there", 20); + Broker::table_insert(ct, "!", 30); + + local j = Broker::table_iterator(ct); + while ( ! Broker::table_iterator_last(j) ) + { + local cj = Broker::__opaque_clone_through_serialization(j); + print fmt("| %s | %s", Broker::table_iterator_value(j), Broker::table_iterator_value(cj)); + Broker::table_iterator_next(j); + Broker::table_iterator_next(cj); + if ( ! 
Broker::table_iterator_last(j) ) + print fmt(" > %s | %s", Broker::table_iterator_value(j), Broker::table_iterator_value(cj)); + } + + print "============ broker::Vector"; + local cv = Broker::vector_create(); + Broker::vector_insert(cv, 0, "hi"); + Broker::vector_insert(cv, 1, "there"); + Broker::vector_insert(cv, 2, "!"); + + local k = Broker::vector_iterator(cv); + while ( ! Broker::vector_iterator_last(k) ) + { + local ck = Broker::__opaque_clone_through_serialization(k); + print fmt("| %s | %s", Broker::vector_iterator_value(k), Broker::vector_iterator_value(ck)); + Broker::vector_iterator_next(k); + Broker::vector_iterator_next(ck); + if ( ! Broker::vector_iterator_last(k) ) + print fmt(" > %s | %s", Broker::vector_iterator_value(k), Broker::vector_iterator_value(ck)); + } + + print "============ broker::Record"; + local cr = Broker::record_create(3); + Broker::record_assign(cr, 0, "hi"); + Broker::record_assign(cr, 1, "there"); + Broker::record_assign(cr, 2, "!"); + + local l = Broker::record_iterator(cr); + while ( ! Broker::record_iterator_last(l) ) + { + local cl = Broker::__opaque_clone_through_serialization(l); + print fmt("| %s | %s", Broker::record_iterator_value(l), Broker::record_iterator_value(cl)); + Broker::record_iterator_next(l); + Broker::record_iterator_next(cl); + if ( ! Broker::record_iterator_last(l) ) + print fmt(" > %s | %s", Broker::record_iterator_value(l), Broker::record_iterator_value(cl)); + } + + }