From e1520a0d67a9ed7592f6f804ced0f088dbe3447a Mon Sep 17 00:00:00 2001 From: ZekeMedley Date: Tue, 28 May 2019 16:59:50 -0700 Subject: [PATCH 1/8] Initial paraglob integration. --- .gitmodules | 3 + CMakeLists.txt | 4 ++ aux/paraglob | 1 + src/OpaqueVal.cc | 25 ++++++++ src/OpaqueVal.h | 11 ++++ src/Type.h | 1 + src/bro.bif | 59 ++++++++++++++++++- src/input/Manager.cc | 33 ++++++++++- src/main.cc | 2 + src/threading/SerialTypes.h | 1 + src/threading/formatters/Ascii.cc | 22 +++++++ testing/btest/Baseline/language.paraglob/out | 6 ++ .../.stderr | 9 +++ .../out | 9 +++ testing/btest/language/paraglob.zeek | 34 +++++++++++ .../base/frameworks/input/bad_patterns.zeek | 38 ++++++++++++ .../base/frameworks/input/patterns.zeek | 47 +++++++++++++++ 17 files changed, 302 insertions(+), 3 deletions(-) create mode 160000 aux/paraglob create mode 100644 testing/btest/Baseline/language.paraglob/out create mode 100644 testing/btest/Baseline/scripts.base.frameworks.input.bad_patterns/.stderr create mode 100644 testing/btest/Baseline/scripts.base.frameworks.input.patterns/out create mode 100644 testing/btest/language/paraglob.zeek create mode 100644 testing/btest/scripts/base/frameworks/input/bad_patterns.zeek create mode 100644 testing/btest/scripts/base/frameworks/input/patterns.zeek diff --git a/.gitmodules b/.gitmodules index d151b3d288..1bceead3d6 100644 --- a/.gitmodules +++ b/.gitmodules @@ -28,3 +28,6 @@ [submodule "doc"] path = doc url = https://github.com/zeek/zeek-docs +[submodule "aux/paraglob"] + path = aux/paraglob + url = https://github.com/zeek/paraglob diff --git a/CMakeLists.txt b/CMakeLists.txt index f5edf896c0..a51783711f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -325,6 +325,10 @@ include_directories(BEFORE ${CAF_INCLUDE_DIR_CORE}) include_directories(BEFORE ${CAF_INCLUDE_DIR_IO}) include_directories(BEFORE ${CAF_INCLUDE_DIR_OPENSSL}) +add_subdirectory(aux/paraglob) +set(zeekdeps ${zeekdeps} paraglob) +include_directories(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}/aux/paraglob) + add_subdirectory(src) add_subdirectory(scripts) add_subdirectory(man) diff --git a/aux/paraglob b/aux/paraglob new file mode 160000 index 0000000000..757e00b651 --- /dev/null +++ b/aux/paraglob @@ -0,0 +1 @@ +Subproject commit 757e00b6510d2b0e92510c9c26f9e3279aa442a4 diff --git a/src/OpaqueVal.cc b/src/OpaqueVal.cc index 5b6c9aa483..e1c182ca73 100644 --- a/src/OpaqueVal.cc +++ b/src/OpaqueVal.cc @@ -869,3 +869,28 @@ void CardinalityVal::Add(const Val* val) c->AddElement(key->Hash()); delete key; } + + +ParaglobVal::ParaglobVal(paraglob::Paraglob* p) +: OpaqueVal(paraglob_type) + { + this->internal_paraglob = p; + } + +VectorVal* ParaglobVal::get(StringVal* &pattern) + { + VectorVal* rval = new VectorVal(internal_type("string_vec")->AsVectorType()); + std::string string_pattern (pattern->CheckString(), pattern->Len()); + std::vector matches = this->internal_paraglob->get(string_pattern); + + for (unsigned int i = 0; i < matches.size(); i++) { + rval->Assign(i, new StringVal(matches.at(i).c_str())); + } + + return rval; + } + +bool ParaglobVal::operator==(const ParaglobVal *other) + { + return (*(this->internal_paraglob) == *(other->internal_paraglob)); + } diff --git a/src/OpaqueVal.h b/src/OpaqueVal.h index 5fe0823436..34e7ae9998 100644 --- a/src/OpaqueVal.h +++ b/src/OpaqueVal.h @@ -8,6 +8,7 @@ #include "RandTest.h" #include "Val.h" #include "digest.h" +#include "src/paraglob.h" namespace probabilistic { class BloomFilter; @@ -188,4 +189,14 @@ private: DECLARE_SERIAL(CardinalityVal); }; +class ParaglobVal : public OpaqueVal { +public: + explicit ParaglobVal(paraglob::Paraglob* p); + VectorVal* get(StringVal* &pattern); + bool operator==(const ParaglobVal *other); + +private: + paraglob::Paraglob* internal_paraglob; +}; + #endif diff --git a/src/Type.h b/src/Type.h index c537bb6203..6cc4f3e84a 100644 --- a/src/Type.h +++ b/src/Type.h @@ -639,6 +639,7 @@ extern OpaqueType* topk_type; extern OpaqueType* bloomfilter_type; extern OpaqueType* x509_opaque_type; extern OpaqueType* ocsp_resp_opaque_type; +extern OpaqueType* paraglob_type; // Returns the Bro basic (non-parameterized) type with the given type. // The reference count of the type is not increased. diff --git a/src/bro.bif b/src/bro.bif index 039053f4f2..972665d8fe 100644 --- a/src/bro.bif +++ b/src/bro.bif @@ -789,6 +789,63 @@ function sha256_hash_finish%(handle: opaque of sha256%): string return static_cast(handle)->Get(); %} + +## Initializes and returns a new paraglob. +## +## v: Vector of patterns to initialize the paraglob with. +## +## Returns: A new, compiled, paraglob with the patterns in *v* +## +## .. zeek:see::paraglob_get paraglob_equals paraglob_add +function paraglob_init%(v: any%) : opaque of paraglob + %{ + if ( v->Type()->Tag() != TYPE_VECTOR || + v->Type()->YieldType()->Tag() != TYPE_STRING ) + { + builtin_error("paraglob requires a vector for initialization."); + return nullptr; + } + + std::vector patterns; + VectorVal* vv = v->AsVectorVal(); + for ( unsigned int i = 0; i < vv->Size(); ++i ) + { + const BroString* s = vv->Lookup(i)->AsString(); + patterns.push_back(std::string(s->CheckString(), s->Len())); + } + + return new ParaglobVal(new paraglob::Paraglob(patterns)); + %} + +## Gets all the strings inside the handle associated with an input pattern. +## +## handle: A compiled paraglob. +## pattern: A glob style pattern. +## +## Returns: A vector of strings matching the input pattern +## +## ## .. zeek:see::paraglob_add paraglob_equals paraglob_init +function paraglob_get%(handle: opaque of paraglob, pat: string%): string_vec + %{ + return static_cast(handle)->get(pat); + %} + +## Compares two paraglobs for equality. +## +## p_one: A compiled paraglob. +## p_two: A compiled paraglob. +## +## Returns: True of both paraglobs contain the same patterns, false otherwise. +## +## ## .. zeek:see::paraglob_add paraglob_get paraglob_init +function paraglob_equals%(p_one: opaque of paraglob, p_two: opaque of paraglob%) + : bool + %{ + bool eq = + (static_cast(p_one) == static_cast(p_two)); + return val_mgr->GetBool(eq); + %} + ## Returns 32-bit digest of arbitrary input values using FNV-1a hash algorithm. ## See ``_. ## @@ -3071,7 +3128,7 @@ function strptime%(fmt: string, d: string%) : time const time_t timeval = time_t(); struct tm t; - if ( ! localtime_r(&timeval, &t) || + if ( ! localtime_r(&timeval, &t) || ! strptime(d->CheckString(), fmt->CheckString(), &t) ) { reporter->Warning("strptime conversion failed: fmt:%s d:%s", fmt->CheckString(), d->CheckString()); diff --git a/src/input/Manager.cc b/src/input/Manager.cc index bcd3e84bf3..34e8960193 100644 --- a/src/input/Manager.cc +++ b/src/input/Manager.cc @@ -224,7 +224,7 @@ ReaderBackend* Manager::CreateBackend(ReaderFrontend* frontend, EnumVal* tag) return backend; } -// Create a new input reader object to be used at whomevers leisure lateron. +// Create a new input reader object to be used at whomevers leisure later on. bool Manager::CreateStream(Stream* info, RecordVal* description) { RecordType* rtype = description->Type()->AsRecordType(); @@ -232,7 +232,7 @@ bool Manager::CreateStream(Stream* info, RecordVal* description) || same_type(rtype, BifType::Record::Input::EventDescription, 0) || same_type(rtype, BifType::Record::Input::AnalysisDescription, 0) ) ) { - reporter->Error("Streamdescription argument not of right type for new input stream"); + reporter->Error("Stream description argument not of right type for new input stream"); return false; } @@ -824,6 +824,7 @@ bool Manager::IsCompatibleType(BroType* t, bool atomic_only) case TYPE_INTERVAL: case TYPE_ENUM: case TYPE_STRING: + case TYPE_PATTERN: return true; case TYPE_RECORD: @@ -2074,6 +2075,12 @@ int Manager::GetValueLength(const Value* val) const } break; + case TYPE_PATTERN: + { + length += strlen(val->val.pattern_text_val) + 1; + break; + } + case TYPE_TABLE: { for ( int i = 0; i < val->val.set_val.size; i++ ) @@ -2193,6 +2200,14 @@ int Manager::CopyValue(char *data, const int startpos, const Value* val) const return length; } + case TYPE_PATTERN: + { + // include null-terminator + int length = strlen(val->val.pattern_text_val) + 1; + memcpy(data + startpos, val->val.pattern_text_val, length); + return length; + } + case TYPE_TABLE: { int length = 0; @@ -2350,6 +2365,13 @@ Val* Manager::ValueToVal(const Stream* i, const Value* val, BroType* request_typ return subnetval; } + case TYPE_PATTERN: + { + RE_Matcher* re = new RE_Matcher(val->val.pattern_text_val); + re->Compile(); + return new PatternVal(re); + } + case TYPE_TABLE: { // all entries have to have the same type... @@ -2492,6 +2514,13 @@ Val* Manager::ValueToVal(const Stream* i, const Value* val, bool& have_error) co return subnetval; } + case TYPE_PATTERN: + { + RE_Matcher* re = new RE_Matcher(val->val.pattern_text_val); + re->Compile(); + return new PatternVal(re); + } + case TYPE_TABLE: { TypeList* set_index; diff --git a/src/main.cc b/src/main.cc index 10026eea7e..c3c0a2ca6d 100644 --- a/src/main.cc +++ b/src/main.cc @@ -122,6 +122,7 @@ OpaqueType* topk_type = 0; OpaqueType* bloomfilter_type = 0; OpaqueType* x509_opaque_type = 0; OpaqueType* ocsp_resp_opaque_type = 0; +OpaqueType* paraglob_type = 0; // Keep copy of command line int bro_argc; @@ -809,6 +810,7 @@ int main(int argc, char** argv) bloomfilter_type = new OpaqueType("bloomfilter"); x509_opaque_type = new OpaqueType("x509"); ocsp_resp_opaque_type = new OpaqueType("ocsp_resp"); + paraglob_type = new OpaqueType("paraglob"); // The leak-checker tends to produce some false // positives (memory which had already been diff --git a/src/threading/SerialTypes.h b/src/threading/SerialTypes.h index 65bb79b659..b9a9c6c718 100644 --- a/src/threading/SerialTypes.h +++ b/src/threading/SerialTypes.h @@ -126,6 +126,7 @@ struct Value { vec_t vector_val; addr_t addr_val; subnet_t subnet_val; + const char* pattern_text_val; struct { char* data; diff --git a/src/threading/formatters/Ascii.cc b/src/threading/formatters/Ascii.cc index 147305485b..fde6fa9380 100644 --- a/src/threading/formatters/Ascii.cc +++ b/src/threading/formatters/Ascii.cc @@ -325,6 +325,28 @@ threading::Value* Ascii::ParseValue(const string& s, const string& name, TypeTag break; } + case TYPE_PATTERN: + { + string cannidate = get_unescaped_string(s); + // A string is a cannidate pattern iff it begins and ends with + // a '/'. Rather or not the rest of the string is legal will + // be determined later when it is given to the RE engine. + if ( cannidate.size() >= 2 ) + { + if ( cannidate.front() == cannidate.back() && + cannidate.back() == '/' ) + { + // Remove the '/'s + cannidate.erase(0, 1); + cannidate.erase(cannidate.size() - 1); + val->val.pattern_text_val = copy_string(cannidate.c_str()); + break; + } + } + GetThread()->Error(GetThread()->Fmt("String '%s' contained no parseable pattern.", cannidate.c_str())); + goto parse_error; + } + case TYPE_TABLE: case TYPE_VECTOR: // First - common initialization diff --git a/testing/btest/Baseline/language.paraglob/out b/testing/btest/Baseline/language.paraglob/out new file mode 100644 index 0000000000..d375f0c6b6 --- /dev/null +++ b/testing/btest/Baseline/language.paraglob/out @@ -0,0 +1,6 @@ +[T, T, T, T, T] +T +[*, *og, d?g, d[!wl]g] +[once] +[*.gov*, *malware*] +[*.gov*, *malware*] diff --git a/testing/btest/Baseline/scripts.base.frameworks.input.bad_patterns/.stderr b/testing/btest/Baseline/scripts.base.frameworks.input.bad_patterns/.stderr new file mode 100644 index 0000000000..e0a7be2cc3 --- /dev/null +++ b/testing/btest/Baseline/scripts.base.frameworks.input.bad_patterns/.stderr @@ -0,0 +1,9 @@ +error: input.log/Input::READER_ASCII: String '/cat/sss' contained no parseable pattern. +warning: input.log/Input::READER_ASCII: Could not convert line '2 /cat/sss' of input.log to Val. Ignoring line. +error: input.log/Input::READER_ASCII: String '/foo|bar' contained no parseable pattern. +warning: input.log/Input::READER_ASCII: Could not convert line '3 /foo|bar' of input.log to Val. Ignoring line. +error: input.log/Input::READER_ASCII: String 'this is not a pattern' contained no parseable pattern. +warning: input.log/Input::READER_ASCII: Could not convert line '4 this is not a pattern' of input.log to Val. Ignoring line. +error: input.log/Input::READER_ASCII: String '/5' contained no parseable pattern. +warning: input.log/Input::READER_ASCII: Could not convert line '5 /5' of input.log to Val. Ignoring line. +received termination signal diff --git a/testing/btest/Baseline/scripts.base.frameworks.input.patterns/out b/testing/btest/Baseline/scripts.base.frameworks.input.patterns/out new file mode 100644 index 0000000000..9852d0d5d5 --- /dev/null +++ b/testing/btest/Baseline/scripts.base.frameworks.input.patterns/out @@ -0,0 +1,9 @@ +T +F +T +{ +[2] = [p=/^?(cat)$?/], +[4] = [p=/^?(^oob)$?/], +[1] = [p=/^?(dog)$?/], +[3] = [p=/^?(foo|bar)$?/] +} diff --git a/testing/btest/language/paraglob.zeek b/testing/btest/language/paraglob.zeek new file mode 100644 index 0000000000..b5a2dcba11 --- /dev/null +++ b/testing/btest/language/paraglob.zeek @@ -0,0 +1,34 @@ +# @TEST-EXEC: bro -b %INPUT >out +# @TEST-EXEC: btest-diff out + +event zeek_init () +{ + local v1 = vector("*", "d?g", "*og", "d?", "d[!wl]g"); + local v2 = vector("once", "!o*", "once"); + local v3 = vector("https://*.google.com/*", "*malware*", "*.gov*"); + + local p1 = paraglob_init(v1); + local p2: opaque of paraglob = paraglob_init(v2); + local p3 = paraglob_init(v3); + local p_eq = paraglob_init(v1); + + # paraglob_init should not modify v1 + print (v1 == vector("*", "d?g", "*og", "d?", "d[!wl]g")); + # p_eq and p1 should be the same paraglobs + print paraglob_equals(p1, p_eq); + + print paraglob_get(p1, "dog"); + print paraglob_get(p2, "once"); + print paraglob_get(p3, "www.strange-malware-domain.gov"); + + # This looks like a lot, but really should complete quickly. + # Paraglob should stop addition of duplicate patterns. + local i = 1000000; + while (i > 0) { + i = i - 1; + v3 += v3[1]; + } + + local large_glob: opaque of paraglob = paraglob_init(v3); + print paraglob_get(large_glob, "www.strange-malware-domain.gov"); +} diff --git a/testing/btest/scripts/base/frameworks/input/bad_patterns.zeek b/testing/btest/scripts/base/frameworks/input/bad_patterns.zeek new file mode 100644 index 0000000000..23d25b516b --- /dev/null +++ b/testing/btest/scripts/base/frameworks/input/bad_patterns.zeek @@ -0,0 +1,38 @@ +# @TEST-EXEC: zeek -b %INPUT +# @TEST-EXEC: btest-diff .stderr + +@TEST-START-FILE input.log +#separator \x09 +#fields i p +#types count pattern +1 /d/og/ +2 /cat/sss +3 /foo|bar +4 this is not a pattern +5 /5 +@TEST-END-FILE + +redef exit_only_after_terminate = T; + +module A; + +type Idx: record { + i: int; +}; + +type Val: record { + p: pattern; +}; + +event kill_me() + { + terminate(); + } + +global pats: table[int] of Val = table(); + +event zeek_init() + { + Input::add_table([$source="input.log", $name="pats", $idx=Idx, $val=Val, $destination=pats]); + schedule 10msec { kill_me() }; + } diff --git a/testing/btest/scripts/base/frameworks/input/patterns.zeek b/testing/btest/scripts/base/frameworks/input/patterns.zeek new file mode 100644 index 0000000000..eeed7ac602 --- /dev/null +++ b/testing/btest/scripts/base/frameworks/input/patterns.zeek @@ -0,0 +1,47 @@ +# @TEST-EXEC: btest-bg-run zeek zeek -b %INPUT +# @TEST-EXEC: btest-bg-wait 10 + + +redef exit_only_after_terminate = T; + +@TEST-START-FILE input.log +#separator \x09 +#fields i p +#types count pattern +1 /dog/ +2 /cat/ +3 /foo|bar/ +4 /^oob/ +@TEST-END-FILE + +global outfile: file; + +module A; + +type Idx: record { + i: int; +}; + +type Val: record { + p: pattern; +}; + +global pats: table[int] of Val = table(); + +event zeek_init() + { + outfile = open("../out"); + # first read in the old stuff into the table... + Input::add_table([$source="../input.log", $name="pats", $idx=Idx, $val=Val, $destination=pats]); + } + +event Input::end_of_data(name: string, source:string) + { + print outfile, (pats[3]$p in "foobar"); # T + print outfile, (pats[4]$p in "foobar"); # F + print outfile, (pats[3]$p == "foo"); # T + print outfile, pats; + Input::remove("pats"); + close(outfile); + terminate(); + } From 42b1f4fd2eededf8756d5bdf211819a8defeff55 Mon Sep 17 00:00:00 2001 From: ZekeMedley Date: Wed, 29 May 2019 14:18:20 -0700 Subject: [PATCH 2/8] Make paraglob serializable and copyable. --- aux/paraglob | 2 +- src/IPAddr.cc | 1 - src/OpaqueVal.cc | 48 ++++++++- src/OpaqueVal.h | 19 +++- src/SerialTypes.h | 4 + src/SerializationFormat.cc | 22 +++- src/SerializationFormat.h | 5 + src/Serializer.h | 3 + src/Val.cc | 2 +- src/bro.bif | 21 ++-- src/broker/Manager.cc | 66 +++++------- src/input/Manager.cc | 33 +----- src/input/readers/ascii/Ascii.cc | 14 ++- src/input/readers/config/Config.cc | 14 ++- src/threading/SerialTypes.h | 1 - src/threading/formatters/Ascii.cc | 22 ---- .../Baseline/language.copy-all-opaques/out | 3 + .../recv.recv.out | 12 +++ .../send.send.out | 11 ++ .../.stderr | 9 -- .../out | 9 -- testing/btest/language/copy-all-opaques.zeek | 8 ++ .../language/paraglob-serialization.zeek | 102 ++++++++++++++++++ testing/btest/language/paraglob.zeek | 4 +- .../base/frameworks/input/bad_patterns.zeek | 38 ------- .../base/frameworks/input/patterns.zeek | 47 -------- 26 files changed, 295 insertions(+), 225 deletions(-) create mode 100644 testing/btest/Baseline/language.paraglob-serialization/recv.recv.out create mode 100644 testing/btest/Baseline/language.paraglob-serialization/send.send.out delete mode 100644 testing/btest/Baseline/scripts.base.frameworks.input.bad_patterns/.stderr delete mode 100644 testing/btest/Baseline/scripts.base.frameworks.input.patterns/out create mode 100644 testing/btest/language/paraglob-serialization.zeek delete mode 100644 testing/btest/scripts/base/frameworks/input/bad_patterns.zeek delete mode 100644 testing/btest/scripts/base/frameworks/input/patterns.zeek diff --git a/aux/paraglob b/aux/paraglob index 757e00b651..bdff7b7634 160000 --- a/aux/paraglob +++ b/aux/paraglob @@ -1 +1 @@ -Subproject commit 757e00b6510d2b0e92510c9c26f9e3279aa442a4 +Subproject commit bdff7b76349fa740f049e794d3f7881a0d65c766 diff --git a/src/IPAddr.cc b/src/IPAddr.cc index c215b463b9..76aa34f79a 100644 --- a/src/IPAddr.cc +++ b/src/IPAddr.cc @@ -281,4 +281,3 @@ string IPPrefix::AsString() const return prefix.AsString() +"/" + l; } - diff --git a/src/OpaqueVal.cc b/src/OpaqueVal.cc index e1c182ca73..234a9cfe81 100644 --- a/src/OpaqueVal.cc +++ b/src/OpaqueVal.cc @@ -871,18 +871,18 @@ void CardinalityVal::Add(const Val* val) } -ParaglobVal::ParaglobVal(paraglob::Paraglob* p) +ParaglobVal::ParaglobVal(std::unique_ptr p) : OpaqueVal(paraglob_type) { - this->internal_paraglob = p; + this->internal_paraglob = std::move(p); } VectorVal* ParaglobVal::get(StringVal* &pattern) { VectorVal* rval = new VectorVal(internal_type("string_vec")->AsVectorType()); std::string string_pattern (pattern->CheckString(), pattern->Len()); - std::vector matches = this->internal_paraglob->get(string_pattern); + std::vector matches = this->internal_paraglob->get(string_pattern); for (unsigned int i = 0; i < matches.size(); i++) { rval->Assign(i, new StringVal(matches.at(i).c_str())); } @@ -890,7 +890,45 @@ VectorVal* ParaglobVal::get(StringVal* &pattern) return rval; } -bool ParaglobVal::operator==(const ParaglobVal *other) +bool ParaglobVal::operator==(const ParaglobVal& other) const { - return (*(this->internal_paraglob) == *(other->internal_paraglob)); + return *(this->internal_paraglob) == *(other.internal_paraglob); + } + +IMPLEMENT_SERIAL(ParaglobVal, SER_PARAGLOB_VAL) + +bool ParaglobVal::DoSerialize(SerialInfo* info) const + { + DO_SERIALIZE(SER_PARAGLOB_VAL, OpaqueVal) + + std::unique_ptr> iv = this->internal_paraglob->serialize(); + + return SERIALIZE(iv.get()); + } + +bool ParaglobVal::DoUnserialize(UnserialInfo* info) + { + DO_UNSERIALIZE(OpaqueVal) + + std::unique_ptr> iv (new std::vector); + + bool success = UNSERIALIZE(iv.get()); + + try { + this->internal_paraglob = build_unique(std::move(iv)); + } catch (const paraglob::underflow_error& e) { + reporter->Error(e.what()); + return false; + } catch (const paraglob::overflow_error& e) { + reporter->Error(e.what()); + return false; + } + + return success; + } + +Val* ParaglobVal::DoClone(CloneState* state) + { + return new ParaglobVal + (build_unique(this->internal_paraglob->serialize())); } diff --git a/src/OpaqueVal.h b/src/OpaqueVal.h index 34e7ae9998..17a9dd1918 100644 --- a/src/OpaqueVal.h +++ b/src/OpaqueVal.h @@ -4,6 +4,7 @@ #define OPAQUEVAL_H #include +#include // std::unique_ptr #include "RandTest.h" #include "Val.h" @@ -191,12 +192,24 @@ private: class ParaglobVal : public OpaqueVal { public: - explicit ParaglobVal(paraglob::Paraglob* p); + explicit ParaglobVal(std::unique_ptr p); VectorVal* get(StringVal* &pattern); - bool operator==(const ParaglobVal *other); + Val* DoClone(CloneState* state) override; + bool operator==(const ParaglobVal& other) const; + +protected: + ParaglobVal() : OpaqueVal(paraglob_type) {} private: - paraglob::Paraglob* internal_paraglob; + std::unique_ptr internal_paraglob; + // Small convenience function. Does what std::make_unique does in C++14. Wont + // work on arrays. + template + std::unique_ptr build_unique (Args&&... args) { + return std::unique_ptr(new T(std::forward(args)...)); + } + + DECLARE_SERIAL(ParaglobVal) }; #endif diff --git a/src/SerialTypes.h b/src/SerialTypes.h index 029048a80f..15e0e0b3dc 100644 --- a/src/SerialTypes.h +++ b/src/SerialTypes.h @@ -53,6 +53,7 @@ SERIAL_IS(BITVECTOR, 0x1500) SERIAL_IS(COUNTERVECTOR, 0x1600) SERIAL_IS(BLOOMFILTER, 0x1700) SERIAL_IS(HASHER, 0x1800) +SERIAL_IS(PARAGLOB, 0x1900) // These are the externally visible types. const SerialType SER_NONE = 0; @@ -116,6 +117,7 @@ SERIAL_VAL(X509_VAL, 23) SERIAL_VAL(COMM_STORE_HANDLE_VAL, 24) SERIAL_VAL(COMM_DATA_VAL, 25) SERIAL_VAL(OCSP_RESP_VAL, 26) +SERIAL_VAL(PARAGLOB_VAL, 27) #define SERIAL_EXPR(name, val) SERIAL_CONST(name, val, EXPR) SERIAL_EXPR(EXPR, 1) @@ -224,6 +226,8 @@ SERIAL_HASHER(HASHER, 1) SERIAL_HASHER(DEFAULTHASHER, 2) SERIAL_HASHER(DOUBLEHASHER, 3) +SERIAL_CONST(PARAGLOB, 1, PARAGLOB) + SERIAL_CONST2(ID) SERIAL_CONST2(STATE_ACCESS) SERIAL_CONST2(CASE) diff --git a/src/SerializationFormat.cc b/src/SerializationFormat.cc index d5f366f7fd..14341a84a6 100644 --- a/src/SerializationFormat.cc +++ b/src/SerializationFormat.cc @@ -233,6 +233,20 @@ bool BinarySerializationFormat::Read(string* v, const char* tag) return true; } +bool BinarySerializationFormat::Read (vector* v, const char* tag) + { + uint64_t size = 0; + if ( ! Read(&size, "vec-size")) + return false; + + v->resize(size); + uint8_t* data = v->data(); + if ( ! ReadData(data, size*sizeof(uint8_t)) ) + return false; + + return true; + } + bool BinarySerializationFormat::Read(IPAddr* addr, const char* tag) { int n = 0; @@ -367,6 +381,13 @@ bool BinarySerializationFormat::Write(const string& s, const char* tag) return Write(s.data(), s.size(), tag); } +bool BinarySerializationFormat::Write (const vector* v, const char* tag) + { + uint64_t size = v->size(); + bool valid = Write(size, "vec-size"); + return valid && WriteData(v->data(), size); + } + bool BinarySerializationFormat::Write(const IPAddr& addr, const char* tag) { const uint32_t* raw; @@ -435,4 +456,3 @@ bool BinarySerializationFormat::Write(const char* buf, int len, const char* tag) uint32 l = htonl(len); return WriteData(&l, sizeof(l)) && WriteData(buf, len); } - diff --git a/src/SerializationFormat.h b/src/SerializationFormat.h index 43e6fde693..56d8b9777c 100644 --- a/src/SerializationFormat.h +++ b/src/SerializationFormat.h @@ -31,6 +31,7 @@ public: virtual bool Read(bool* v, const char* tag) = 0; virtual bool Read(double* d, const char* tag) = 0; virtual bool Read(string* s, const char* tag) = 0; + virtual bool Read(vector* v, const char* tag) = 0; virtual bool Read(IPAddr* addr, const char* tag) = 0; virtual bool Read(IPPrefix* prefix, const char* tag) = 0; virtual bool Read(struct in_addr* addr, const char* tag) = 0; @@ -65,6 +66,7 @@ public: virtual bool Write(const char* s, const char* tag) = 0; virtual bool Write(const char* buf, int len, const char* tag) = 0; virtual bool Write(const string& s, const char* tag) = 0; + virtual bool Write(const vector* v, const char* tag) = 0; virtual bool Write(const IPAddr& addr, const char* tag) = 0; virtual bool Write(const IPPrefix& prefix, const char* tag) = 0; virtual bool Write(const struct in_addr& addr, const char* tag) = 0; @@ -110,10 +112,12 @@ public: bool Read(double* d, const char* tag) override; bool Read(char** str, int* len, const char* tag) override; bool Read(string* s, const char* tag) override; + bool Read(vector* v, const char* tag) override; bool Read(IPAddr* addr, const char* tag) override; bool Read(IPPrefix* prefix, const char* tag) override; bool Read(struct in_addr* addr, const char* tag) override; bool Read(struct in6_addr* addr, const char* tag) override; + bool Write(int v, const char* tag) override; bool Write(uint16 v, const char* tag) override; bool Write(uint32 v, const char* tag) override; @@ -125,6 +129,7 @@ public: bool Write(const char* s, const char* tag) override; bool Write(const char* buf, int len, const char* tag) override; bool Write(const string& s, const char* tag) override; + bool Write(const vector* v, const char* tag) override; bool Write(const IPAddr& addr, const char* tag) override; bool Write(const IPPrefix& prefix, const char* tag) override; bool Write(const struct in_addr& addr, const char* tag) override; diff --git a/src/Serializer.h b/src/Serializer.h index 2c30ef5443..2f455d2022 100644 --- a/src/Serializer.h +++ b/src/Serializer.h @@ -69,6 +69,7 @@ public: { return format->Read(const_cast(str), len, tag); } bool Read(string* s, const char* tag); + bool Read(vector* v, const char* tag) { return format->Read(v, tag); } bool Read(IPAddr* a, const char* tag) { return format->Read(a, tag); } bool Read(IPPrefix* p, const char* tag) { return format->Read(p, tag); } @@ -78,6 +79,8 @@ public: { return format->Write(buf, len, tag); } bool Write(const string& s, const char* tag) { return format->Write(s.data(), s.size(), tag); } + bool Write(const vector* v, const char* tag) + { return format->Write(v, tag); } bool Write(const IPAddr& a, const char* tag) { return format->Write(a, tag); } bool Write(const IPPrefix& p, const char* tag) { return format->Write(p, tag); } diff --git a/src/Val.cc b/src/Val.cc index 50c36d7239..0fb8ea0b7c 100644 --- a/src/Val.cc +++ b/src/Val.cc @@ -212,7 +212,7 @@ Val* Val::Unserialize(UnserialInfo* info, TypeTag type, const BroType* exact_typ } } - + return v; } diff --git a/src/bro.bif b/src/bro.bif index 972665d8fe..d08f9c5792 100644 --- a/src/bro.bif +++ b/src/bro.bif @@ -802,7 +802,7 @@ function paraglob_init%(v: any%) : opaque of paraglob if ( v->Type()->Tag() != TYPE_VECTOR || v->Type()->YieldType()->Tag() != TYPE_STRING ) { - builtin_error("paraglob requires a vector for initialization."); + builtin_error("paraglob requires a vector of strings for initialization."); return nullptr; } @@ -814,7 +814,17 @@ function paraglob_init%(v: any%) : opaque of paraglob patterns.push_back(std::string(s->CheckString(), s->Len())); } - return new ParaglobVal(new paraglob::Paraglob(patterns)); + try + { + std::unique_ptr p (new paraglob::Paraglob(patterns)); + return new ParaglobVal(std::move(p)); + } + // Thrown if paraglob fails to add a pattern. + catch (const paraglob::add_error& e) + { + builtin_error(e.what()); + return nullptr; + } %} ## Gets all the strings inside the handle associated with an input pattern. @@ -835,15 +845,14 @@ function paraglob_get%(handle: opaque of paraglob, pat: string%): string_vec ## p_one: A compiled paraglob. ## p_two: A compiled paraglob. ## -## Returns: True of both paraglobs contain the same patterns, false otherwise. +## Returns: True if both paraglobs contain the same patterns, false otherwise. ## ## ## .. zeek:see::paraglob_add paraglob_get paraglob_init function paraglob_equals%(p_one: opaque of paraglob, p_two: opaque of paraglob%) : bool %{ - bool eq = - (static_cast(p_one) == static_cast(p_two)); - return val_mgr->GetBool(eq); + return val_mgr->GetBool( + *(static_cast(p_one)) == *(static_cast(p_two))); %} ## Returns 32-bit digest of arbitrary input values using FNV-1a hash algorithm. diff --git a/src/broker/Manager.cc b/src/broker/Manager.cc index 9baf20ef02..f5e374e239 100644 --- a/src/broker/Manager.cc +++ b/src/broker/Manager.cc @@ -83,17 +83,17 @@ struct scoped_reporter_location { }; #ifdef DEBUG -static std::string RenderMessage(std::string topic, broker::data x) +static std::string RenderMessage(std::string topic, const broker::data& x) { return fmt("%s -> %s", broker::to_string(x).c_str(), topic.c_str()); } -static std::string RenderEvent(std::string topic, std::string name, broker::data args) +static std::string RenderEvent(std::string topic, std::string name, const broker::data& args) { return fmt("%s(%s) -> %s", name.c_str(), broker::to_string(args).c_str(), topic.c_str()); } -static std::string RenderMessage(broker::store::response x) +static std::string RenderMessage(const broker::store::response& x) { return fmt("%s [id %" PRIu64 "]", (x.answer ? broker::to_string(*x.answer).c_str() : ""), x.id); } @@ -358,7 +358,7 @@ bool Manager::PublishEvent(string topic, std::string name, broker::vector args) DBG_LOG(DBG_BROKER, "Publishing event: %s", RenderEvent(topic, name, args).c_str()); broker::zeek::Event ev(std::move(name), std::move(args)); - bstate->endpoint.publish(move(topic), std::move(ev)); + bstate->endpoint.publish(move(topic), ev.move_data()); ++statistics.num_events_outgoing; return true; } @@ -420,8 +420,8 @@ bool Manager::PublishIdentifier(std::string topic, std::string id) broker::zeek::IdentifierUpdate msg(move(id), move(*data)); DBG_LOG(DBG_BROKER, "Publishing id-update: %s", - RenderMessage(topic, msg).c_str()); - bstate->endpoint.publish(move(topic), move(msg)); + RenderMessage(topic, msg.as_data()).c_str()); + bstate->endpoint.publish(move(topic), msg.move_data()); ++statistics.num_ids_outgoing; return true; } @@ -471,14 +471,14 @@ bool Manager::PublishLogCreate(EnumVal* stream, EnumVal* writer, auto bwriter_id = broker::enum_value(move(writer_id)); broker::zeek::LogCreate msg(move(bstream_id), move(bwriter_id), move(writer_info), move(fields_data)); - DBG_LOG(DBG_BROKER, "Publishing log creation: %s", RenderMessage(topic, msg).c_str()); + DBG_LOG(DBG_BROKER, "Publishing log creation: %s", RenderMessage(topic, msg.as_data()).c_str()); if ( peer.node != NoPeer.node ) // Direct message. - bstate->endpoint.publish(peer, move(topic), move(msg)); + bstate->endpoint.publish(peer, move(topic), msg.move_data()); else // Broadcast. - bstate->endpoint.publish(move(topic), move(msg)); + bstate->endpoint.publish(move(topic), msg.move_data()); return true; } @@ -560,7 +560,7 @@ bool Manager::PublishLogWrite(EnumVal* stream, EnumVal* writer, string path, int broker::zeek::LogWrite msg(move(bstream_id), move(bwriter_id), move(path), move(serial_data)); - DBG_LOG(DBG_BROKER, "Buffering log record: %s", RenderMessage(topic, msg).c_str()); + DBG_LOG(DBG_BROKER, "Buffering log record: %s", RenderMessage(topic, msg.as_data()).c_str()); if ( log_buffers.size() <= (unsigned int)stream_id_num ) log_buffers.resize(stream_id_num + 1); @@ -568,7 +568,7 @@ bool Manager::PublishLogWrite(EnumVal* stream, EnumVal* writer, string path, int auto& lb = log_buffers[stream_id_num]; ++lb.message_count; auto& pending_batch = lb.msgs[topic]; - pending_batch.emplace_back(std::move(msg)); + pending_batch.emplace_back(msg.move_data()); if ( lb.message_count >= log_batch_size || (network_time - lb.last_flush >= log_batch_interval ) ) @@ -594,7 +594,7 @@ size_t Manager::LogBuffer::Flush(broker::endpoint& endpoint, size_t log_batch_si batch.reserve(log_batch_size + 1); pending_batch.swap(batch); broker::zeek::Batch msg(std::move(batch)); - endpoint.publish(topic, move(msg)); + endpoint.publish(topic, msg.move_data()); } auto rval = message_count; @@ -805,15 +805,8 @@ bool Manager::Unsubscribe(const string& topic_prefix) void Manager::GetFds(iosource::FD_Set* read, iosource::FD_Set* write, iosource::FD_Set* except) { - if ( bstate->status_subscriber.available() || bstate->subscriber.available() ) - SetIdle(false); - read->Insert(bstate->subscriber.fd()); read->Insert(bstate->status_subscriber.fd()); - write->Insert(bstate->subscriber.fd()); - write->Insert(bstate->status_subscriber.fd()); - except->Insert(bstate->subscriber.fd()); - except->Insert(bstate->status_subscriber.fd()); for ( auto& x : data_stores ) read->Insert(x.second->proxy.mailbox().descriptor()); @@ -821,19 +814,10 @@ void Manager::GetFds(iosource::FD_Set* read, iosource::FD_Set* write, double Manager::NextTimestamp(double* local_network_time) { - if ( ! IsIdle() ) - return timer_mgr->Time(); - - if ( bstate->status_subscriber.available() || bstate->subscriber.available() ) - return timer_mgr->Time(); - - for ( auto& s : data_stores ) - { - if ( ! s.second->proxy.mailbox().empty() ) - return timer_mgr->Time(); - } - - return -1; + // We're only asked for a timestamp if either (1) a FD was ready + // or (2) we're not idle (and we go idle if when Process is no-op), + // so there's no case where returning -1 to signify a skip will help. + return timer_mgr->Time(); } void Manager::DispatchMessage(const broker::topic& topic, broker::data msg) @@ -933,11 +917,15 @@ void Manager::Process() for ( auto& s : data_stores ) { - while ( ! s.second->proxy.mailbox().empty() ) + auto num_available = s.second->proxy.mailbox().size(); + + if ( num_available > 0 ) { had_input = true; - auto response = s.second->proxy.receive(); - ProcessStoreResponse(s.second, move(response)); + auto responses = s.second->proxy.receive(num_available); + + for ( auto& r : responses ) + ProcessStoreResponse(s.second, move(r)); } } @@ -975,7 +963,7 @@ void Manager::ProcessEvent(const broker::topic& topic, broker::zeek::Event ev) if ( ! ev.valid() ) { reporter->Warning("received invalid broker Event: %s", - broker::to_string(ev).data()); + broker::to_string(ev.as_data()).data()); return; } @@ -1048,7 +1036,7 @@ void Manager::ProcessEvent(const broker::topic& topic, broker::zeek::Event ev) bool bro_broker::Manager::ProcessLogCreate(broker::zeek::LogCreate lc) { - DBG_LOG(DBG_BROKER, "Received log-create: %s", RenderMessage(lc).c_str()); + DBG_LOG(DBG_BROKER, "Received log-create: %s", RenderMessage(lc.as_data()).c_str()); if ( ! lc.valid() ) { reporter->Warning("received invalid broker LogCreate: %s", @@ -1118,7 +1106,7 @@ bool bro_broker::Manager::ProcessLogCreate(broker::zeek::LogCreate lc) bool bro_broker::Manager::ProcessLogWrite(broker::zeek::LogWrite lw) { - DBG_LOG(DBG_BROKER, "Received log-write: %s", RenderMessage(lw).c_str()); + DBG_LOG(DBG_BROKER, "Received log-write: %s", RenderMessage(lw.as_data()).c_str()); if ( ! lw.valid() ) { @@ -1205,7 +1193,7 @@ bool bro_broker::Manager::ProcessLogWrite(broker::zeek::LogWrite lw) bool Manager::ProcessIdentifierUpdate(broker::zeek::IdentifierUpdate iu) { - DBG_LOG(DBG_BROKER, "Received id-update: %s", RenderMessage(iu).c_str()); + DBG_LOG(DBG_BROKER, "Received id-update: %s", RenderMessage(iu.as_data()).c_str()); if ( ! iu.valid() ) { diff --git a/src/input/Manager.cc b/src/input/Manager.cc index 34e8960193..bcd3e84bf3 100644 --- a/src/input/Manager.cc +++ b/src/input/Manager.cc @@ -224,7 +224,7 @@ ReaderBackend* Manager::CreateBackend(ReaderFrontend* frontend, EnumVal* tag) return backend; } -// Create a new input reader object to be used at whomevers leisure later on. +// Create a new input reader object to be used at whomevers leisure lateron. bool Manager::CreateStream(Stream* info, RecordVal* description) { RecordType* rtype = description->Type()->AsRecordType(); @@ -232,7 +232,7 @@ bool Manager::CreateStream(Stream* info, RecordVal* description) || same_type(rtype, BifType::Record::Input::EventDescription, 0) || same_type(rtype, BifType::Record::Input::AnalysisDescription, 0) ) ) { - reporter->Error("Stream description argument not of right type for new input stream"); + reporter->Error("Streamdescription argument not of right type for new input stream"); return false; } @@ -824,7 +824,6 @@ bool Manager::IsCompatibleType(BroType* t, bool atomic_only) case TYPE_INTERVAL: case TYPE_ENUM: case TYPE_STRING: - case TYPE_PATTERN: return true; case TYPE_RECORD: @@ -2075,12 +2074,6 @@ int Manager::GetValueLength(const Value* val) const } break; - case TYPE_PATTERN: - { - length += strlen(val->val.pattern_text_val) + 1; - break; - } - case TYPE_TABLE: { for ( int i = 0; i < val->val.set_val.size; i++ ) @@ -2200,14 +2193,6 @@ int Manager::CopyValue(char *data, const int startpos, const Value* val) const return length; } - case TYPE_PATTERN: - { - // include null-terminator - int length = strlen(val->val.pattern_text_val) + 1; - memcpy(data + startpos, val->val.pattern_text_val, length); - return length; - } - case TYPE_TABLE: { int length = 0; @@ -2365,13 +2350,6 @@ Val* Manager::ValueToVal(const Stream* i, const Value* val, BroType* request_typ return subnetval; } - case TYPE_PATTERN: - { - RE_Matcher* re = new RE_Matcher(val->val.pattern_text_val); - re->Compile(); - return new PatternVal(re); - } - case TYPE_TABLE: { // all entries have to have the same type... @@ -2514,13 +2492,6 @@ Val* Manager::ValueToVal(const Stream* i, const Value* val, bool& have_error) co return subnetval; } - case TYPE_PATTERN: - { - RE_Matcher* re = new RE_Matcher(val->val.pattern_text_val); - re->Compile(); - return new PatternVal(re); - } - case TYPE_TABLE: { TypeList* set_index; diff --git a/src/input/readers/ascii/Ascii.cc b/src/input/readers/ascii/Ascii.cc index e2b4b81714..7003c519a0 100644 --- a/src/input/readers/ascii/Ascii.cc +++ b/src/input/readers/ascii/Ascii.cc @@ -305,11 +305,15 @@ bool Ascii::DoUpdate() // no change return true; + // Warn again in case of trouble if the file changes. The comparison to 0 + // is to suppress an extra warning that we'd otherwise get on the initial + // inode assignment. + if ( ino != 0 ) + suppress_warnings = false; + mtime = sb.st_mtime; ino = sb.st_ino; - // file changed. reread. - - // fallthrough + // File changed. Fall through to re-read. } case MODE_MANUAL: @@ -470,8 +474,8 @@ bool Ascii::DoHeartbeat(double network_time, double current_time) case MODE_REREAD: case MODE_STREAM: - Update(); // call update and not DoUpdate, because update - // checks disabled. + Update(); // Call Update, not DoUpdate, because Update + // checks the "disabled" flag. break; default: diff --git a/src/input/readers/config/Config.cc b/src/input/readers/config/Config.cc index eca276281c..4f138c8828 100644 --- a/src/input/readers/config/Config.cc +++ b/src/input/readers/config/Config.cc @@ -151,11 +151,15 @@ bool Config::DoUpdate() // no change return true; + // Warn again in case of trouble if the file changes. The comparison to 0 + // is to suppress an extra warning that we'd otherwise get on the initial + // inode assignment. + if ( ino != 0 ) + suppress_warnings = false; + mtime = sb.st_mtime; ino = sb.st_ino; - // file changed. reread. - - // fallthrough + // File changed. Fall through to re-read. } case MODE_MANUAL: @@ -309,8 +313,8 @@ bool Config::DoHeartbeat(double network_time, double current_time) case MODE_REREAD: case MODE_STREAM: - Update(); // call update and not DoUpdate, because update - // checks disabled. + Update(); // Call Update, not DoUpdate, because Update + // checks the "disabled" flag. break; default: diff --git a/src/threading/SerialTypes.h b/src/threading/SerialTypes.h index b9a9c6c718..65bb79b659 100644 --- a/src/threading/SerialTypes.h +++ b/src/threading/SerialTypes.h @@ -126,7 +126,6 @@ struct Value { vec_t vector_val; addr_t addr_val; subnet_t subnet_val; - const char* pattern_text_val; struct { char* data; diff --git a/src/threading/formatters/Ascii.cc b/src/threading/formatters/Ascii.cc index fde6fa9380..147305485b 100644 --- a/src/threading/formatters/Ascii.cc +++ b/src/threading/formatters/Ascii.cc @@ -325,28 +325,6 @@ threading::Value* Ascii::ParseValue(const string& s, const string& name, TypeTag break; } - case TYPE_PATTERN: - { - string cannidate = get_unescaped_string(s); - // A string is a cannidate pattern iff it begins and ends with - // a '/'. Rather or not the rest of the string is legal will - // be determined later when it is given to the RE engine. - if ( cannidate.size() >= 2 ) - { - if ( cannidate.front() == cannidate.back() && - cannidate.back() == '/' ) - { - // Remove the '/'s - cannidate.erase(0, 1); - cannidate.erase(cannidate.size() - 1); - val->val.pattern_text_val = copy_string(cannidate.c_str()); - break; - } - } - GetThread()->Error(GetThread()->Fmt("String '%s' contained no parseable pattern.", cannidate.c_str())); - goto parse_error; - } - case TYPE_TABLE: case TYPE_VECTOR: // First - common initialization diff --git a/testing/btest/Baseline/language.copy-all-opaques/out b/testing/btest/Baseline/language.copy-all-opaques/out index ad38ca1a8d..3b5369b685 100644 --- a/testing/btest/Baseline/language.copy-all-opaques/out +++ b/testing/btest/Baseline/language.copy-all-opaques/out @@ -22,3 +22,6 @@ ============ Entropy [entropy=4.715374, chi_square=591.981818, mean=75.472727, monte_carlo_pi=4.0, serial_correlation=-0.11027] [entropy=4.715374, chi_square=591.981818, mean=75.472727, monte_carlo_pi=4.0, serial_correlation=-0.11027] +============ Paraglob +T +T diff --git a/testing/btest/Baseline/language.paraglob-serialization/recv.recv.out b/testing/btest/Baseline/language.paraglob-serialization/recv.recv.out new file mode 100644 index 0000000000..bd6ae96cfa --- /dev/null +++ b/testing/btest/Baseline/language.paraglob-serialization/recv.recv.out @@ -0,0 +1,12 @@ +receiver added peer: endpoint=127.0.0.1 msg=handshake successful +is_remote should be T, and is, T +receiver got ping number: 1 +[*, *ello, hello] +is_remote should be T, and is, T +receiver got ping number: 2 +[*, *ello, hello] +is_remote should be T, and is, T +receiver got ping number: 3 +[*, *ello, hello] +is_remote should be T, and is, T +[num_peers=1, num_stores=0, num_pending_queries=0, num_events_incoming=4, num_events_outgoing=3, num_logs_incoming=0, num_logs_outgoing=1, num_ids_incoming=0, num_ids_outgoing=0] diff --git a/testing/btest/Baseline/language.paraglob-serialization/send.send.out b/testing/btest/Baseline/language.paraglob-serialization/send.send.out new file mode 100644 index 0000000000..e6f0a48779 --- /dev/null +++ b/testing/btest/Baseline/language.paraglob-serialization/send.send.out @@ -0,0 +1,11 @@ +Starting send. +[*, *ello, hello] +is_remote should be F, and is, F +sender added peer: endpoint=127.0.0.1 msg=received handshake from remote core +is_remote should be T, and is, T +sender got pong number: 1 +is_remote should be T, and is, T +sender got pong number: 2 +is_remote should be T, and is, T +sender got pong number: 3 +sender lost peer: endpoint=127.0.0.1 msg=lost remote peer diff --git a/testing/btest/Baseline/scripts.base.frameworks.input.bad_patterns/.stderr b/testing/btest/Baseline/scripts.base.frameworks.input.bad_patterns/.stderr deleted file mode 100644 index e0a7be2cc3..0000000000 --- a/testing/btest/Baseline/scripts.base.frameworks.input.bad_patterns/.stderr +++ /dev/null @@ -1,9 +0,0 @@ -error: input.log/Input::READER_ASCII: String '/cat/sss' contained no parseable pattern. -warning: input.log/Input::READER_ASCII: Could not convert line '2 /cat/sss' of input.log to Val. Ignoring line. -error: input.log/Input::READER_ASCII: String '/foo|bar' contained no parseable pattern. -warning: input.log/Input::READER_ASCII: Could not convert line '3 /foo|bar' of input.log to Val. Ignoring line. -error: input.log/Input::READER_ASCII: String 'this is not a pattern' contained no parseable pattern. -warning: input.log/Input::READER_ASCII: Could not convert line '4 this is not a pattern' of input.log to Val. Ignoring line. -error: input.log/Input::READER_ASCII: String '/5' contained no parseable pattern. -warning: input.log/Input::READER_ASCII: Could not convert line '5 /5' of input.log to Val. Ignoring line. -received termination signal diff --git a/testing/btest/Baseline/scripts.base.frameworks.input.patterns/out b/testing/btest/Baseline/scripts.base.frameworks.input.patterns/out deleted file mode 100644 index 9852d0d5d5..0000000000 --- a/testing/btest/Baseline/scripts.base.frameworks.input.patterns/out +++ /dev/null @@ -1,9 +0,0 @@ -T -F -T -{ -[2] = [p=/^?(cat)$?/], -[4] = [p=/^?(^oob)$?/], -[1] = [p=/^?(dog)$?/], -[3] = [p=/^?(foo|bar)$?/] -} diff --git a/testing/btest/language/copy-all-opaques.zeek b/testing/btest/language/copy-all-opaques.zeek index 25ca89fd80..92e50467e1 100644 --- a/testing/btest/language/copy-all-opaques.zeek +++ b/testing/btest/language/copy-all-opaques.zeek @@ -78,4 +78,12 @@ event zeek_init() local handle2 = copy(handle); print entropy_test_finish(handle); print entropy_test_finish(handle2); + + print "============ Paraglob"; + local p = paraglob_init(vector("https://*.google.com/*", "*malware*", "*.gov*")); + local p2 = copy(p); + print paraglob_equals(p, p2); + # A get operation shouldn't change the paraglob + paraglob_get(p, "whitehouse.gov"); + print paraglob_equals(p, p2); } diff --git a/testing/btest/language/paraglob-serialization.zeek b/testing/btest/language/paraglob-serialization.zeek new file mode 100644 index 0000000000..00d6c7a967 --- /dev/null +++ b/testing/btest/language/paraglob-serialization.zeek @@ -0,0 +1,102 @@ +# @TEST-PORT: BROKER_PORT +# +# @TEST-EXEC: btest-bg-run recv "zeek -B broker -b ../recv.zeek >recv.out" +# @TEST-EXEC: btest-bg-run send "zeek -B broker -b ../send.zeek >send.out" +# +# @TEST-EXEC: btest-bg-wait 30 +# @TEST-EXEC: btest-diff recv/recv.out +# @TEST-EXEC: btest-diff send/send.out + +@TEST-START-FILE send.zeek + +redef exit_only_after_terminate = T; + +global event_count = 0; +global p: opaque of paraglob = paraglob_init(vector("hello", "*ello", "*")); + +global ping: event(msg: opaque of paraglob, c: count); + +event zeek_init() + { + print "Starting send."; + print paraglob_get(p, "hello"); + Broker::subscribe("bro/event/my_topic"); + Broker::peer("127.0.0.1", 9999/tcp); + print "is_remote should be F, and is", is_remote_event(); + } + +function send_event() + { + ++event_count; + local e = Broker::make_event(ping, p, event_count); + Broker::publish("bro/event/my_topic", e); + } + +event Broker::peer_added(endpoint: Broker::EndpointInfo, msg: string) + { + print fmt("sender added peer: endpoint=%s msg=%s", + endpoint$network$address, msg); + send_event(); + } + +event Broker::peer_lost(endpoint: Broker::EndpointInfo, msg: string) + { + print fmt("sender lost peer: endpoint=%s msg=%s", + endpoint$network$address, msg); + terminate(); + } + +event pong(msg: opaque of paraglob, n: count) + { + print "is_remote should be T, and is", is_remote_event(); + print fmt("sender got pong number: %s", n); + send_event(); + } + +@TEST-END-FILE + + +@TEST-START-FILE recv.zeek + +redef exit_only_after_terminate = T; + +const events_to_recv = 3; + +global handler: event(msg: string, c: count); +global auto_handler: event(msg: string, c: count); + +global pong: event(msg: opaque of paraglob, c: count); + +event zeek_init() + { + Broker::subscribe("bro/event/my_topic"); + Broker::listen("127.0.0.1", 9999/tcp); + } + +event Broker::peer_added(endpoint: Broker::EndpointInfo, msg: string) + { + print fmt("receiver added peer: endpoint=%s msg=%s", endpoint$network$address, msg); + } + +event Broker::peer_lost(endpoint: Broker::EndpointInfo, msg: string) + { + print fmt("receiver lost peer: endpoint=%s msg=%s", endpoint$network$address, msg); + } + +event ping(msg: opaque of paraglob, n: count) + { + print "is_remote should be T, and is", is_remote_event(); + if ( n > events_to_recv ) + { + print get_broker_stats(); + terminate(); + return; + } + print fmt("receiver got ping number: %s", n); + print paraglob_get(msg, "hello"); + + local e = Broker::make_event(pong, msg, n); + Broker::publish("bro/event/my_topic", e); + } + +@TEST-END-FILE diff --git a/testing/btest/language/paraglob.zeek b/testing/btest/language/paraglob.zeek index b5a2dcba11..acd12d80da 100644 --- a/testing/btest/language/paraglob.zeek +++ b/testing/btest/language/paraglob.zeek @@ -15,9 +15,11 @@ event zeek_init () # paraglob_init should not modify v1 print (v1 == vector("*", "d?g", "*og", "d?", "d[!wl]g")); # p_eq and p1 should be the same paraglobs - print paraglob_equals(p1, p_eq); + print paraglob_equals(p_eq, p1); print paraglob_get(p1, "dog"); + + print paraglob_get(p2, "once"); print paraglob_get(p3, "www.strange-malware-domain.gov"); diff --git a/testing/btest/scripts/base/frameworks/input/bad_patterns.zeek b/testing/btest/scripts/base/frameworks/input/bad_patterns.zeek deleted file mode 100644 index 23d25b516b..0000000000 --- a/testing/btest/scripts/base/frameworks/input/bad_patterns.zeek +++ /dev/null @@ -1,38 +0,0 @@ -# @TEST-EXEC: zeek -b %INPUT -# @TEST-EXEC: btest-diff .stderr - -@TEST-START-FILE input.log -#separator \x09 -#fields i p -#types count pattern -1 /d/og/ -2 /cat/sss -3 /foo|bar -4 this is not a pattern -5 /5 -@TEST-END-FILE - -redef exit_only_after_terminate = T; - -module A; - -type Idx: record { - i: int; -}; - -type Val: record { - p: pattern; -}; - -event kill_me() - { - terminate(); - } - -global pats: table[int] of Val = table(); - -event zeek_init() - { - Input::add_table([$source="input.log", $name="pats", $idx=Idx, $val=Val, $destination=pats]); - schedule 10msec { kill_me() }; - } diff --git a/testing/btest/scripts/base/frameworks/input/patterns.zeek b/testing/btest/scripts/base/frameworks/input/patterns.zeek deleted file mode 100644 index eeed7ac602..0000000000 --- a/testing/btest/scripts/base/frameworks/input/patterns.zeek +++ /dev/null @@ -1,47 +0,0 @@ -# @TEST-EXEC: btest-bg-run zeek zeek -b %INPUT -# @TEST-EXEC: btest-bg-wait 10 - - -redef exit_only_after_terminate = T; - -@TEST-START-FILE input.log -#separator \x09 -#fields i p -#types count pattern -1 /dog/ -2 /cat/ -3 /foo|bar/ -4 /^oob/ -@TEST-END-FILE - -global outfile: file; - -module A; - -type Idx: record { - i: int; -}; - -type Val: record { - p: pattern; -}; - -global pats: table[int] of Val = table(); - -event zeek_init() - { - outfile = open("../out"); - # first read in the old stuff into the table... - Input::add_table([$source="../input.log", $name="pats", $idx=Idx, $val=Val, $destination=pats]); - } - -event Input::end_of_data(name: string, source:string) - { - print outfile, (pats[3]$p in "foobar"); # T - print outfile, (pats[4]$p in "foobar"); # F - print outfile, (pats[3]$p == "foo"); # T - print outfile, pats; - Input::remove("pats"); - close(outfile); - terminate(); - } From e100558658ceb7c6615663f88bc2fb89cebe4c38 Mon Sep 17 00:00:00 2001 From: ZekeMedley Date: Wed, 5 Jun 2019 12:33:20 -0700 Subject: [PATCH 3/8] Change C++11 detection in paraglob. --- aux/paraglob | 2 +- testing/btest/language/paraglob.zeek | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/aux/paraglob b/aux/paraglob index bdff7b7634..0d65284144 160000 --- a/aux/paraglob +++ b/aux/paraglob @@ -1 +1 @@ -Subproject commit bdff7b76349fa740f049e794d3f7881a0d65c766 +Subproject commit 0d652841448b3a8ad12d5fa25a3769ded056afa0 diff --git a/testing/btest/language/paraglob.zeek b/testing/btest/language/paraglob.zeek index acd12d80da..de57712d7f 100644 --- a/testing/btest/language/paraglob.zeek +++ b/testing/btest/language/paraglob.zeek @@ -1,4 +1,4 @@ -# @TEST-EXEC: bro -b %INPUT >out +# @TEST-EXEC: zeek -b %INPUT >out # @TEST-EXEC: btest-diff out event zeek_init () From 145bfe50afc4ee64fe36c762c9dacaa17ec3663b Mon Sep 17 00:00:00 2001 From: Zeke Medley Date: Thu, 6 Jun 2019 15:35:02 -0700 Subject: [PATCH 4/8] Update paraglob submodule --- aux/paraglob | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aux/paraglob b/aux/paraglob index 0d65284144..c3bd6b88d8 160000 --- a/aux/paraglob +++ b/aux/paraglob @@ -1 +1 @@ -Subproject commit 0d652841448b3a8ad12d5fa25a3769ded056afa0 +Subproject commit c3bd6b88d8ee79752d95f6647a098f9a0b600b0e From bd605bfc3a1ac23bb5acfbf9984c938eabeae158 Mon Sep 17 00:00:00 2001 From: Zeke Medley Date: Fri, 7 Jun 2019 09:45:18 -0700 Subject: [PATCH 5/8] Stop execution on paraglob error. --- src/bro.bif | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/bro.bif b/src/bro.bif index d08f9c5792..7b5d5671ae 100644 --- a/src/bro.bif +++ b/src/bro.bif @@ -802,7 +802,8 @@ function paraglob_init%(v: any%) : opaque of paraglob if ( v->Type()->Tag() != TYPE_VECTOR || v->Type()->YieldType()->Tag() != TYPE_STRING ) { - builtin_error("paraglob requires a vector of strings for initialization."); + // reporter->Error will throw an exception. + reporter->Error("paraglob requires a vector of strings for initialization."); return nullptr; } @@ -822,8 +823,8 @@ function paraglob_init%(v: any%) : opaque of paraglob // Thrown if paraglob fails to add a pattern. catch (const paraglob::add_error& e) { - builtin_error(e.what()); - return nullptr; + reporter->Error(e.what()); + return nullptr; } %} From f1779a2518a8aeab41b4aa41b6a0f7bf3b218618 Mon Sep 17 00:00:00 2001 From: Zeke Medley Date: Thu, 20 Jun 2019 15:13:31 -0700 Subject: [PATCH 6/8] Update paraglob serialization. --- src/OpaqueVal.cc | 30 ++++++++++++++++++------------ src/OpaqueVal.h | 7 ++----- 2 files changed, 20 insertions(+), 17 deletions(-) diff --git a/src/OpaqueVal.cc b/src/OpaqueVal.cc index 694fa0ba77..8a94153014 100644 --- a/src/OpaqueVal.cc +++ b/src/OpaqueVal.cc @@ -1047,36 +1047,42 @@ bool ParaglobVal::operator==(const ParaglobVal& other) const return *(this->internal_paraglob) == *(other.internal_paraglob); } -IMPLEMENT_SERIAL(ParaglobVal, SER_PARAGLOB_VAL) +IMPLEMENT_OPAQUE_VALUE(ParaglobVal) -bool ParaglobVal::DoSerialize(SerialInfo* info) const +broker::expected ParaglobVal::DoSerialize() const { - DO_SERIALIZE(SER_PARAGLOB_VAL, OpaqueVal) - + broker::vector d; std::unique_ptr> iv = this->internal_paraglob->serialize(); - - return SERIALIZE(iv.get()); + for (uint8_t a : *(iv.get())) + d.emplace_back(static_cast(a)); + return {std::move(d)}; } -bool ParaglobVal::DoUnserialize(UnserialInfo* info) +bool ParaglobVal::DoUnserialize(const broker::data& data) { - DO_UNSERIALIZE(OpaqueVal) + auto d = caf::get_if(&data); + if ( ! d ) + return false; std::unique_ptr> iv (new std::vector); + iv->resize(d->size()); - bool success = UNSERIALIZE(iv.get()); + for (std::vector::size_type i = 0; i < d->size(); ++i) + { + get_vector_idx(*d, i, iv.get()->data() + i); + } try { this->internal_paraglob = build_unique(std::move(iv)); } catch (const paraglob::underflow_error& e) { - reporter->Error(e.what()); + reporter->Error("Paraglob underflow error -> %s", e.what()); return false; } catch (const paraglob::overflow_error& e) { - reporter->Error(e.what()); + reporter->Error("Paraglob overflow error -> %s", e.what()); return false; } - return success; + return true; } Val* ParaglobVal::DoClone(CloneState* state) diff --git a/src/OpaqueVal.h b/src/OpaqueVal.h index cc6d63a1e7..aa47efb49d 100644 --- a/src/OpaqueVal.h +++ b/src/OpaqueVal.h @@ -3,12 +3,9 @@ #ifndef OPAQUEVAL_H #define OPAQUEVAL_H -<<<<<<< HEAD #include #include // std::unique_ptr -======= ->>>>>>> upstream/master #include "RandTest.h" #include "Val.h" #include "digest.h" @@ -333,6 +330,8 @@ public: protected: ParaglobVal() : OpaqueVal(paraglob_type) {} + DECLARE_OPAQUE_VALUE(ParaglobVal) + private: std::unique_ptr internal_paraglob; // Small convenience function. Does what std::make_unique does in C++14. Wont @@ -341,8 +340,6 @@ private: std::unique_ptr build_unique (Args&&... args) { return std::unique_ptr(new T(std::forward(args)...)); } - - DECLARE_SERIAL(ParaglobVal) }; #endif From 5d5558629391334607cd078eb4edfa2bf4af5140 Mon Sep 17 00:00:00 2001 From: Zeke Medley Date: Thu, 20 Jun 2019 15:25:19 -0700 Subject: [PATCH 7/8] Catch paraglob serialization errors in DoClone. --- src/#OpaqueVal.h# | 213 ---------------------------------------------- src/.#OpaqueVal.h | 1 - src/OpaqueVal.cc | 12 ++- 3 files changed, 10 insertions(+), 216 deletions(-) delete mode 100644 src/#OpaqueVal.h# delete mode 120000 src/.#OpaqueVal.h diff --git a/src/#OpaqueVal.h# b/src/#OpaqueVal.h# deleted file mode 100644 index 42ac6641fd..0000000000 --- a/src/#OpaqueVal.h# +++ /dev/null @@ -1,213 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#ifndef OPAQUEVAL_H -#define OPAQUEVAL_H - -#include -#include // std::unique_ptr - -#include "RandTest.h" -#include "Val.h" -#include "digest.h" -#include "src/paraglob.h" - -namespace probabilistic { - class BloomFilter; - class CardinalityCounter; -} - -class HashVal : public OpaqueVal { -public: - virtual bool IsValid() const; - virtual bool Init(); - virtual bool Feed(const void* data, size_t size); - virtual StringVal* Get(); - -protected: - HashVal() { }; - explicit HashVal(OpaqueType* t); - - virtual bool DoInit(); - virtual bool DoFeed(const void* data, size_t size); - virtual StringVal* DoGet(); - - DECLARE_SERIAL(HashVal); - -private: - // This flag exists because Get() can only be called once. - bool valid; -}; - -class MD5Val : public HashVal { -public: - static void digest(val_list& vlist, u_char result[MD5_DIGEST_LENGTH]); - - static void hmac(val_list& vlist, - u_char key[MD5_DIGEST_LENGTH], - u_char result[MD5_DIGEST_LENGTH]); - - MD5Val(); - ~MD5Val(); - - Val* DoClone(CloneState* state) override; - -protected: - friend class Val; - - bool DoInit() override; - bool DoFeed(const void* data, size_t size) override; - StringVal* DoGet() override; - - DECLARE_SERIAL(MD5Val); - -private: - EVP_MD_CTX* ctx; -}; - -class SHA1Val : public HashVal { -public: - static void digest(val_list& vlist, u_char result[SHA_DIGEST_LENGTH]); - - SHA1Val(); - ~SHA1Val(); - - Val* DoClone(CloneState* state) override; - -protected: - friend class Val; - - bool DoInit() override; - bool DoFeed(const void* data, size_t size) override; - StringVal* DoGet() override; - - DECLARE_SERIAL(SHA1Val); - -private: - EVP_MD_CTX* ctx; -}; - -class SHA256Val : public HashVal { -public: - static void digest(val_list& vlist, u_char result[SHA256_DIGEST_LENGTH]); - - SHA256Val(); - ~SHA256Val(); - - Val* DoClone(CloneState* state) override; - -protected: - friend class Val; - - bool DoInit() override; - bool DoFeed(const void* data, size_t size) override; - StringVal* DoGet() override; - - DECLARE_SERIAL(SHA256Val); - -private: - EVP_MD_CTX* ctx; -}; - -class EntropyVal : public OpaqueVal { -public: - EntropyVal(); - - Val* DoClone(CloneState* state) override; - - bool Feed(const void* data, size_t size); - bool Get(double *r_ent, double *r_chisq, double *r_mean, - double *r_montepicalc, double *r_scc); - -protected: - friend class Val; - - DECLARE_SERIAL(EntropyVal); - -private: - RandTest state; -}; - -class BloomFilterVal : public OpaqueVal { -public: - explicit BloomFilterVal(probabilistic::BloomFilter* bf); - ~BloomFilterVal() override; - - Val* DoClone(CloneState* state) override; - - BroType* Type() const; - bool Typify(BroType* type); - - void Add(const Val* val); - size_t Count(const Val* val) const; - void Clear(); - bool Empty() const; - string InternalState() const; - - static BloomFilterVal* Merge(const BloomFilterVal* x, - const BloomFilterVal* y); - -protected: - friend class Val; - BloomFilterVal(); - explicit BloomFilterVal(OpaqueType* t); - - DECLARE_SERIAL(BloomFilterVal); - -private: - // Disable. - BloomFilterVal(const BloomFilterVal&); - BloomFilterVal& operator=(const BloomFilterVal&); - - BroType* type; - CompositeHash* hash; - probabilistic::BloomFilter* bloom_filter; -}; - - -class CardinalityVal: public OpaqueVal { -public: - explicit CardinalityVal(probabilistic::CardinalityCounter*); - ~CardinalityVal() override; - - Val* DoClone(CloneState* state) override; - - void Add(const Val* val); - - BroType* Type() const; - bool Typify(BroType* type); - - - probabilistic::CardinalityCounter* Get() { return c; }; - -protected: - CardinalityVal(); - -private: - BroType* type; - CompositeHash* hash; - probabilistic::CardinalityCounter* c; - - DECLARE_SERIAL(CardinalityVal); -}; - -class ParaglobVal : public OpaqueVal { -public: - explicit ParaglobVal(std::unique_ptr p); - VectorVal* get(StringVal* &pattern); - Val* DoClone(CloneState* state) override; - bool operator==(const ParaglobVal& other) const; - -protected: - ParaglobVal() : OpaqueVal(paraglob_type) {} - -private: - std::unique_ptr internal_paraglob; - // Small convenience function. Does what std::make_unique does in C++14. Wont - // work on arrays. - template - std::unique_ptr build_unique (Args&&... args) { - return std::unique_ptr(new T(std::forward(args)...)); - } - - DECLARE_SERIAL(ParaglobVal) -}; \ No newline at end of file diff --git a/src/.#OpaqueVal.h b/src/.#OpaqueVal.h deleted file mode 120000 index bd904257c8..0000000000 --- a/src/.#OpaqueVal.h +++ /dev/null @@ -1 +0,0 @@ -zekemedley@zeke.6738:1560935202 \ No newline at end of file diff --git a/src/OpaqueVal.cc b/src/OpaqueVal.cc index 8a94153014..37e9b88510 100644 --- a/src/OpaqueVal.cc +++ b/src/OpaqueVal.cc @@ -1087,6 +1087,14 @@ bool ParaglobVal::DoUnserialize(const broker::data& data) Val* ParaglobVal::DoClone(CloneState* state) { - return new ParaglobVal - (build_unique(this->internal_paraglob->serialize())); + try { + return new ParaglobVal + (build_unique(this->internal_paraglob->serialize())); + } catch (const paraglob::underflow_error& e) { + reporter->Error("Paraglob underflow error while cloning -> %s", e.what()); + return nullptr; + } catch (const paraglob::overflow_error& e) { + reporter->Error("Paraglob overflow error while cloning -> %s", e.what()); + return nullptr; + } } From 9efca707b671d9e1dae3d6e5b064d12364906a3c Mon Sep 17 00:00:00 2001 From: Zeke Medley Date: Mon, 24 Jun 2019 12:40:16 -0700 Subject: [PATCH 8/8] Add leak test to paraglob. --- testing/btest/core/leaks/paraglob.zeek | 34 ++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 testing/btest/core/leaks/paraglob.zeek diff --git a/testing/btest/core/leaks/paraglob.zeek b/testing/btest/core/leaks/paraglob.zeek new file mode 100644 index 0000000000..c9e42f51e0 --- /dev/null +++ b/testing/btest/core/leaks/paraglob.zeek @@ -0,0 +1,34 @@ +# Needs perftools support. +# +# @TEST-GROUP: leaks +# +# @TEST-REQUIRES: zeek --help 2>&1 | grep -q mem-leaks +# +# @TEST-EXEC: HEAP_CHECK_DUMP_DIRECTORY=. HEAPCHECK=local btest-bg-run zeek zeek -m -b -r $TRACES/http/get.trace %INPUT +# @TEST-EXEC: btest-bg-wait 120 + +event new_connection (c : connection) +{ + local v1 = vector("*", "d?g", "*og", "d?", "d[!wl]g"); + local v2 = vector("once", "!o*", "once"); + local v3 = vector("https://*.google.com/*", "*malware*", "*.gov*"); + + local p1 = paraglob_init(v1); + local p2: opaque of paraglob = paraglob_init(v2); + local p3 = paraglob_init(v3); + local p_eq = paraglob_init(v1); + + # paraglob_init should not modify v1 + print (v1 == vector("*", "d?g", "*og", "d?", "d[!wl]g")); + # p_eq and p1 should be the same paraglobs + print paraglob_equals(p_eq, p1); + + print paraglob_get(p1, "dog"); + + + print paraglob_get(p2, "once"); + print paraglob_get(p3, "www.strange-malware-domain.gov"); + + local large_glob: opaque of paraglob = paraglob_init(v3); + print paraglob_get(large_glob, "www.strange-malware-domain.gov"); +}