Initial paraglob integration.

This commit is contained in:
ZekeMedley 2019-05-28 16:59:50 -07:00
parent f2b7764769
commit e1520a0d67
17 changed files with 302 additions and 3 deletions

3
.gitmodules vendored
View file

@ -28,3 +28,6 @@
[submodule "doc"]
path = doc
url = https://github.com/zeek/zeek-docs
[submodule "aux/paraglob"]
path = aux/paraglob
url = https://github.com/zeek/paraglob

View file

@ -325,6 +325,10 @@ include_directories(BEFORE ${CAF_INCLUDE_DIR_CORE})
include_directories(BEFORE ${CAF_INCLUDE_DIR_IO})
include_directories(BEFORE ${CAF_INCLUDE_DIR_OPENSSL})
add_subdirectory(aux/paraglob)
set(zeekdeps ${zeekdeps} paraglob)
include_directories(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}/aux/paraglob)
add_subdirectory(src)
add_subdirectory(scripts)
add_subdirectory(man)

1
aux/paraglob Submodule

@ -0,0 +1 @@
Subproject commit 757e00b6510d2b0e92510c9c26f9e3279aa442a4

View file

@ -869,3 +869,28 @@ void CardinalityVal::Add(const Val* val)
c->AddElement(key->Hash());
delete key;
}
// Wraps an already-constructed paraglob in an opaque value of type
// paraglob_type. The pointer is stored as given; the paraglob itself
// is not copied.
ParaglobVal::ParaglobVal(paraglob::Paraglob* p)
	: OpaqueVal(paraglob_type), internal_paraglob(p)
	{
	}
// Queries the wrapped paraglob with the given string and returns the
// results as a newly allocated "string_vec" vector, one StringVal per
// entry, in the order the paraglob reported them.
VectorVal* ParaglobVal::get(StringVal* &pattern)
	{
	VectorVal* result = new VectorVal(internal_type("string_vec")->AsVectorType());

	// Build the query from pointer + length rather than relying on
	// NUL termination alone.
	std::string query(pattern->CheckString(), pattern->Len());
	std::vector<std::string> found = internal_paraglob->get(query);

	unsigned int slot = 0;

	for ( const std::string& entry : found )
		{
		result->Assign(slot, new StringVal(entry.c_str()));
		++slot;
		}

	return result;
	}
// Two ParaglobVals compare equal when the paraglobs they wrap compare
// equal; the comparison itself is delegated to paraglob::Paraglob.
// Note that the right-hand operand is taken by pointer.
bool ParaglobVal::operator==(const ParaglobVal *other)
	{
	paraglob::Paraglob& mine = *internal_paraglob;
	paraglob::Paraglob& theirs = *other->internal_paraglob;

	return mine == theirs;
	}

View file

@ -8,6 +8,7 @@
#include "RandTest.h"
#include "Val.h"
#include "digest.h"
#include "src/paraglob.h"
namespace probabilistic {
class BloomFilter;
@ -188,4 +189,14 @@ private:
DECLARE_SERIAL(CardinalityVal);
};
class ParaglobVal : public OpaqueVal {
public:
explicit ParaglobVal(paraglob::Paraglob* p);
VectorVal* get(StringVal* &pattern);
bool operator==(const ParaglobVal *other);
private:
paraglob::Paraglob* internal_paraglob;
};
#endif

View file

@ -639,6 +639,7 @@ extern OpaqueType* topk_type;
extern OpaqueType* bloomfilter_type;
extern OpaqueType* x509_opaque_type;
extern OpaqueType* ocsp_resp_opaque_type;
extern OpaqueType* paraglob_type;
// Returns the Bro basic (non-parameterized) type with the given type.
// The reference count of the type is not increased.

View file

@ -789,6 +789,63 @@ function sha256_hash_finish%(handle: opaque of sha256%): string
return static_cast<HashVal*>(handle)->Get();
%}
## Initializes and returns a new paraglob.
##
## v: Vector of strings holding the patterns to initialize the paraglob with.
##
## Returns: A new, compiled paraglob with the patterns in *v*.
##
## .. zeek:see:: paraglob_get paraglob_equals paraglob_add
function paraglob_init%(v: any%) : opaque of paraglob
	%{
	// The argument is declared as "any", so check at run time that it
	// really is a vector of strings before touching its elements.
	if ( v->Type()->Tag() != TYPE_VECTOR ||
	     v->Type()->YieldType()->Tag() != TYPE_STRING )
		{
		builtin_error("paraglob requires a vector for initialization.");
		return nullptr;
		}

	VectorVal* vv = v->AsVectorVal();

	std::vector<std::string> patterns;
	patterns.reserve(vv->Size());

	for ( unsigned int i = 0; i < vv->Size(); ++i )
		{
		auto elem = vv->Lookup(i);

		// A vector may contain unset slots, for which Lookup() yields
		// a null pointer; skip those instead of dereferencing them.
		if ( ! elem )
			continue;

		const BroString* s = elem->AsString();
		patterns.emplace_back(s->CheckString(), s->Len());
		}

	return new ParaglobVal(new paraglob::Paraglob(patterns));
	%}
## Looks up a string in a paraglob and returns the paraglob's patterns that
## match it.
##
## handle: A compiled paraglob.
##
## pat: The string to match against the patterns stored in *handle*.
##
## Returns: A vector with the patterns of *handle* that match *pat*.
##
## .. zeek:see:: paraglob_add paraglob_equals paraglob_init
function paraglob_get%(handle: opaque of paraglob, pat: string%): string_vec
	%{
	ParaglobVal* glob = static_cast<ParaglobVal*>(handle);
	return glob->get(pat);
	%}
## Compares two paraglobs for equality.
##
## p_one: A compiled paraglob.
##
## p_two: A compiled paraglob.
##
## Returns: True if both paraglobs contain the same patterns, false otherwise.
##
## .. zeek:see:: paraglob_add paraglob_get paraglob_init
function paraglob_equals%(p_one: opaque of paraglob, p_two: opaque of paraglob%)
: bool
	%{
	ParaglobVal* lhs = static_cast<ParaglobVal*>(p_one);
	ParaglobVal* rhs = static_cast<ParaglobVal*>(p_two);

	// ParaglobVal::operator== takes its right-hand side by pointer, so
	// only the left operand is dereferenced. Comparing the two pointers
	// directly would merely test object identity and never look at the
	// patterns.
	bool eq = (*lhs == rhs);

	return val_mgr->GetBool(eq);
	%}
## Returns 32-bit digest of arbitrary input values using FNV-1a hash algorithm.
## See `<https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function>`_.
##

View file

@ -224,7 +224,7 @@ ReaderBackend* Manager::CreateBackend(ReaderFrontend* frontend, EnumVal* tag)
return backend;
}
// Create a new input reader object to be used at whomevers leisure lateron.
// Create a new input reader object to be used at whomevers leisure later on.
bool Manager::CreateStream(Stream* info, RecordVal* description)
{
RecordType* rtype = description->Type()->AsRecordType();
@ -232,7 +232,7 @@ bool Manager::CreateStream(Stream* info, RecordVal* description)
|| same_type(rtype, BifType::Record::Input::EventDescription, 0)
|| same_type(rtype, BifType::Record::Input::AnalysisDescription, 0) ) )
{
reporter->Error("Streamdescription argument not of right type for new input stream");
reporter->Error("Stream description argument not of right type for new input stream");
return false;
}
@ -824,6 +824,7 @@ bool Manager::IsCompatibleType(BroType* t, bool atomic_only)
case TYPE_INTERVAL:
case TYPE_ENUM:
case TYPE_STRING:
case TYPE_PATTERN:
return true;
case TYPE_RECORD:
@ -2074,6 +2075,12 @@ int Manager::GetValueLength(const Value* val) const
}
break;
case TYPE_PATTERN:
{
length += strlen(val->val.pattern_text_val) + 1;
break;
}
case TYPE_TABLE:
{
for ( int i = 0; i < val->val.set_val.size; i++ )
@ -2193,6 +2200,14 @@ int Manager::CopyValue(char *data, const int startpos, const Value* val) const
return length;
}
case TYPE_PATTERN:
{
// include null-terminator
int length = strlen(val->val.pattern_text_val) + 1;
memcpy(data + startpos, val->val.pattern_text_val, length);
return length;
}
case TYPE_TABLE:
{
int length = 0;
@ -2350,6 +2365,13 @@ Val* Manager::ValueToVal(const Stream* i, const Value* val, BroType* request_typ
return subnetval;
}
case TYPE_PATTERN:
{
RE_Matcher* re = new RE_Matcher(val->val.pattern_text_val);
re->Compile();
return new PatternVal(re);
}
case TYPE_TABLE:
{
// all entries have to have the same type...
@ -2492,6 +2514,13 @@ Val* Manager::ValueToVal(const Stream* i, const Value* val, bool& have_error) co
return subnetval;
}
case TYPE_PATTERN:
{
RE_Matcher* re = new RE_Matcher(val->val.pattern_text_val);
re->Compile();
return new PatternVal(re);
}
case TYPE_TABLE:
{
TypeList* set_index;

View file

@ -122,6 +122,7 @@ OpaqueType* topk_type = 0;
OpaqueType* bloomfilter_type = 0;
OpaqueType* x509_opaque_type = 0;
OpaqueType* ocsp_resp_opaque_type = 0;
OpaqueType* paraglob_type = 0;
// Keep copy of command line
int bro_argc;
@ -809,6 +810,7 @@ int main(int argc, char** argv)
bloomfilter_type = new OpaqueType("bloomfilter");
x509_opaque_type = new OpaqueType("x509");
ocsp_resp_opaque_type = new OpaqueType("ocsp_resp");
paraglob_type = new OpaqueType("paraglob");
// The leak-checker tends to produce some false
// positives (memory which had already been

View file

@ -126,6 +126,7 @@ struct Value {
vec_t vector_val;
addr_t addr_val;
subnet_t subnet_val;
const char* pattern_text_val;
struct {
char* data;

View file

@ -325,6 +325,28 @@ threading::Value* Ascii::ParseValue(const string& s, const string& name, TypeTag
break;
}
case TYPE_PATTERN:
{
string cannidate = get_unescaped_string(s);
// A string is a candidate pattern iff it begins and ends with
// a '/'. Whether or not the rest of the string is legal will
// be determined later when it is given to the RE engine.
if ( cannidate.size() >= 2 )
{
if ( cannidate.front() == cannidate.back() &&
cannidate.back() == '/' )
{
// Remove the '/'s
cannidate.erase(0, 1);
cannidate.erase(cannidate.size() - 1);
val->val.pattern_text_val = copy_string(cannidate.c_str());
break;
}
}
GetThread()->Error(GetThread()->Fmt("String '%s' contained no parseable pattern.", cannidate.c_str()));
goto parse_error;
}
case TYPE_TABLE:
case TYPE_VECTOR:
// First - common initialization

View file

@ -0,0 +1,6 @@
[T, T, T, T, T]
T
[*, *og, d?g, d[!wl]g]
[once]
[*.gov*, *malware*]
[*.gov*, *malware*]

View file

@ -0,0 +1,9 @@
error: input.log/Input::READER_ASCII: String '/cat/sss' contained no parseable pattern.
warning: input.log/Input::READER_ASCII: Could not convert line '2 /cat/sss' of input.log to Val. Ignoring line.
error: input.log/Input::READER_ASCII: String '/foo|bar' contained no parseable pattern.
warning: input.log/Input::READER_ASCII: Could not convert line '3 /foo|bar' of input.log to Val. Ignoring line.
error: input.log/Input::READER_ASCII: String 'this is not a pattern' contained no parseable pattern.
warning: input.log/Input::READER_ASCII: Could not convert line '4 this is not a pattern' of input.log to Val. Ignoring line.
error: input.log/Input::READER_ASCII: String '/5' contained no parseable pattern.
warning: input.log/Input::READER_ASCII: Could not convert line '5 /5' of input.log to Val. Ignoring line.
received termination signal

View file

@ -0,0 +1,9 @@
T
F
T
{
[2] = [p=/^?(cat)$?/],
[4] = [p=/^?(^oob)$?/],
[1] = [p=/^?(dog)$?/],
[3] = [p=/^?(foo|bar)$?/]
}

View file

@ -0,0 +1,34 @@
# @TEST-EXEC: bro -b %INPUT >out
# @TEST-EXEC: btest-diff out
# Exercises the paraglob BIFs: builds several paraglobs from string vectors,
# checks equality of two paraglobs built from the same vector, and prints the
# results of a few lookups. The printed output is compared against a baseline,
# so the order of the print statements matters.
event zeek_init ()
{
local v1 = vector("*", "d?g", "*og", "d?", "d[!wl]g");
# Contains a duplicate entry ("once") on purpose.
local v2 = vector("once", "!o*", "once");
local v3 = vector("https://*.google.com/*", "*malware*", "*.gov*");
local p1 = paraglob_init(v1);
local p2: opaque of paraglob = paraglob_init(v2);
local p3 = paraglob_init(v3);
# Second paraglob built from the same vector as p1, used for the equality check.
local p_eq = paraglob_init(v1);
# paraglob_init should not modify v1
print (v1 == vector("*", "d?g", "*og", "d?", "d[!wl]g"));
# p_eq and p1 should be the same paraglobs
print paraglob_equals(p1, p_eq);
print paraglob_get(p1, "dog");
print paraglob_get(p2, "once");
print paraglob_get(p3, "www.strange-malware-domain.gov");
# This looks like a lot, but really should complete quickly.
# Paraglob should stop addition of duplicate patterns.
local i = 1000000;
# Appends v3[1] ("*malware*") to v3 one million times.
while (i > 0) {
i = i - 1;
v3 += v3[1];
}
local large_glob: opaque of paraglob = paraglob_init(v3);
# Expected to print the same result as the lookup on p3 above.
print paraglob_get(large_glob, "www.strange-malware-domain.gov");
}

View file

@ -0,0 +1,38 @@
# @TEST-EXEC: zeek -b %INPUT
# @TEST-EXEC: btest-diff .stderr
@TEST-START-FILE input.log
#separator \x09
#fields i p
#types count pattern
1 /d/og/
2 /cat/sss
3 /foo|bar
4 this is not a pattern
5 /5
@TEST-END-FILE
redef exit_only_after_terminate = T;
module A;
# Index record for the input table: the "i" column of input.log.
# NOTE(review): the input.log header declares column i as "count" while this
# field is "int" -- confirm that the mismatch is intended.
type Idx: record {
i: int;
};
# Value record for the input table: the "p" column of input.log, read as a
# pattern.
type Val: record {
p: pattern;
};
# Ends the test run; needed because exit_only_after_terminate is set.
event kill_me()
{
terminate();
}
global pats: table[int] of Val = table();
# Starts reading input.log into the pats table and schedules termination.
# Most lines of input.log hold malformed patterns on purpose; the test diffs
# the reader's messages on .stderr against a baseline.
event zeek_init()
{
Input::add_table([$source="input.log", $name="pats", $idx=Idx, $val=Val, $destination=pats]);
# NOTE(review): presumably 10msec is long enough for the reader to process
# the whole file before kill_me() fires -- confirm this is not timing-sensitive.
schedule 10msec { kill_me() };
}

View file

@ -0,0 +1,47 @@
# @TEST-EXEC: btest-bg-run zeek zeek -b %INPUT
# @TEST-EXEC: btest-bg-wait 10
redef exit_only_after_terminate = T;
@TEST-START-FILE input.log
#separator \x09
#fields i p
#types count pattern
1 /dog/
2 /cat/
3 /foo|bar/
4 /^oob/
@TEST-END-FILE
global outfile: file;
module A;
# Index record for the input table: the "i" column of input.log.
# NOTE(review): the input.log header declares column i as "count" while this
# field is "int" -- confirm that the mismatch is intended.
type Idx: record {
i: int;
};
# Value record for the input table: the "p" column of input.log, read as a
# pattern.
type Val: record {
p: pattern;
};
global pats: table[int] of Val = table();
# Opens the output file and starts reading input.log into the pats table.
# Paths are relative to the background-run directory, hence the "../".
event zeek_init()
{
outfile = open("../out");
# first read in the old stuff into the table...
Input::add_table([$source="../input.log", $name="pats", $idx=Idx, $val=Val, $destination=pats]);
}
# Runs once the input stream has been read completely: checks that the
# patterns read from input.log behave like regular pattern values, dumps the
# table, and shuts down. The trailing T/F comments give the expected output.
event Input::end_of_data(name: string, source:string)
{
print outfile, (pats[3]$p in "foobar"); # T
print outfile, (pats[4]$p in "foobar"); # F
print outfile, (pats[3]$p == "foo"); # T
print outfile, pats;
# Remove the stream and close the output before terminating.
Input::remove("pats");
close(outfile);
terminate();
}