mirror of
https://github.com/zeek/zeek.git
synced 2025-10-02 14:48:21 +00:00
Initial paraglob integration.
This commit is contained in:
parent
f2b7764769
commit
e1520a0d67
17 changed files with 302 additions and 3 deletions
3
.gitmodules
vendored
3
.gitmodules
vendored
|
@ -28,3 +28,6 @@
|
|||
[submodule "doc"]
|
||||
path = doc
|
||||
url = https://github.com/zeek/zeek-docs
|
||||
[submodule "aux/paraglob"]
|
||||
path = aux/paraglob
|
||||
url = https://github.com/zeek/paraglob
|
||||
|
|
|
@ -325,6 +325,10 @@ include_directories(BEFORE ${CAF_INCLUDE_DIR_CORE})
|
|||
include_directories(BEFORE ${CAF_INCLUDE_DIR_IO})
|
||||
include_directories(BEFORE ${CAF_INCLUDE_DIR_OPENSSL})
|
||||
|
||||
add_subdirectory(aux/paraglob)
|
||||
set(zeekdeps ${zeekdeps} paraglob)
|
||||
include_directories(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}/aux/paraglob)
|
||||
|
||||
add_subdirectory(src)
|
||||
add_subdirectory(scripts)
|
||||
add_subdirectory(man)
|
||||
|
|
1
aux/paraglob
Submodule
1
aux/paraglob
Submodule
|
@ -0,0 +1 @@
|
|||
Subproject commit 757e00b6510d2b0e92510c9c26f9e3279aa442a4
|
|
@ -869,3 +869,28 @@ void CardinalityVal::Add(const Val* val)
|
|||
c->AddElement(key->Hash());
|
||||
delete key;
|
||||
}
|
||||
|
||||
|
||||
// Builds an opaque value of type paraglob_type around an existing paraglob.
// The pointer is stored as given; the paraglob itself is not copied.
ParaglobVal::ParaglobVal(paraglob::Paraglob* p)
	: OpaqueVal(paraglob_type), internal_paraglob(p)
	{
	}
|
||||
|
||||
// Looks up a string in the wrapped paraglob and returns the results as a
// newly allocated string_vec.
//
// pattern: the string handed to paraglob::Paraglob::get().
//
// Returns: a new VectorVal holding one StringVal per entry returned by the
// paraglob, in the order the paraglob returned them.
VectorVal* ParaglobVal::get(StringVal* &pattern)
	{
	VectorVal* rval = new VectorVal(internal_type("string_vec")->AsVectorType());

	// Build the key from the raw bytes plus the length. CheckString() is
	// meant for NUL-free C strings, so the pointer it returns is not
	// guaranteed to cover Len() bytes and must not be paired with Len().
	std::string string_pattern(reinterpret_cast<const char*>(pattern->Bytes()),
	                           pattern->Len());
	const std::vector<std::string> matches = this->internal_paraglob->get(string_pattern);

	unsigned int idx = 0;

	for ( const std::string& match : matches )
		// Pass length and data explicitly so a match is never truncated
		// at an embedded NUL byte.
		rval->Assign(idx++, new StringVal(match.length(), match.data()));

	return rval;
	}
|
||||
|
||||
bool ParaglobVal::operator==(const ParaglobVal *other)
|
||||
{
|
||||
return (*(this->internal_paraglob) == *(other->internal_paraglob));
|
||||
}
|
||||
|
|
|
@ -8,6 +8,7 @@
|
|||
#include "RandTest.h"
|
||||
#include "Val.h"
|
||||
#include "digest.h"
|
||||
#include "src/paraglob.h"
|
||||
|
||||
namespace probabilistic {
|
||||
class BloomFilter;
|
||||
|
@ -188,4 +189,14 @@ private:
|
|||
DECLARE_SERIAL(CardinalityVal);
|
||||
};
|
||||
|
||||
class ParaglobVal : public OpaqueVal {
|
||||
public:
|
||||
explicit ParaglobVal(paraglob::Paraglob* p);
|
||||
VectorVal* get(StringVal* &pattern);
|
||||
bool operator==(const ParaglobVal *other);
|
||||
|
||||
private:
|
||||
paraglob::Paraglob* internal_paraglob;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
|
|
@ -639,6 +639,7 @@ extern OpaqueType* topk_type;
|
|||
extern OpaqueType* bloomfilter_type;
|
||||
extern OpaqueType* x509_opaque_type;
|
||||
extern OpaqueType* ocsp_resp_opaque_type;
|
||||
extern OpaqueType* paraglob_type;
|
||||
|
||||
// Returns the Bro basic (non-parameterized) type with the given type.
|
||||
// The reference count of the type is not increased.
|
||||
|
|
57
src/bro.bif
57
src/bro.bif
|
@ -789,6 +789,63 @@ function sha256_hash_finish%(handle: opaque of sha256%): string
|
|||
return static_cast<HashVal*>(handle)->Get();
|
||||
%}
|
||||
|
||||
|
||||
## Initializes and returns a new paraglob.
##
## v: Vector of strings holding the patterns to initialize the paraglob with.
##
## Returns: A new, compiled, paraglob with the patterns in *v*
##
## .. zeek:see:: paraglob_get paraglob_equals paraglob_add
function paraglob_init%(v: any%) : opaque of paraglob
	%{
	if ( v->Type()->Tag() != TYPE_VECTOR ||
	     v->Type()->YieldType()->Tag() != TYPE_STRING )
		{
		// The check above rejects any vector whose elements are not
		// strings, so say so in the message.
		builtin_error("paraglob requires a vector of strings for initialization.");
		return nullptr;
		}

	std::vector<std::string> patterns;
	VectorVal* vv = v->AsVectorVal();
	patterns.reserve(vv->Size());

	for ( unsigned int i = 0; i < vv->Size(); ++i )
		{
		// Unassigned vector slots come back as null; skip them
		// instead of dereferencing a null pointer.
		Val* elem = vv->Lookup(i);

		if ( ! elem )
			continue;

		const BroString* s = elem->AsString();

		// Copy the raw bytes. CheckString() is meant for NUL-free C
		// strings, so its result must not be combined with Len().
		patterns.emplace_back(reinterpret_cast<const char*>(s->Bytes()), s->Len());
		}

	return new ParaglobVal(new paraglob::Paraglob(patterns));
	%}
|
||||
|
||||
## Gets all the patterns inside the handle that match an input string.
##
## handle: A compiled paraglob.
##
## pat: The string to match against the patterns stored in *handle*.
##
## Returns: A vector of the patterns in *handle* that match *pat*.
##
## .. zeek:see:: paraglob_add paraglob_equals paraglob_init
function paraglob_get%(handle: opaque of paraglob, pat: string%): string_vec
	%{
	ParaglobVal* pg = static_cast<ParaglobVal*>(handle);
	return pg->get(pat);
	%}
|
||||
|
||||
## Compares two paraglobs for equality.
##
## p_one: A compiled paraglob.
##
## p_two: A compiled paraglob.
##
## Returns: True if both paraglobs contain the same patterns, false otherwise.
##
## .. zeek:see:: paraglob_add paraglob_get paraglob_init
function paraglob_equals%(p_one: opaque of paraglob, p_two: opaque of paraglob%)
	: bool
	%{
	ParaglobVal* lhs = static_cast<ParaglobVal*>(p_one);
	ParaglobVal* rhs = static_cast<ParaglobVal*>(p_two);

	// Dereference the left operand so that ParaglobVal::operator==(const
	// ParaglobVal*) is used. Comparing the two pointers directly only
	// tests object identity and never looks at the patterns.
	bool eq = (*lhs == rhs);

	return val_mgr->GetBool(eq);
	%}
|
||||
|
||||
## Returns 32-bit digest of arbitrary input values using FNV-1a hash algorithm.
|
||||
## See `<https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function>`_.
|
||||
##
|
||||
|
|
|
@ -224,7 +224,7 @@ ReaderBackend* Manager::CreateBackend(ReaderFrontend* frontend, EnumVal* tag)
|
|||
return backend;
|
||||
}
|
||||
|
||||
// Create a new input reader object to be used at whomevers leisure lateron.
|
||||
// Create a new input reader object to be used at whomevers leisure later on.
|
||||
bool Manager::CreateStream(Stream* info, RecordVal* description)
|
||||
{
|
||||
RecordType* rtype = description->Type()->AsRecordType();
|
||||
|
@ -232,7 +232,7 @@ bool Manager::CreateStream(Stream* info, RecordVal* description)
|
|||
|| same_type(rtype, BifType::Record::Input::EventDescription, 0)
|
||||
|| same_type(rtype, BifType::Record::Input::AnalysisDescription, 0) ) )
|
||||
{
|
||||
reporter->Error("Streamdescription argument not of right type for new input stream");
|
||||
reporter->Error("Stream description argument not of right type for new input stream");
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -824,6 +824,7 @@ bool Manager::IsCompatibleType(BroType* t, bool atomic_only)
|
|||
case TYPE_INTERVAL:
|
||||
case TYPE_ENUM:
|
||||
case TYPE_STRING:
|
||||
case TYPE_PATTERN:
|
||||
return true;
|
||||
|
||||
case TYPE_RECORD:
|
||||
|
@ -2074,6 +2075,12 @@ int Manager::GetValueLength(const Value* val) const
|
|||
}
|
||||
break;
|
||||
|
||||
case TYPE_PATTERN:
|
||||
{
|
||||
length += strlen(val->val.pattern_text_val) + 1;
|
||||
break;
|
||||
}
|
||||
|
||||
case TYPE_TABLE:
|
||||
{
|
||||
for ( int i = 0; i < val->val.set_val.size; i++ )
|
||||
|
@ -2193,6 +2200,14 @@ int Manager::CopyValue(char *data, const int startpos, const Value* val) const
|
|||
return length;
|
||||
}
|
||||
|
||||
case TYPE_PATTERN:
|
||||
{
|
||||
// include null-terminator
|
||||
int length = strlen(val->val.pattern_text_val) + 1;
|
||||
memcpy(data + startpos, val->val.pattern_text_val, length);
|
||||
return length;
|
||||
}
|
||||
|
||||
case TYPE_TABLE:
|
||||
{
|
||||
int length = 0;
|
||||
|
@ -2350,6 +2365,13 @@ Val* Manager::ValueToVal(const Stream* i, const Value* val, BroType* request_typ
|
|||
return subnetval;
|
||||
}
|
||||
|
||||
case TYPE_PATTERN:
|
||||
{
|
||||
RE_Matcher* re = new RE_Matcher(val->val.pattern_text_val);
|
||||
re->Compile();
|
||||
return new PatternVal(re);
|
||||
}
|
||||
|
||||
case TYPE_TABLE:
|
||||
{
|
||||
// all entries have to have the same type...
|
||||
|
@ -2492,6 +2514,13 @@ Val* Manager::ValueToVal(const Stream* i, const Value* val, bool& have_error) co
|
|||
return subnetval;
|
||||
}
|
||||
|
||||
case TYPE_PATTERN:
|
||||
{
|
||||
RE_Matcher* re = new RE_Matcher(val->val.pattern_text_val);
|
||||
re->Compile();
|
||||
return new PatternVal(re);
|
||||
}
|
||||
|
||||
case TYPE_TABLE:
|
||||
{
|
||||
TypeList* set_index;
|
||||
|
|
|
@ -122,6 +122,7 @@ OpaqueType* topk_type = 0;
|
|||
OpaqueType* bloomfilter_type = 0;
|
||||
OpaqueType* x509_opaque_type = 0;
|
||||
OpaqueType* ocsp_resp_opaque_type = 0;
|
||||
OpaqueType* paraglob_type = 0;
|
||||
|
||||
// Keep copy of command line
|
||||
int bro_argc;
|
||||
|
@ -809,6 +810,7 @@ int main(int argc, char** argv)
|
|||
bloomfilter_type = new OpaqueType("bloomfilter");
|
||||
x509_opaque_type = new OpaqueType("x509");
|
||||
ocsp_resp_opaque_type = new OpaqueType("ocsp_resp");
|
||||
paraglob_type = new OpaqueType("paraglob");
|
||||
|
||||
// The leak-checker tends to produce some false
|
||||
// positives (memory which had already been
|
||||
|
|
|
@ -126,6 +126,7 @@ struct Value {
|
|||
vec_t vector_val;
|
||||
addr_t addr_val;
|
||||
subnet_t subnet_val;
|
||||
const char* pattern_text_val;
|
||||
|
||||
struct {
|
||||
char* data;
|
||||
|
|
|
@ -325,6 +325,28 @@ threading::Value* Ascii::ParseValue(const string& s, const string& name, TypeTag
|
|||
break;
|
||||
}
|
||||
|
||||
case TYPE_PATTERN:
|
||||
{
|
||||
string cannidate = get_unescaped_string(s);
|
||||
// A string is a candidate pattern iff it begins and ends with
|
||||
// a '/'. Whether or not the rest of the string is legal will
|
||||
// be determined later when it is given to the RE engine.
|
||||
if ( cannidate.size() >= 2 )
|
||||
{
|
||||
if ( cannidate.front() == cannidate.back() &&
|
||||
cannidate.back() == '/' )
|
||||
{
|
||||
// Remove the '/'s
|
||||
cannidate.erase(0, 1);
|
||||
cannidate.erase(cannidate.size() - 1);
|
||||
val->val.pattern_text_val = copy_string(cannidate.c_str());
|
||||
break;
|
||||
}
|
||||
}
|
||||
GetThread()->Error(GetThread()->Fmt("String '%s' contained no parseable pattern.", cannidate.c_str()));
|
||||
goto parse_error;
|
||||
}
|
||||
|
||||
case TYPE_TABLE:
|
||||
case TYPE_VECTOR:
|
||||
// First - common initialization
|
||||
|
|
6
testing/btest/Baseline/language.paraglob/out
Normal file
6
testing/btest/Baseline/language.paraglob/out
Normal file
|
@ -0,0 +1,6 @@
|
|||
[T, T, T, T, T]
|
||||
T
|
||||
[*, *og, d?g, d[!wl]g]
|
||||
[once]
|
||||
[*.gov*, *malware*]
|
||||
[*.gov*, *malware*]
|
|
@ -0,0 +1,9 @@
|
|||
error: input.log/Input::READER_ASCII: String '/cat/sss' contained no parseable pattern.
|
||||
warning: input.log/Input::READER_ASCII: Could not convert line '2 /cat/sss' of input.log to Val. Ignoring line.
|
||||
error: input.log/Input::READER_ASCII: String '/foo|bar' contained no parseable pattern.
|
||||
warning: input.log/Input::READER_ASCII: Could not convert line '3 /foo|bar' of input.log to Val. Ignoring line.
|
||||
error: input.log/Input::READER_ASCII: String 'this is not a pattern' contained no parseable pattern.
|
||||
warning: input.log/Input::READER_ASCII: Could not convert line '4 this is not a pattern' of input.log to Val. Ignoring line.
|
||||
error: input.log/Input::READER_ASCII: String '/5' contained no parseable pattern.
|
||||
warning: input.log/Input::READER_ASCII: Could not convert line '5 /5' of input.log to Val. Ignoring line.
|
||||
received termination signal
|
|
@ -0,0 +1,9 @@
|
|||
T
|
||||
F
|
||||
T
|
||||
{
|
||||
[2] = [p=/^?(cat)$?/],
|
||||
[4] = [p=/^?(^oob)$?/],
|
||||
[1] = [p=/^?(dog)$?/],
|
||||
[3] = [p=/^?(foo|bar)$?/]
|
||||
}
|
34
testing/btest/language/paraglob.zeek
Normal file
34
testing/btest/language/paraglob.zeek
Normal file
|
@ -0,0 +1,34 @@
|
|||
# @TEST-EXEC: zeek -b %INPUT >out
|
||||
# @TEST-EXEC: btest-diff out
|
||||
|
||||
# Exercises paraglob_init, paraglob_equals and paraglob_get. Everything
# printed here goes to stdout, which the test redirects to "out" and
# compares against the baseline.
event zeek_init ()
|
||||
{
|
||||
local v1 = vector("*", "d?g", "*og", "d?", "d[!wl]g");
|
||||
local v2 = vector("once", "!o*", "once");
|
||||
local v3 = vector("https://*.google.com/*", "*malware*", "*.gov*");
|
||||
|
||||
local p1 = paraglob_init(v1);
|
||||
local p2: opaque of paraglob = paraglob_init(v2);
|
||||
local p3 = paraglob_init(v3);
|
||||
local p_eq = paraglob_init(v1);
|
||||
|
||||
# paraglob_init should not modify v1
|
||||
print (v1 == vector("*", "d?g", "*og", "d?", "d[!wl]g"));
|
||||
# p_eq and p1 should be the same paraglobs
|
||||
print paraglob_equals(p1, p_eq);
|
||||
|
||||
# Each lookup prints the patterns of the paraglob that match the string.
print paraglob_get(p1, "dog");
|
||||
print paraglob_get(p2, "once");
|
||||
print paraglob_get(p3, "www.strange-malware-domain.gov");
|
||||
|
||||
# This looks like a lot, but really should complete quickly.
|
||||
# Paraglob should stop addition of duplicate patterns.
|
||||
local i = 1000000;
|
||||
while (i > 0) {
|
||||
i = i - 1;
|
||||
# Appends another copy of an element v3 already contains.
v3 += v3[1];
|
||||
}
|
||||
|
||||
# The result is expected to be the same as for p3 above.
local large_glob: opaque of paraglob = paraglob_init(v3);
|
||||
print paraglob_get(large_glob, "www.strange-malware-domain.gov");
|
||||
}
|
|
@ -0,0 +1,38 @@
|
|||
# @TEST-EXEC: zeek -b %INPUT
|
||||
# @TEST-EXEC: btest-diff .stderr
|
||||
|
||||
@TEST-START-FILE input.log
|
||||
#separator \x09
|
||||
#fields i p
|
||||
#types count pattern
|
||||
1 /d/og/
|
||||
2 /cat/sss
|
||||
3 /foo|bar
|
||||
4 this is not a pattern
|
||||
5 /5
|
||||
@TEST-END-FILE
|
||||
|
||||
redef exit_only_after_terminate = T;
|
||||
|
||||
module A;
|
||||
|
||||
type Idx: record {
|
||||
i: int;
|
||||
};
|
||||
|
||||
type Val: record {
|
||||
p: pattern;
|
||||
};
|
||||
|
||||
# Shuts Zeek down. The script sets exit_only_after_terminate, so the test
# only finishes once this event has run; it is scheduled from zeek_init.
event kill_me()
|
||||
{
|
||||
terminate();
|
||||
}
|
||||
|
||||
global pats: table[int] of Val = table();
|
||||
|
||||
# Starts reading input.log into the pats table and schedules the shutdown.
# Only line 1 of input.log is a well-formed pattern; the reader messages for
# the other lines end up in .stderr, which the test diffs.
event zeek_init()
|
||||
{
|
||||
Input::add_table([$source="input.log", $name="pats", $idx=Idx, $val=Val, $destination=pats]);
|
||||
# NOTE(review): termination is purely time-based; presumably 10msec is
# enough for the reader to report all four bad lines - confirm.
schedule 10msec { kill_me() };
|
||||
}
|
47
testing/btest/scripts/base/frameworks/input/patterns.zeek
Normal file
47
testing/btest/scripts/base/frameworks/input/patterns.zeek
Normal file
|
@ -0,0 +1,47 @@
|
|||
# @TEST-EXEC: btest-bg-run zeek zeek -b %INPUT
# @TEST-EXEC: btest-bg-wait 10
# @TEST-EXEC: btest-diff out
|
||||
|
||||
|
||||
redef exit_only_after_terminate = T;
|
||||
|
||||
@TEST-START-FILE input.log
|
||||
#separator \x09
|
||||
#fields i p
|
||||
#types count pattern
|
||||
1 /dog/
|
||||
2 /cat/
|
||||
3 /foo|bar/
|
||||
4 /^oob/
|
||||
@TEST-END-FILE
|
||||
|
||||
global outfile: file;
|
||||
|
||||
module A;
|
||||
|
||||
type Idx: record {
|
||||
i: int;
|
||||
};
|
||||
|
||||
type Val: record {
|
||||
p: pattern;
|
||||
};
|
||||
|
||||
global pats: table[int] of Val = table();
|
||||
|
||||
# Opens the result file and starts reading input.log into the pats table.
event zeek_init()
|
||||
{
|
||||
# Results are written one directory up from where Zeek runs.
outfile = open("../out");
|
||||
# Load the patterns from input.log into the pats table.
|
||||
Input::add_table([$source="../input.log", $name="pats", $idx=Idx, $val=Val, $destination=pats]);
|
||||
}
|
||||
|
||||
# Runs once the input stream has delivered all of its data: checks that the
# values read from input.log behave like patterns, dumps the table, then
# removes the stream and shuts down.
event Input::end_of_data(name: string, source:string)
|
||||
{
|
||||
# pats[3] was read from "/foo|bar/" and pats[4] from "/^oob/".
print outfile, (pats[3]$p in "foobar"); # T
|
||||
print outfile, (pats[4]$p in "foobar"); # F
|
||||
print outfile, (pats[3]$p == "foo"); # T
|
||||
print outfile, pats;
|
||||
Input::remove("pats");
|
||||
close(outfile);
|
||||
terminate();
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue