Merge branch 'topic/christian/inputframework-paths' of https://github.com/ckreibich/zeek

* 'topic/christian/inputframework-paths' of https://github.com/ckreibich/zeek:
  Source file path control for Input and Intel frameworks
This commit is contained in:
Jon Siwek 2019-03-18 11:45:35 -07:00
commit f19db92508
41 changed files with 502 additions and 10 deletions

11
CHANGES
View file

@ -1,4 +1,15 @@
2.6-166 | 2019-03-18 11:45:35 -0700
* Add source file path control options for Input and Intel frameworks (Christian Kreibich, Corelight)
This introduces the following redefinable string constants, empty by
default:
- InputAscii::path_prefix
- InputBinary::path_prefix
- Intel::path_prefix
2.6-164 | 2019-03-15 19:45:48 -0700
* Migrate table-based for-loops to key-value iteration (Jon Siwek, Corelight)

7
NEWS
View file

@ -42,6 +42,13 @@ New Functionality
for ( key, value in t )
print key, value;
- Added options for controlling the source path/prefix for Input and
Intel framework files:
- InputAscii::path_prefix
- InputBinary::path_prefix
- Intel::path_prefix
Changed Functionality
---------------------

View file

@ -1 +1 @@
2.6-164
2.6-166

2
doc

@ -1 +1 @@
Subproject commit 79748981d8abf9d76fe045b0151698f98d43c05c
Subproject commit 3438d6f185e258e4ef17b5d11b4e374356bd2ce0

View file

@ -47,4 +47,10 @@ export {
## fail_on_file_problem = T was the default behavior
## until Bro 2.6.
const fail_on_file_problem = F &redef;
## On input streams with a pathless or relative-path source filename,
## prefix the following path. This prefix can, but need not be, absolute.
## The default is to leave any filenames unchanged. This prefix has no
## effect if the source already is an absolute path.
const path_prefix = "" &redef;
}

View file

@ -5,4 +5,10 @@ module InputBinary;
export {
## Size of data chunks to read from the input file at a time.
const chunk_size = 1024 &redef;
## On input streams with a pathless or relative-path source filename,
## prefix the following path. This prefix can, but need not be, absolute.
## The default is to leave any filenames unchanged. This prefix has no
## effect if the source already is an absolute path.
const path_prefix = "" &redef;
}

View file

@ -10,6 +10,16 @@ export {
## reread every time they are updated so updates must be atomic
## with "mv" instead of writing the file in place.
const read_files: set[string] = {} &redef;
## An optional path prefix for intel files. This prefix can, but
## need not be, absolute. The default is to leave any filenames
## unchanged. This prefix has no effect if a read_files entry is
## an absolute path. This prefix gets applied _before_ entering
## the input framework, so if the prefix is absolute, the input
## framework won't munge it further. If it is relative, then
## any path_prefix specified in the input framework will apply
## additionally.
const path_prefix = "" &redef;
}
event Intel::read_entry(desc: Input::EventDescription, tpe: Input::Event, item: Intel::Item)
@ -24,7 +34,17 @@ event bro_init() &priority=5
{
for ( a_file in read_files )
{
Input::add_event([$source=a_file,
# Handle prefixing of the source file name. Note
# that this currently always uses the ASCII reader,
# so we know we're dealing with filenames.
local source = a_file;
# If we have a path prefix and the file doesn't
# already have an absolute path, prepend the prefix.
if ( |path_prefix| > 0 && sub_bytes(a_file, 0, 1) != "/" )
source = cat(rstrip(path_prefix, "/"), "/", a_file);
Input::add_event([$source=source,
$reader=Input::READER_ASCII,
$mode=Input::REREAD,
$name=cat("intel-", a_file),

View file

@ -82,6 +82,9 @@ bool Ascii::DoInit(const ReaderInfo& info, int num_fields, const Field* const* f
fail_on_invalid_lines = BifConst::InputAscii::fail_on_invalid_lines;
fail_on_file_problem = BifConst::InputAscii::fail_on_file_problem;
path_prefix.assign((const char*) BifConst::InputAscii::path_prefix->Bytes(),
BifConst::InputAscii::path_prefix->Len());
// Set per-filter configuration options.
for ( ReaderInfo::config_map::const_iterator i = info.config.begin(); i != info.config.end(); i++ )
{
@ -137,18 +140,34 @@ bool Ascii::OpenFile()
if ( file.is_open() )
return true;
file.open(Info().source);
// Handle path-prefixing. See similar logic in Binary::DoInit().
// The prefix is applied only to non-empty, relative source names;
// a source starting with '/' is treated as absolute and used as is.
fname = Info().source;
// Test for emptiness before calling front(): front() on an empty
// std::string is undefined behavior.
if ( ! path_prefix.empty() && ! fname.empty() && fname.front() != '/' )
	{
	string path = path_prefix;
	// Drop trailing slashes from the prefix before joining.
	std::size_t last = path.find_last_not_of("/");
	if ( last == string::npos ) // Nothing but slashes -- weird but ok...
		path = "/";
	else
		path.erase(last + 1);
	fname = path + "/" + fname;
	}
file.open(fname);
if ( ! file.is_open() )
{
FailWarn(fail_on_file_problem, Fmt("Init: cannot open %s", Info().source), true);
FailWarn(fail_on_file_problem, Fmt("Init: cannot open %s", fname.c_str()), true);
return ! fail_on_file_problem;
}
if ( ReadHeader(false) == false )
{
FailWarn(fail_on_file_problem, Fmt("Init: cannot open %s; problem reading file header", Info().source), true);
FailWarn(fail_on_file_problem, Fmt("Init: cannot open %s; problem reading file header", fname.c_str()), true);
file.close();
return ! fail_on_file_problem;
@ -169,7 +188,7 @@ bool Ascii::ReadHeader(bool useCached)
if ( ! GetLine(line) )
{
FailWarn(fail_on_file_problem, Fmt("Could not read input data file %s; first line could not be read",
Info().source), true);
fname.c_str()), true);
return false;
}
@ -212,7 +231,7 @@ bool Ascii::ReadHeader(bool useCached)
}
FailWarn(fail_on_file_problem, Fmt("Did not find requested field %s in input data file %s.",
field->name, Info().source), true);
field->name, fname.c_str()), true);
return false;
}
@ -274,9 +293,9 @@ bool Ascii::DoUpdate()
{
// check if the file has changed
struct stat sb;
if ( stat(Info().source, &sb) == -1 )
if ( stat(fname.c_str(), &sb) == -1 )
{
FailWarn(fail_on_file_problem, Fmt("Could not get stat for %s", Info().source), true);
FailWarn(fail_on_file_problem, Fmt("Could not get stat for %s", fname.c_str()), true);
file.close();
return ! fail_on_file_problem;

View file

@ -66,6 +66,11 @@ private:
time_t mtime;
ino_t ino;
// The name we actually use to load the file. Compared to the
// input source name, this one may have a path_prefix attached
// to it.
string fname;
// map columns in the file to columns to send back to the manager
vector<FieldMapping> columnMap;
@ -79,6 +84,7 @@ private:
string unset_field;
bool fail_on_invalid_lines;
bool fail_on_file_problem;
string path_prefix;
// this is an internal indicator in case the read is currently in a failed state
// it's used to suppress duplicate error messages.

View file

@ -7,3 +7,4 @@ const empty_field: string;
const unset_field: string;
const fail_on_invalid_lines: bool;
const fail_on_file_problem: bool;
const path_prefix: string;

View file

@ -81,6 +81,9 @@ bool Binary::DoInit(const ReaderInfo& info, int num_fields,
ino = 0;
firstrun = true;
path_prefix.assign((const char*) BifConst::InputBinary::path_prefix->Bytes(),
BifConst::InputBinary::path_prefix->Len());
if ( ! info.source || strlen(info.source) == 0 )
{
Error("No source path provided");
@ -104,6 +107,20 @@ bool Binary::DoInit(const ReaderInfo& info, int num_fields,
// do initialization
fname = info.source;
// Handle path-prefixing. See similar logic in Ascii::OpenFile().
// Only relative sources receive the prefix; a leading '/' marks the
// source as absolute and leaves it unchanged.
if ( fname.front() != '/' && ! path_prefix.empty() )
	{
	// Position of the last character in the prefix that is not a slash.
	const std::size_t last_keep = path_prefix.find_last_not_of("/");
	// A prefix consisting solely of slashes collapses to "/";
	// otherwise keep everything up to the last non-slash character.
	const string dir = ( last_keep == string::npos )
		? string("/")
		: path_prefix.substr(0, last_keep + 1);
	fname = dir + "/" + fname;
	}
if ( ! OpenInput() )
return false;

View file

@ -42,6 +42,7 @@ private:
// options set from the script-level.
static streamsize chunk_size;
string path_prefix;
};
}

View file

@ -2,3 +2,4 @@
module InputBinary;
const chunk_size: count;
const path_prefix: string;

View file

@ -0,0 +1,3 @@
[ip=127.0.3.1, tag=just]
[ip=127.0.3.2, tag=some]
[ip=127.0.3.3, tag=value]

View file

@ -0,0 +1 @@
file_hash, md5, ad8f8274b0e9fedb79093b1d1b6d6d73

View file

@ -0,0 +1,5 @@
{
[127.0.3.2] = some,
[127.0.3.3] = value,
[127.0.3.1] = just
}

View file

@ -0,0 +1,3 @@
[ip=127.0.4.1, tag=just]
[ip=127.0.4.2, tag=some]
[ip=127.0.4.3, tag=value]

View file

@ -0,0 +1 @@
file_hash, md5, cacc0ee959be71cbe287c5554b4e83b0

View file

@ -0,0 +1,5 @@
{
[127.0.4.1] = just,
[127.0.4.2] = some,
[127.0.4.3] = value
}

View file

@ -0,0 +1,3 @@
[ip=127.0.0.1, tag=just]
[ip=127.0.0.2, tag=some]
[ip=127.0.0.3, tag=value]

View file

@ -0,0 +1 @@
file_hash, md5, f78b90a215f8decb59cf672df57ab134

View file

@ -0,0 +1,5 @@
{
[127.0.0.2] = some,
[127.0.0.1] = just,
[127.0.0.3] = value
}

View file

@ -0,0 +1,3 @@
[ip=127.0.1.1, tag=just]
[ip=127.0.1.2, tag=some]
[ip=127.0.1.3, tag=value]

View file

@ -0,0 +1 @@
file_hash, md5, 6a87477e9c8e269ac9cc0cb70464ea1f

View file

@ -0,0 +1,5 @@
{
[127.0.1.1] = just,
[127.0.1.2] = some,
[127.0.1.3] = value
}

View file

@ -0,0 +1,3 @@
127.0.2.1 Intel::ADDR
127.0.2.2 Intel::ADDR
127.0.2.3 Intel::ADDR

View file

@ -0,0 +1,3 @@
127.0.1.1 Intel::ADDR
127.0.1.2 Intel::ADDR
127.0.1.3 Intel::ADDR

View file

@ -0,0 +1,3 @@
127.0.0.1 Intel::ADDR
127.0.0.2 Intel::ADDR
127.0.0.3 Intel::ADDR

View file

@ -0,0 +1,3 @@
127.0.0.1 Intel::ADDR
127.0.0.2 Intel::ADDR
127.0.0.3 Intel::ADDR

View file

@ -0,0 +1,54 @@
# These tests set the InputAscii::path_prefix / InputBinary::path_prefix
# variables to verify that an absolute path prefix gets added correctly
# to relative/path-less input sources.
#
# @TEST-EXEC: cat %INPUT | sed "s|@path_prefix@|$PWD/subdir|" >input.bro
# @TEST-EXEC: mkdir -p subdir
#
# Note, in the following we'd ideally use %DIR to express the
# additional path, but there's currently a problem in btest with using
# %DIR after TEST-START-NEXT.
#
# @TEST-EXEC: BROPATH=$BROPATH:$TEST_BASE/scripts/base/frameworks/input/path-prefix bro -b input.bro >output
# @TEST-EXEC: btest-diff output
@TEST-START-FILE subdir/input.data
#fields ip tag
127.0.3.1 just
127.0.3.2 some
127.0.3.3 value
@TEST-END-FILE
@load path-prefix-common-table.bro
redef InputAscii::path_prefix = "@path_prefix@";
event bro_init()
{
Input::add_table([$source="input.data", $name="input", $idx=Idx, $val=Val,
$destination=destination, $want_record=F]);
}
# @TEST-START-NEXT
#
# The same test, but using event streams for input.
@load path-prefix-common-event.bro
redef InputAscii::path_prefix = "@path_prefix@";
event bro_init()
{
Input::add_event([$source="input.data", $name="input",
$fields=Val, $ev=inputev]);
}
# @TEST-START-NEXT
#
# The same test again, but using file analysis w/ binary readers.
@load path-prefix-common-analysis.bro
redef InputBinary::path_prefix = "@path_prefix@";
event bro_init()
{
Input::add_analysis([$source="input.data", $name="input"]);
}

View file

@ -0,0 +1,48 @@
# These tests set the InputAscii::path_prefix / InputBinary::path_prefix
# variables to verify that setting these prefixes has no effect when
# an input file uses an absolute-path source.
#
# @TEST-EXEC: cat %INPUT | sed "s|@path_prefix@|$PWD|" >input.bro
# @TEST-EXEC: BROPATH=$BROPATH:$TEST_BASE/scripts/base/frameworks/input/path-prefix bro -b input.bro >output
# @TEST-EXEC: btest-diff output
@TEST-START-FILE input.data
#fields ip tag
127.0.4.1 just
127.0.4.2 some
127.0.4.3 value
@TEST-END-FILE
@load path-prefix-common-table.bro
redef InputAscii::path_prefix = "/this/does/not/exist";
event bro_init()
{
Input::add_table([$source="@path_prefix@/input.data", $name="input", $idx=Idx, $val=Val,
$destination=destination, $want_record=F]);
}
# @TEST-START-NEXT
#
# The same test, but using event streams for input.
@load path-prefix-common-event.bro
redef InputAscii::path_prefix = "/this/does/not/exist";
event bro_init()
{
Input::add_event([$source="@path_prefix@/input.data", $name="input",
$fields=Val, $ev=inputev]);
}
# @TEST-START-NEXT
#
# The same test again, but using file analysis w/ binary readers.
@load path-prefix-common-analysis.bro
redef InputBinary::path_prefix = "/this/does/not/exist";
event bro_init()
{
Input::add_analysis([$source="@path_prefix@/input.data", $name="input"]);
}

View file

@ -0,0 +1,43 @@
# These tests verify that when setting neither InputAscii::path_prefix
# nor InputBinary::path_prefix, Zeek correctly locates local input files.
#
# @TEST-EXEC: BROPATH=$BROPATH:$TEST_BASE/scripts/base/frameworks/input/path-prefix bro -b %INPUT >output
# @TEST-EXEC: btest-diff output
@TEST-START-FILE input.data
#fields ip tag
127.0.0.1 just
127.0.0.2 some
127.0.0.3 value
@TEST-END-FILE
@load path-prefix-common-table.bro
event bro_init()
{
Input::add_table([$source="input.data", $name="input", $idx=Idx, $val=Val,
$destination=destination, $want_record=F]);
}
# @TEST-START-NEXT
#
# The same test, but using event streams for input.
@load path-prefix-common-event.bro
event bro_init()
{
Input::add_event([$source="input.data", $name="input",
$fields=Val, $ev=inputev]);
}
# @TEST-START-NEXT
#
# The same test again, but using file analysis w/ binary readers.
@load path-prefix-common-analysis.bro
event bro_init()
{
Input::add_analysis([$source="input.data", $name="input"]);
}

View file

@ -0,0 +1,16 @@
# @TEST-IGNORE
#
# This file contains code used by the file analysis path-prefix tests.
# Do not exit on our own; the file_hash handler below calls terminate().
redef exit_only_after_terminate = T;
# Attach the MD5 analyzer to every new file reported by file analysis.
event file_new(f: fa_file)
{
Files::add_analyzer(f, Files::ANALYZER_MD5);
}
# Print the hash kind and value for the test baseline, then shut down.
event file_hash(f: fa_file, kind: string, hash: string)
{
print "file_hash", kind, hash;
terminate();
}

View file

@ -0,0 +1,21 @@
# @TEST-IGNORE
#
# This file contains code used by the event-driven path-prefix tests.
# Do not exit on our own; the end_of_data handler below calls terminate().
redef exit_only_after_terminate = T;
# Record layout of one input line: the "ip" and "tag" columns of the
# test data files.
type Val: record {
ip: addr;
tag: string;
};
# Event handler the tests pass as $ev to Input::add_event; prints each
# record it receives for the test baseline.
event inputev(description: Input::EventDescription,
t: Input::Event, data: Val)
{
print data;
}
# Shut down when the input framework raises end_of_data.
event Input::end_of_data(name: string, source: string)
{
terminate();
}

View file

@ -0,0 +1,21 @@
# @TEST-IGNORE
#
# This file contains code used by the table-driven path-prefix tests.
# Do not exit on our own; the end_of_data handler below calls terminate().
redef exit_only_after_terminate = T;
# Index record for the input table: the "ip" column of the test data.
type Idx: record {
ip: addr;
};
# Value record for the input table: the "tag" column of the test data.
type Val: record {
tag: string;
};
# Table the tests pass as $destination to Input::add_table.
global destination: table[addr] of string = table();
# When the input framework raises end_of_data, print the destination
# table for the test baseline and shut down.
event Input::end_of_data(name: string, source: string)
{
print destination;
terminate();
}

View file

@ -0,0 +1,48 @@
# These tests set the InputAscii::path_prefix / InputBinary::path_prefix
# variables to verify that a relative path prefix applies correctly
# from the current working directory.
#
# @TEST-EXEC: mkdir -p alternative
# @TEST-EXEC: BROPATH=$BROPATH:$TEST_BASE/scripts/base/frameworks/input/path-prefix bro -b %INPUT >output
# @TEST-EXEC: btest-diff output
@TEST-START-FILE alternative/input.data
#fields ip tag
127.0.1.1 just
127.0.1.2 some
127.0.1.3 value
@TEST-END-FILE
@load path-prefix-common-table.bro
redef InputAscii::path_prefix = "alternative";
event bro_init()
{
Input::add_table([$source="input.data", $name="input", $idx=Idx, $val=Val,
$destination=destination, $want_record=F]);
}
# @TEST-START-NEXT
#
# The same test, but using event streams for input.
@load path-prefix-common-event.bro
redef InputAscii::path_prefix = "alternative";
event bro_init()
{
Input::add_event([$source="input.data", $name="input",
$fields=Val, $ev=inputev]);
}
# @TEST-START-NEXT
#
# The same test again, but using file analysis w/ binary readers.
@load path-prefix-common-analysis.bro
redef InputBinary::path_prefix = "alternative";
event bro_init()
{
Input::add_analysis([$source="input.data", $name="input"]);
}

View file

@ -0,0 +1,23 @@
# This test verifies that an absolute Intel::path_prefix overrides any
# prefix set for the Input framework. We still want the Intel framework to
# "break out" of any file system location specified for the input
# framework, e.g. when their paths live side-by-side (/foo/bar/input,
# /foo/bar/intel).
#
# @TEST-EXEC: mkdir -p intel
# @TEST-EXEC: cat %INPUT | sed "s|@path_prefix@|$PWD/intel|" >input.bro
# @TEST-EXEC: BROPATH=$BROPATH:$TEST_BASE/scripts/base/frameworks/intel/path-prefix bro -b input.bro >output
# @TEST-EXEC: btest-diff output
@TEST-START-FILE intel/test.data
#fields indicator indicator_type meta.source
127.0.2.1 Intel::ADDR this btest
127.0.2.2 Intel::ADDR this btest
127.0.2.3 Intel::ADDR this btest
@TEST-END-FILE
@load path-prefix-common.bro
redef Intel::read_files += { "test.data" };
redef InputAscii::path_prefix = "/this/does/not/exist";
redef Intel::path_prefix = "@path_prefix@";

View file

@ -0,0 +1,20 @@
# This test verifies that combining InputAscii::path_prefix and
# Intel::path_prefix works as intended: the intel path gets
# prepended first, then the input framework one.
#
# @TEST-EXEC: mkdir -p input/intel
# @TEST-EXEC: BROPATH=$BROPATH:$TEST_BASE/scripts/base/frameworks/intel/path-prefix bro -b %INPUT >output
# @TEST-EXEC: btest-diff output
@TEST-START-FILE input/intel/test.data
#fields indicator indicator_type meta.source
127.0.1.1 Intel::ADDR this btest
127.0.1.2 Intel::ADDR this btest
127.0.1.3 Intel::ADDR this btest
@TEST-END-FILE
@load path-prefix-common.bro
redef Intel::read_files += { "test.data" };
redef InputAscii::path_prefix = "input";
redef Intel::path_prefix = "intel";

View file

@ -0,0 +1,20 @@
# This test verifies that specifying an InputAscii::path_prefix
# also affects the Intel framework since it relies on the
# former for loading data. (Note that this also tests the
# Input::REREAD ingestion mode.)
#
# @TEST-EXEC: mkdir -p alternative
# @TEST-EXEC: BROPATH=$BROPATH:$TEST_BASE/scripts/base/frameworks/intel/path-prefix bro -b %INPUT >output
# @TEST-EXEC: btest-diff output
@TEST-START-FILE alternative/test.data
#fields indicator indicator_type meta.source
127.0.0.1 Intel::ADDR this btest
127.0.0.2 Intel::ADDR this btest
127.0.0.3 Intel::ADDR this btest
@TEST-END-FILE
@load path-prefix-common.bro
redef Intel::read_files += { "test.data" };
redef InputAscii::path_prefix = "alternative";

View file

@ -0,0 +1,16 @@
# This test verifies that when setting neither InputAscii::path_prefix
# nor Intel::path_prefix, Zeek correctly locates local intel files.
#
# @TEST-EXEC: BROPATH=$BROPATH:$TEST_BASE/scripts/base/frameworks/intel/path-prefix bro -b %INPUT >output
# @TEST-EXEC: btest-diff output
@TEST-START-FILE test.data
#fields indicator indicator_type meta.source
127.0.0.1 Intel::ADDR this btest
127.0.0.2 Intel::ADDR this btest
127.0.0.3 Intel::ADDR this btest
@TEST-END-FILE
@load path-prefix-common.bro
redef Intel::read_files += { "test.data" };

View file

@ -0,0 +1,19 @@
# @TEST-IGNORE
#
# This file contains code used by the intel framework path-prefix tests.
@load base/frameworks/intel
# Do not exit on our own; the end_of_data handler below calls terminate().
redef exit_only_after_terminate = T;
module Intel;
# Print the indicator and its type for each Intel::new_item event, for
# the test baseline.
event Intel::new_item(item: Intel::Item)
{
print fmt("%s %s", item$indicator, item$indicator_type);
}
# Shut down when the input framework raises end_of_data.
event Input::end_of_data(name: string, source: string)
{
terminate();
}