Merge branch 'topic/corelight/ascii-gzip' of https://github.com/corelight/bro

* 'topic/corelight/ascii-gzip' of https://github.com/corelight/bro:
  Add gzip log writing to the ascii writer.
This commit is contained in:
Jon Siwek 2017-05-22 00:08:52 -05:00
commit 4d06ee85ce
9 changed files with 222 additions and 9 deletions

10
CHANGES
View file

@ -1,4 +1,14 @@
2.5-142 | 2017-05-22 00:08:52 -0500
* Add gzip log writing to the ascii writer.
This feature can be enabled globally for all logs by setting
LogAscii::gzip_level to a value greater than 0.
This feature can be enabled on a per-log basis by setting gzip_level in
$config to a value greater than 0. (Corelight)
2.5-140 | 2017-05-12 15:31:32 -0400 2.5-140 | 2017-05-12 15:31:32 -0400
* Lessen cluster load due to notice suppression. * Lessen cluster load due to notice suppression.

View file

@ -1 +1 @@
2.5-140 2.5-142

View file

@ -26,6 +26,13 @@ export {
## This option is also available as a per-filter ``$config`` option. ## This option is also available as a per-filter ``$config`` option.
const use_json = F &redef; const use_json = F &redef;
## Define the gzip level used to compress the logs. Valid values are 0
## through 9. If 0, then no gzip compression is performed. Enabling
## compression also changes the log file name extension to include ".gz".
##
## This option is also available as a per-filter ``$config`` option.
const gzip_level = 0 &redef;
## Format of timestamps when writing out JSON. By default, the JSON ## Format of timestamps when writing out JSON. By default, the JSON
## formatter will use double values for timestamps which represent the ## formatter will use double values for timestamps which represent the
## number of seconds from the UNIX epoch. ## number of seconds from the UNIX epoch.

View file

@ -24,6 +24,8 @@ Ascii::Ascii(WriterFrontend* frontend) : WriterBackend(frontend)
tsv = false; tsv = false;
use_json = false; use_json = false;
formatter = 0; formatter = 0;
gzip_level = 0;
gzfile = nullptr;
InitConfigOptions(); InitConfigOptions();
init_options = InitFilterOptions(); init_options = InitFilterOptions();
@ -34,6 +36,7 @@ void Ascii::InitConfigOptions()
output_to_stdout = BifConst::LogAscii::output_to_stdout; output_to_stdout = BifConst::LogAscii::output_to_stdout;
include_meta = BifConst::LogAscii::include_meta; include_meta = BifConst::LogAscii::include_meta;
use_json = BifConst::LogAscii::use_json; use_json = BifConst::LogAscii::use_json;
gzip_level = BifConst::LogAscii::gzip_level;
separator.assign( separator.assign(
(const char*) BifConst::LogAscii::separator->Bytes(), (const char*) BifConst::LogAscii::separator->Bytes(),
@ -89,6 +92,16 @@ bool Ascii::InitFilterOptions()
} }
} }
else if ( strcmp(i->first, "gzip_level" ) == 0 )
{
gzip_level = atoi(i->second);
if ( gzip_level < 0 || gzip_level > 9 )
{
Error("invalid value for 'gzip_level', must be a number between 0 and 9.");
return false;
}
}
else if ( strcmp(i->first, "use_json") == 0 ) else if ( strcmp(i->first, "use_json") == 0 )
{ {
if ( strcmp(i->second, "T") == 0 ) if ( strcmp(i->second, "T") == 0 )
@ -192,7 +205,7 @@ bool Ascii::WriteHeaderField(const string& key, const string& val)
{ {
string str = meta_prefix + key + separator + val + "\n"; string str = meta_prefix + key + separator + val + "\n";
return safe_write(fd, str.c_str(), str.length()); return InternalWrite(fd, str.c_str(), str.length());
} }
void Ascii::CloseFile(double t) void Ascii::CloseFile(double t)
@ -203,8 +216,9 @@ void Ascii::CloseFile(double t)
if ( include_meta && ! tsv ) if ( include_meta && ! tsv )
WriteHeaderField("close", Timestamp(0)); WriteHeaderField("close", Timestamp(0));
safe_close(fd); InternalClose(fd);
fd = 0; fd = 0;
gzfile = nullptr;
} }
bool Ascii::DoInit(const WriterInfo& info, int num_fields, const Field* const * fields) bool Ascii::DoInit(const WriterInfo& info, int num_fields, const Field* const * fields)
@ -219,7 +233,8 @@ bool Ascii::DoInit(const WriterInfo& info, int num_fields, const Field* const *
if ( output_to_stdout ) if ( output_to_stdout )
path = "/dev/stdout"; path = "/dev/stdout";
fname = IsSpecial(path) ? path : path + "." + LogExt(); fname = IsSpecial(path) ? path : path + "." + LogExt() +
(gzip_level > 0 ? ".gz" : "");
fd = open(fname.c_str(), O_WRONLY | O_CREAT | O_TRUNC, 0666); fd = open(fname.c_str(), O_WRONLY | O_CREAT | O_TRUNC, 0666);
@ -231,6 +246,31 @@ bool Ascii::DoInit(const WriterInfo& info, int num_fields, const Field* const *
return false; return false;
} }
if ( gzip_level > 0 )
{
if ( gzip_level < 0 || gzip_level > 9 )
{
Error("invalid value for 'gzip_level', must be a number between 0 and 9.");
return false;
}
char mode[4];
snprintf(mode, sizeof(mode), "wb%d", gzip_level);
errno = 0; // errno will only be set under certain circumstances by gzdopen.
gzfile = gzdopen(fd, mode);
if ( gzfile == nullptr )
{
Error(Fmt("cannot gzip %s: %s", fname.c_str(),
Strerror(errno)));
return false;
}
}
else
{
gzfile = nullptr;
}
if ( ! WriteHeader(path) ) if ( ! WriteHeader(path) )
{ {
Error(Fmt("error writing to %s: %s", fname.c_str(), Strerror(errno))); Error(Fmt("error writing to %s: %s", fname.c_str(), Strerror(errno)));
@ -264,7 +304,7 @@ bool Ascii::WriteHeader(const string& path)
{ {
// A single TSV-style line is all we need. // A single TSV-style line is all we need.
string str = names + "\n"; string str = names + "\n";
if ( ! safe_write(fd, str.c_str(), str.length()) ) if ( ! InternalWrite(fd, str.c_str(), str.length()) )
return false; return false;
return true; return true;
@ -275,7 +315,7 @@ bool Ascii::WriteHeader(const string& path)
+ get_escaped_string(separator, false) + get_escaped_string(separator, false)
+ "\n"; + "\n";
if ( ! safe_write(fd, str.c_str(), str.length()) ) if ( ! InternalWrite(fd, str.c_str(), str.length()) )
return false; return false;
if ( ! (WriteHeaderField("set_separator", get_escaped_string(set_separator, false)) && if ( ! (WriteHeaderField("set_separator", get_escaped_string(set_separator, false)) &&
@ -337,14 +377,14 @@ bool Ascii::DoWrite(int num_fields, const Field* const * fields,
char hex[4] = {'\\', 'x', '0', '0'}; char hex[4] = {'\\', 'x', '0', '0'};
bytetohex(bytes[0], hex + 2); bytetohex(bytes[0], hex + 2);
if ( ! safe_write(fd, hex, 4) ) if ( ! InternalWrite(fd, hex, 4) )
goto write_error; goto write_error;
++bytes; ++bytes;
--len; --len;
} }
if ( ! safe_write(fd, bytes, len) ) if ( ! InternalWrite(fd, bytes, len) )
goto write_error; goto write_error;
if ( ! IsBuf() ) if ( ! IsBuf() )
@ -368,7 +408,8 @@ bool Ascii::DoRotate(const char* rotated_path, double open, double close, bool t
CloseFile(close); CloseFile(close);
string nname = string(rotated_path) + "." + LogExt(); string nname = string(rotated_path) + "." + LogExt() +
(gzip_level > 0 ? ".gz" : "");
if ( rename(fname.c_str(), nname.c_str()) != 0 ) if ( rename(fname.c_str(), nname.c_str()) != 0 )
{ {
@ -434,4 +475,58 @@ string Ascii::Timestamp(double t)
return tmp; return tmp;
} }
bool Ascii::InternalWrite(int fd, const char* data, int len)
{
if ( ! gzfile )
return safe_write(fd, data, len);
while ( len > 0 )
{
int n = gzwrite(gzfile, data, len);
if ( n <= 0 )
{
const char* err = gzerror(gzfile, &n);
Error(Fmt("Ascii::InternalWrite error: %s\n", err));
return false;
}
data += n;
len -= n;
}
return true;
}
bool Ascii::InternalClose(int fd)
{
if ( ! gzfile )
{
safe_close(fd);
return true;
}
int res = gzclose(gzfile);
if ( res == Z_OK )
return true;
switch ( res ) {
case Z_STREAM_ERROR:
Error("Ascii::InternalClose gzclose error: invalid file stream");
break;
case Z_BUF_ERROR:
Error("Ascii::InternalClose gzclose error: "
"no compression progress possible during buffer flush");
break;
case Z_ERRNO:
Error(Fmt("Ascii::InternalClose gzclose error: %s\n", Strerror(errno)));
break;
default:
Error("Ascii::InternalClose invalid gzclose result");
break;
}
return false;
}

View file

@ -8,6 +8,7 @@
#include "logging/WriterBackend.h" #include "logging/WriterBackend.h"
#include "threading/formatters/Ascii.h" #include "threading/formatters/Ascii.h"
#include "threading/formatters/JSON.h" #include "threading/formatters/JSON.h"
#include "zlib.h"
namespace logging { namespace writer { namespace logging { namespace writer {
@ -42,8 +43,11 @@ private:
void InitConfigOptions(); void InitConfigOptions();
bool InitFilterOptions(); bool InitFilterOptions();
bool InitFormatter(); bool InitFormatter();
bool InternalWrite(int fd, const char* data, int len);
bool InternalClose(int fd);
int fd; int fd;
gzFile gzfile;
string fname; string fname;
ODesc desc; ODesc desc;
bool ascii_done; bool ascii_done;
@ -59,6 +63,7 @@ private:
string unset_field; string unset_field;
string meta_prefix; string meta_prefix;
int gzip_level; // level > 0 enables gzip compression
bool use_json; bool use_json;
string json_timestamps; string json_timestamps;

View file

@ -12,3 +12,4 @@ const empty_field: string;
const unset_field: string; const unset_field: string;
const use_json: bool; const use_json: bool;
const json_timestamps: JSON::TimestampFormat; const json_timestamps: JSON::TimestampFormat;
const gzip_level: count;

View file

@ -0,0 +1,10 @@
#separator \x09
#set_separator ,
#empty_field (empty)
#unset_field -
#path ssh-uncompressed
#open 2017-04-18-16-16-16
#fields b i e c p sn a d t iv s sc ss se vc ve f
#types bool int enum count port subnet addr double time interval string set[count] set[string] set[string] vector[count] vector[string] func
T -42 SSH::LOG 21 123 10.0.0.0/24 1.2.3.4 3.14 1215620010.543210 100.000000 hurz 2,4,1,3 BB,AA,CC (empty) 10,20,30 (empty) SSH::foo\x0a{ \x0aif (0 < SSH::i) \x0a\x09return (Foo);\x0aelse\x0a\x09return (Bar);\x0a\x0a}
#close 2017-04-18-16-16-16

View file

@ -0,0 +1,10 @@
#separator \x09
#set_separator ,
#empty_field (empty)
#unset_field -
#path ssh
#open 2017-04-18-16-15-17
#fields b i e c p sn a d t iv s sc ss se vc ve f
#types bool int enum count port subnet addr double time interval string set[count] set[string] set[string] vector[count] vector[string] func
T -42 SSH::LOG 21 123 10.0.0.0/24 1.2.3.4 3.14 1215620010.543210 100.000000 hurz 2,4,1,3 BB,AA,CC (empty) 10,20,30 (empty) SSH::foo\x0a{ \x0aif (0 < SSH::i) \x0a\x09return (Foo);\x0aelse\x0a\x09return (Bar);\x0a\x0a}
#close 2017-04-18-16-15-17

View file

@ -0,0 +1,75 @@
#
# @TEST-EXEC: bro -b %INPUT
# @TEST-EXEC: gunzip ssh.log.gz
# @TEST-EXEC: btest-diff ssh.log
# @TEST-EXEC: btest-diff ssh-uncompressed.log
#
# Testing all possible types.
# Turn on gzip compression globally at level 9; the test directives above
# expect the resulting file to be named ssh.log.gz.
redef LogAscii::gzip_level = 9;
module SSH;
export {
# Log stream identifier used by this test.
redef enum Log::ID += { LOG };
# Record written to the stream; it carries one field for each type the
# test exercises.
type Log: record {
b: bool;
i: int;
e: Log::ID;
c: count;
p: port;
sn: subnet;
a: addr;
d: double;
t: time;
iv: interval;
s: string;
sc: set[count];
ss: set[string];
se: set[string];  # written as an empty set in bro_init
vc: vector of count;
ve: vector of string;  # written as an empty vector in bro_init
f: function(i: count) : string;
} &log;
}
# Sample function assigned to the record's function-typed field "f".
# Returns "Foo" when i is greater than zero and "Bar" otherwise.
# NOTE: the test baselines contain the logged textual form of this body,
# so the statements themselves must not be restructured.
function foo(i : count) : string
{
if ( i > 0 )
return "Foo";
else
return "Bar";
}
# Creates the SSH::LOG stream, attaches an extra filter that overrides the
# global gzip setting, and writes a single record that populates every field.
event bro_init()
{
Log::create_stream(SSH::LOG, [$columns=Log]);
# Second filter: per-filter "gzip_level" of "0" turns compression off for
# the "ssh-uncompressed" path, overriding the global redef of gzip_level.
local filter = Log::Filter($name="ssh-uncompressed", $path="ssh-uncompressed",
$config = table(["gzip_level"] = "0"));
Log::add_filter(SSH::LOG, filter);
# Empty containers used for the "se" and "ve" fields.
local empty_set: set[string];
local empty_vector: vector of string;
# Single record with fixed values for every field.
Log::write(SSH::LOG, [
$b=T,
$i=-42,
$e=SSH::LOG,
$c=21,
$p=123/tcp,
$sn=10.0.0.1/24,
$a=1.2.3.4,
$d=3.14,
$t=(strptime("%Y-%m-%dT%H:%M:%SZ", "2008-07-09T16:13:30Z") + 0.543210 secs),
$iv=100secs,
$s="hurz",
$sc=set(1,2,3,4),
$ss=set("AA", "BB", "CC"),
$se=empty_set,
$vc=vector(10, 20, 30),
$ve=empty_vector,
$f=foo
]);
}