Add gzip log writing to the ascii writer.

This feature can be enabled globally for all logs by setting
LogAscii::gzip_level to a value greater than 0.

This feature can be enabled on a per-log basis by setting gzip_level in
$config to a value greater than 0.
This commit is contained in:
Johanna Amann 2017-04-24 13:12:49 -07:00
parent 2c2c9c9052
commit c868a19a28
7 changed files with 191 additions and 8 deletions

View file

@ -26,6 +26,13 @@ export {
## This option is also available as a per-filter ``$config`` option.
const use_json = F &redef;
## Define the gzip level to compress the logs. If 0,
## then no gzip compression is performed. Enabling compression also changes
## the log file name extension to include ".gz".
##
## This option is also available as a per-filter ``$config`` option.
const gzip_level = 0 &redef;
## Format of timestamps when writing out JSON. By default, the JSON
## formatter will use double values for timestamps which represent the
## number of seconds from the UNIX epoch.

View file

@ -24,6 +24,7 @@ Ascii::Ascii(WriterFrontend* frontend) : WriterBackend(frontend)
tsv = false;
use_json = false;
formatter = 0;
gzip_level = 0;
InitConfigOptions();
init_options = InitFilterOptions();
@ -34,6 +35,7 @@ void Ascii::InitConfigOptions()
output_to_stdout = BifConst::LogAscii::output_to_stdout;
include_meta = BifConst::LogAscii::include_meta;
use_json = BifConst::LogAscii::use_json;
gzip_level = BifConst::LogAscii::gzip_level;
separator.assign(
(const char*) BifConst::LogAscii::separator->Bytes(),
@ -89,6 +91,15 @@ bool Ascii::InitFilterOptions()
}
}
else if ( strcmp(i->first, "gzip_level" ) == 0 )
{
gzip_level = atoi(i->second);
if ( gzip_level < 0 || gzip_level > 9 )
{
Error("invalid value for 'gzip_level', must be a number between 0 and 9.");
return false;
}
}
else if ( strcmp(i->first, "use_json") == 0 )
{
if ( strcmp(i->second, "T") == 0 )
@ -192,7 +203,7 @@ bool Ascii::WriteHeaderField(const string& key, const string& val)
{
string str = meta_prefix + key + separator + val + "\n";
return safe_write(fd, str.c_str(), str.length());
return InternalWrite(fd, str.c_str(), str.length());
}
void Ascii::CloseFile(double t)
@ -203,7 +214,7 @@ void Ascii::CloseFile(double t)
if ( include_meta && ! tsv )
WriteHeaderField("close", Timestamp(0));
safe_close(fd);
InternalClose(fd);
fd = 0;
}
@ -219,7 +230,7 @@ bool Ascii::DoInit(const WriterInfo& info, int num_fields, const Field* const *
if ( output_to_stdout )
path = "/dev/stdout";
fname = IsSpecial(path) ? path : path + "." + LogExt();
fname = IsSpecial(path) ? path : path + "." + LogExt() + (( gzip_level > 0 ) ? ".gz" : "");
fd = open(fname.c_str(), O_WRONLY | O_CREAT | O_TRUNC, 0666);
@ -231,6 +242,25 @@ bool Ascii::DoInit(const WriterInfo& info, int num_fields, const Field* const *
return false;
}
if ( gzip_level > 0 )
	{
	// The mode buffer below only has room for a single-digit level. The
	// per-filter "gzip_level" option is range-checked to 0..9 when parsed.
	assert(gzip_level < 10);

	// "wb" + one level digit + terminating NUL.
	char mode[4];

	// Pass the real buffer size. A limit of 3 leaves room for just "wb"
	// plus the NUL, which silently drops the compression-level digit and
	// makes zlib fall back to its default level.
	snprintf(mode, sizeof(mode), "wb%d", gzip_level);

	errno = 0; // errno will only be set under certain circumstances by gzdopen.
	gzfile = gzdopen(fd, mode);

	if ( gzfile == nullptr )
		{
		Error(Fmt("cannot gzip %s: %s", fname.c_str(),
		          Strerror(errno)));
		return false;
		}
	}
else
	{
	// No compression: InternalWrite()/InternalClose() use the raw fd.
	gzfile = 0;
	}
if ( ! WriteHeader(path) )
{
Error(Fmt("error writing to %s: %s", fname.c_str(), Strerror(errno)));
@ -264,7 +294,7 @@ bool Ascii::WriteHeader(const string& path)
{
// A single TSV-style line is all we need.
string str = names + "\n";
if ( ! safe_write(fd, str.c_str(), str.length()) )
if ( ! InternalWrite(fd, str.c_str(), str.length()) )
return false;
return true;
@ -275,7 +305,7 @@ bool Ascii::WriteHeader(const string& path)
+ get_escaped_string(separator, false)
+ "\n";
if ( ! safe_write(fd, str.c_str(), str.length()) )
if ( ! InternalWrite(fd, str.c_str(), str.length()) )
return false;
if ( ! (WriteHeaderField("set_separator", get_escaped_string(set_separator, false)) &&
@ -337,14 +367,14 @@ bool Ascii::DoWrite(int num_fields, const Field* const * fields,
char hex[4] = {'\\', 'x', '0', '0'};
bytetohex(bytes[0], hex + 2);
if ( ! safe_write(fd, hex, 4) )
if ( ! InternalWrite(fd, hex, 4) )
goto write_error;
++bytes;
--len;
}
if ( ! safe_write(fd, bytes, len) )
if ( ! InternalWrite(fd, bytes, len) )
goto write_error;
if ( ! IsBuf() )
@ -368,7 +398,7 @@ bool Ascii::DoRotate(const char* rotated_path, double open, double close, bool t
CloseFile(close);
string nname = string(rotated_path) + "." + LogExt();
string nname = string(rotated_path) + "." + LogExt() + (gzfile ? ".gz" : "");
if ( rename(fname.c_str(), nname.c_str()) != 0 )
{
@ -434,4 +464,49 @@ string Ascii::Timestamp(double t)
return tmp;
}
bool Ascii::InternalWrite(int fd, const char* data, int len)
{
if ( gzfile )
{
while ( len > 0 )
{
int n = gzwrite(gzfile, data, len);
if ( n < 0 )
{
if ( n == Z_ERRNO )
Error(Fmt("Ascii::InternalWrite error: %s\n", Strerror(errno)));
else
Error(Fmt("Ascii::InternalWrite error: %s\n", gzerror(gzfile, &n)));
return false;
}
data += n;
len -= n;
}
}
else
return safe_write(fd, data, len);
return true;
}
bool Ascii::InternalClose(int fd)
{
if ( gzfile )
{
if ( gzclose(gzfile) < 0 )
{
Error(Fmt("Ascii::InternalClose error: %s\n", Strerror(errno)));
return false;
}
}
else
{
safe_close(fd);
}
return true;
}

View file

@ -8,6 +8,7 @@
#include "logging/WriterBackend.h"
#include "threading/formatters/Ascii.h"
#include "threading/formatters/JSON.h"
#include "zlib.h"
namespace logging { namespace writer {
@ -42,8 +43,11 @@ private:
void InitConfigOptions();
bool InitFilterOptions();
bool InitFormatter();
bool InternalWrite(int fd, const char* data, int len);
bool InternalClose(int fd);
int fd;
gzFile gzfile;
string fname;
ODesc desc;
bool ascii_done;
@ -59,6 +63,7 @@ private:
string unset_field;
string meta_prefix;
int gzip_level; // level > 0 enables gzip compression
bool use_json;
string json_timestamps;

View file

@ -12,3 +12,4 @@ const empty_field: string;
const unset_field: string;
const use_json: bool;
const json_timestamps: JSON::TimestampFormat;
const gzip_level: count;

View file

@ -0,0 +1,10 @@
#separator \x09
#set_separator ,
#empty_field (empty)
#unset_field -
#path ssh-uncompressed
#open 2017-04-18-16-16-16
#fields b i e c p sn a d t iv s sc ss se vc ve f
#types bool int enum count port subnet addr double time interval string set[count] set[string] set[string] vector[count] vector[string] func
T -42 SSH::LOG 21 123 10.0.0.0/24 1.2.3.4 3.14 1215620010.543210 100.000000 hurz 2,4,1,3 BB,AA,CC (empty) 10,20,30 (empty) SSH::foo\x0a{ \x0aif (0 < SSH::i) \x0a\x09return (Foo);\x0aelse\x0a\x09return (Bar);\x0a\x0a}
#close 2017-04-18-16-16-16

View file

@ -0,0 +1,10 @@
#separator \x09
#set_separator ,
#empty_field (empty)
#unset_field -
#path ssh
#open 2017-04-18-16-15-17
#fields b i e c p sn a d t iv s sc ss se vc ve f
#types bool int enum count port subnet addr double time interval string set[count] set[string] set[string] vector[count] vector[string] func
T -42 SSH::LOG 21 123 10.0.0.0/24 1.2.3.4 3.14 1215620010.543210 100.000000 hurz 2,4,1,3 BB,AA,CC (empty) 10,20,30 (empty) SSH::foo\x0a{ \x0aif (0 < SSH::i) \x0a\x09return (Foo);\x0aelse\x0a\x09return (Bar);\x0a\x0a}
#close 2017-04-18-16-15-17

View file

@ -0,0 +1,75 @@
#
# @TEST-EXEC: bro -b %INPUT
# @TEST-EXEC: gunzip ssh.log.gz
# @TEST-EXEC: btest-diff ssh.log
# @TEST-EXEC: btest-diff ssh-uncompressed.log
#
# Testing all possible types.
redef LogAscii::gzip_level = 9;
module SSH;
# Exported declarations for the test's SSH module: the log stream ID and
# the record type describing one log entry.
export {
redef enum Log::ID += { LOG };
# Log record with one field per value type exercised by this test; the
# field order matches the "#fields" line of the expected output.
type Log: record {
b: bool;
i: int;
e: Log::ID;
c: count;
p: port;
sn: subnet;
a: addr;
d: double;
t: time;
iv: interval;
s: string;
sc: set[count];
ss: set[string];
se: set[string];
vc: vector of count;
ve: vector of string;
f: function(i: count) : string;
} &log;
}
# Returns "Foo" if i is greater than 0 and "Bar" otherwise. This function is
# the value logged in the record's function-typed field `f`.
# NOTE(review): the expected-output baselines contain a rendering of this
# function's body, so editing the body presumably requires regenerating
# them -- confirm before changing.
function foo(i : count) : string
{
if ( i > 0 )
return "Foo";
else
return "Bar";
}
# Sets up the SSH log stream, adds a second filter with compression disabled,
# and writes a single record that populates every field of SSH::Log.
event bro_init()
{
Log::create_stream(SSH::LOG, [$columns=Log]);
# Additional filter writing to "ssh-uncompressed"; the per-filter
# "gzip_level" = "0" config entry overrides the global level of 9 set at
# the top of this script, so this output stays uncompressed.
# NOTE(review): the compressed ssh.log.gz that the test header gunzips
# presumably comes from the stream's default filter -- confirm.
local filter = Log::Filter($name="ssh-uncompressed", $path="ssh-uncompressed",
$config = table(["gzip_level"] = "0"));
Log::add_filter(SSH::LOG, filter);
# Empty containers used to exercise the "(empty)" rendering.
local empty_set: set[string];
local empty_vector: vector of string;
Log::write(SSH::LOG, [
$b=T,
$i=-42,
$e=SSH::LOG,
$c=21,
$p=123/tcp,
$sn=10.0.0.1/24,
$a=1.2.3.4,
$d=3.14,
$t=(strptime("%Y-%m-%dT%H:%M:%SZ", "2008-07-09T16:13:30Z") + 0.543210 secs),
$iv=100secs,
$s="hurz",
$sc=set(1,2,3,4),
$ss=set("AA", "BB", "CC"),
$se=empty_set,
$vc=vector(10, 20, 30),
$ve=empty_vector,
$f=foo
]);
}