Merge remote-tracking branch 'origin/topic/seth/json-formatter'

* origin/topic/seth/json-formatter:
  Updating a couple of tests.
  Expanded support for modifying the timestamp format in the JSON formatter.
  Ascii input reader now supports all config options per-input stream.
  Added an option to the JSON formatter to use ISO 8601 for timestamps.
  Refactored formatters and updated the writers a bit.

Includes some minor bugfixes and cleanup at various places, including
in old code.
This commit is contained in:
Robin Sommer 2014-03-13 16:01:25 -07:00
commit ff261ea626
34 changed files with 1038 additions and 540 deletions

View file

@ -10,8 +10,8 @@
#include "Ascii.h"
using namespace logging;
using namespace writer;
using namespace logging::writer;
using namespace threading;
using threading::Value;
using threading::Field;
@ -20,9 +20,46 @@ Ascii::Ascii(WriterFrontend* frontend) : WriterBackend(frontend)
fd = 0;
ascii_done = false;
tsv = false;
}
// Destructor: verifies that the writer's finish/shutdown path ran before
// teardown, then releases the formatter owned by this writer.
Ascii::~Ascii()
{
// NOTE(review): ascii_done is presumably set by the finish path; reaching
// the destructor without it means shutdown was skipped, which is treated
// as an internal error (hard abort rather than silent data loss).
if ( ! ascii_done )
{
fprintf(stderr, "internal error: finish missing\n");
abort();
}
delete formatter;
}
// Writes one metadata header line of the form
// "<meta_prefix><key><separator><val>\n" to the open log file descriptor.
// Returns the result of safe_write(), i.e. false if the write failed.
bool Ascii::WriteHeaderField(const string& key, const string& val)
{
string str = meta_prefix + key + separator + val + "\n";
return safe_write(fd, str.c_str(), str.length());
}
// Closes the current output file. When header metadata is enabled and the
// writer is not in plain-TSV mode, a "close" footer line is emitted first.
// A zero fd means no file is open, so the call is a no-op.
void Ascii::CloseFile(double t)
{
if ( ! fd )
return;
// NOTE(review): Timestamp(0) presumably renders the current time for the
// footer; the t parameter is unused here -- confirm against WriterBackend.
if ( include_meta && ! tsv )
WriteHeaderField("close", Timestamp(0));
safe_close(fd);
fd = 0;
}
bool Ascii::DoInit(const WriterInfo& info, int num_fields, const Field* const * fields)
{
assert(! fd);
// Set some default values.
output_to_stdout = BifConst::LogAscii::output_to_stdout;
include_meta = BifConst::LogAscii::include_meta;
use_json = BifConst::LogAscii::use_json;
separator.assign(
(const char*) BifConst::LogAscii::separator->Bytes(),
@ -49,45 +86,104 @@ Ascii::Ascii(WriterFrontend* frontend) : WriterBackend(frontend)
BifConst::LogAscii::meta_prefix->Len()
);
desc.EnableEscaping();
desc.AddEscapeSequence(separator);
ODesc tsfmt;
BifConst::LogAscii::json_timestamps->Describe(&tsfmt);
json_timestamps.assign(
(const char*) tsfmt.Bytes(),
tsfmt.Len()
);
ascii = new AsciiFormatter(this, AsciiFormatter::SeparatorInfo(set_separator, unset_field, empty_field));
}
Ascii::~Ascii()
{
if ( ! ascii_done )
// Set per-filter configuration options.
for ( WriterInfo::config_map::const_iterator i = info.config.begin(); i != info.config.end(); i++ )
{
fprintf(stderr, "internal error: finish missing\n");
abort();
if ( strcmp(i->first, "tsv") == 0 )
{
if ( strcmp(i->second, "T") == 0 )
tsv = true;
else if ( strcmp(i->second, "F") == 0 )
tsv = false;
else
{
Error("invalid value for 'tsv', must be a string and either \"T\" or \"F\"");
return false;
}
}
else if ( strcmp(i->first, "use_json") == 0 )
{
if ( strcmp(i->second, "T") == 0 )
use_json = true;
else if ( strcmp(i->second, "F") == 0 )
use_json = false;
else
{
Error("invalid value for 'use_json', must be a string and either \"T\" or \"F\"");
return false;
}
}
else if ( strcmp(i->first, "output_to_stdout") == 0 )
{
if ( strcmp(i->second, "T") == 0 )
output_to_stdout = true;
else if ( strcmp(i->second, "F") == 0 )
output_to_stdout = false;
else
{
Error("invalid value for 'output_to_stdout', must be a string and either \"T\" or \"F\"");
return false;
}
}
else if ( strcmp(i->first, "separator") == 0 )
separator.assign(i->second);
else if ( strcmp(i->first, "set_separator") == 0 )
set_separator.assign(i->second);
else if ( strcmp(i->first, "empty_field") == 0 )
empty_field.assign(i->second);
else if ( strcmp(i->first, "unset_field") == 0 )
unset_field.assign(i->second);
else if ( strcmp(i->first, "meta_prefix") == 0 )
meta_prefix.assign(i->second);
else if ( strcmp(i->first, "json_timestamps") == 0 )
json_timestamps.assign(i->second);
}
delete ascii;
}
if ( use_json )
{
formatter::JSON::TimeFormat tf = formatter::JSON::TS_EPOCH;
bool Ascii::WriteHeaderField(const string& key, const string& val)
{
string str = meta_prefix + key + separator + val + "\n";
// Write out JSON formatted logs.
if ( strcmp(json_timestamps.c_str(), "JSON::TS_EPOCH") == 0 )
tf = formatter::JSON::TS_EPOCH;
else if ( strcmp(json_timestamps.c_str(), "JSON::TS_MILLIS") == 0 )
tf = formatter::JSON::TS_MILLIS;
else if ( strcmp(json_timestamps.c_str(), "JSON::TS_ISO8601") == 0 )
tf = formatter::JSON::TS_ISO8601;
else
{
Error(Fmt("Invalid JSON timestamp format: %s", json_timestamps.c_str()));
return false;
}
return safe_write(fd, str.c_str(), str.length());
}
formatter = new formatter::JSON(this, tf);
// Using JSON implicitly turns off the header meta fields.
include_meta = false;
}
void Ascii::CloseFile(double t)
{
if ( ! fd )
return;
if ( include_meta && ! tsv )
WriteHeaderField("close", Timestamp(0));
safe_close(fd);
fd = 0;
}
bool Ascii::DoInit(const WriterInfo& info, int num_fields, const Field* const * fields)
{
assert(! fd);
else
{
// Use the default "Bro logs" format.
desc.EnableEscaping();
desc.AddEscapeSequence(separator);
formatter::Ascii::SeparatorInfo sep_info(separator, set_separator, unset_field, empty_field);
formatter = new formatter::Ascii(this, sep_info);
}
string path = info.path;
@ -106,24 +202,6 @@ bool Ascii::DoInit(const WriterInfo& info, int num_fields, const Field* const *
return false;
}
for ( WriterInfo::config_map::const_iterator i = info.config.begin(); i != info.config.end(); i++ )
{
if ( strcmp(i->first, "tsv") == 0 )
{
if ( strcmp(i->second, "T") == 0 )
tsv = true;
else if ( strcmp(i->second, "F") == 0 )
tsv = false;
else
{
Error("invalid value for 'tsv', must be a string and either \"T\" or \"F\"");
return false;
}
}
}
if ( include_meta )
{
string names;
@ -209,16 +287,10 @@ bool Ascii::DoWrite(int num_fields, const Field* const * fields,
desc.Clear();
for ( int i = 0; i < num_fields; i++ )
{
if ( i > 0 )
desc.AddRaw(separator);
if ( ! formatter->Describe(&desc, num_fields, fields, vals) )
return false;
if ( ! ascii->Describe(&desc, vals[i], fields[i]->name) )
return false;
}
desc.AddRaw("\n", 1);
desc.AddRaw("\n");
const char* bytes = (const char*)desc.Bytes();
int len = desc.Len();

View file

@ -6,7 +6,8 @@
#define LOGGING_WRITER_ASCII_H
#include "../WriterBackend.h"
#include "threading/AsciiFormatter.h"
#include "threading/formatters/Ascii.h"
#include "threading/formatters/JSON.h"
namespace logging { namespace writer {
@ -53,7 +54,10 @@ private:
string unset_field;
string meta_prefix;
AsciiFormatter* ascii;
bool use_json;
string json_timestamps;
threading::formatter::Formatter* formatter;
};
}

View file

@ -232,7 +232,8 @@ DataSeries::DataSeries(WriterFrontend* frontend) : WriterBackend(frontend)
ds_use_integer_for_time = BifConst::LogDataSeries::use_integer_for_time;
ds_set_separator = ",";
ascii = new AsciiFormatter(this, AsciiFormatter::SeparatorInfo());
threading::formatter::Ascii::SeparatorInfo sep_info;
ascii = new threading::formatter::Ascii(this, sep_info);
compress_type = Extent::compress_none;
log_file = 0;

View file

@ -12,7 +12,7 @@
#include <DataSeries/GeneralField.hpp>
#include "../WriterBackend.h"
#include "threading/AsciiFormatter.h"
#include "threading/formatters/Ascii.h"
namespace logging { namespace writer {
@ -118,7 +118,7 @@ private:
bool ds_use_integer_for_time;
string ds_set_separator;
AsciiFormatter* ascii;
threading::formatter::Ascii* ascii;
};
}

View file

@ -16,7 +16,6 @@
#include "BroString.h"
#include "NetVar.h"
#include "threading/SerialTypes.h"
#include "threading/AsciiFormatter.h"
#include <curl/curl.h>
#include <curl/easy.h>
@ -53,13 +52,13 @@ ElasticSearch::ElasticSearch(WriterFrontend* frontend) : WriterBackend(frontend)
curl_handle = HTTPSetup();
ascii = new AsciiFormatter(this, AsciiFormatter::SeparatorInfo());
json = new threading::formatter::JSON(this, threading::formatter::JSON::TS_MILLIS);
}
ElasticSearch::~ElasticSearch()
{
delete [] cluster_name;
delete ascii;
delete json;
}
bool ElasticSearch::DoInit(const WriterInfo& info, int num_fields, const threading::Field* const* fields)
@ -98,134 +97,6 @@ bool ElasticSearch::BatchIndex()
return true;
}
// Renders a single threading::Value into ElasticSearch-compatible JSON and
// appends it to the given ODesc buffer. Container types (sets, vectors)
// recurse. Returns false for value types it does not know how to serialize;
// otherwise true (out-of-range numerics are emitted as JSON null instead of
// failing the whole record).
bool ElasticSearch::AddValueToBuffer(ODesc* b, Value* val)
{
switch ( val->type )
{
// ES treats 0 as false and any other value as true so bool types go here.
case TYPE_BOOL:
case TYPE_INT:
b->Add(val->val.int_val);
break;
case TYPE_COUNT:
case TYPE_COUNTER:
{
// ElasticSearch doesn't seem to support unsigned 64bit ints.
if ( val->val.uint_val >= INT64_MAX )
{
Error(Fmt("count value too large: %" PRIu64, val->val.uint_val));
b->AddRaw("null", 4);
}
else
b->Add(val->val.uint_val);
break;
}
case TYPE_PORT:
b->Add(val->val.port_val.port);
break;
// Subnets and addresses are rendered via the ASCII formatter and wrapped
// in quotes so they appear as JSON strings.
case TYPE_SUBNET:
b->AddRaw("\"", 1);
b->Add(ascii->Render(val->val.subnet_val));
b->AddRaw("\"", 1);
break;
case TYPE_ADDR:
b->AddRaw("\"", 1);
b->Add(ascii->Render(val->val.addr_val));
b->AddRaw("\"", 1);
break;
case TYPE_DOUBLE:
case TYPE_INTERVAL:
b->Add(val->val.double_val);
break;
case TYPE_TIME:
{
// ElasticSearch uses milliseconds for timestamps and json only
// supports signed ints (uints can be too large).
uint64_t ts = (uint64_t) (val->val.double_val * 1000);
if ( ts >= INT64_MAX )
{
Error(Fmt("time value too large: %" PRIu64, ts));
b->AddRaw("null", 4);
}
else
b->Add(ts);
break;
}
// All string-like types are emitted as a quoted JSON string with
// non-printable and JSON/HTML-sensitive characters \u00XX-escaped.
case TYPE_ENUM:
case TYPE_STRING:
case TYPE_FILE:
case TYPE_FUNC:
{
b->AddRaw("\"", 1);
for ( int i = 0; i < val->val.string_val.length; ++i )
{
char c = val->val.string_val.data[i];
// 2byte Unicode escape special characters.
if ( c < 32 || c > 126 || c == '\n' || c == '"' || c == '\'' || c == '\\' || c == '&' )
{
static const char hex_chars[] = "0123456789abcdef";
b->AddRaw("\\u00", 4);
b->AddRaw(&hex_chars[(c & 0xf0) >> 4], 1);
b->AddRaw(&hex_chars[c & 0x0f], 1);
}
else
b->AddRaw(&c, 1);
}
b->AddRaw("\"", 1);
break;
}
// Sets become JSON arrays, each element rendered recursively.
case TYPE_TABLE:
{
b->AddRaw("[", 1);
for ( int j = 0; j < val->val.set_val.size; j++ )
{
if ( j > 0 )
b->AddRaw(",", 1);
AddValueToBuffer(b, val->val.set_val.vals[j]);
}
b->AddRaw("]", 1);
break;
}
// Vectors likewise become JSON arrays.
case TYPE_VECTOR:
{
b->AddRaw("[", 1);
for ( int j = 0; j < val->val.vector_val.size; j++ )
{
if ( j > 0 )
b->AddRaw(",", 1);
AddValueToBuffer(b, val->val.vector_val.vals[j]);
}
b->AddRaw("]", 1);
break;
}
default:
return false;
}
return true;
}
// Appends one JSON object member, '"<field name>":<value>', to the buffer.
// Returns false without writing anything when the value is unset, so the
// caller can decide whether a separating comma is needed.
bool ElasticSearch::AddFieldToBuffer(ODesc *b, Value* val, const Field* field)
{
if ( ! val->present )
return false;
b->AddRaw("\"", 1);
b->Add(field->name);
b->AddRaw("\":", 2);
AddValueToBuffer(b, val);
return true;
}
bool ElasticSearch::DoWrite(int num_fields, const Field* const * fields,
Value** vals)
{
@ -239,14 +110,7 @@ bool ElasticSearch::DoWrite(int num_fields, const Field* const * fields,
buffer.Add(Info().path);
buffer.AddRaw("\"}}\n", 4);
buffer.AddRaw("{", 1);
for ( int i = 0; i < num_fields; i++ )
{
if ( i > 0 && buffer.Bytes()[buffer.Len()] != ',' && vals[i]->present )
buffer.AddRaw(",", 1);
AddFieldToBuffer(&buffer, vals[i], fields[i]);
}
buffer.AddRaw("}\n", 2);
json->Describe(&buffer, num_fields, fields, vals);
counter++;
if ( counter >= BifConst::LogElasticSearch::max_batch_size ||

View file

@ -9,6 +9,7 @@
#define LOGGING_WRITER_ELASTICSEARCH_H
#include <curl/curl.h>
#include "threading/formatters/JSON.h"
#include "../WriterBackend.h"
namespace logging { namespace writer {
@ -73,7 +74,7 @@ private:
uint64 batch_size;
AsciiFormatter* ascii;
threading::formatter::JSON* json;
};
}

View file

@ -35,7 +35,8 @@ SQLite::SQLite(WriterFrontend* frontend)
BifConst::LogSQLite::empty_field->Len()
);
io = new AsciiFormatter(this, AsciiFormatter::SeparatorInfo(set_separator, unset_field, empty_field));
threading::formatter::Ascii::SeparatorInfo sep_info(string(), set_separator, unset_field, empty_field);
io = new threading::formatter::Ascii(this, sep_info);
}
SQLite::~SQLite()

View file

@ -9,7 +9,7 @@
#include "../WriterBackend.h"
#include "threading/AsciiFormatter.h"
#include "threading/formatters/Ascii.h"
#include "3rdparty/sqlite3.h"
namespace logging { namespace writer {
@ -51,7 +51,7 @@ private:
string unset_field;
string empty_field;
AsciiFormatter* io;
threading::formatter::Ascii* io;
};
}