diff --git a/CHANGES b/CHANGES index e39b7d253f..8a1d8f483e 100644 --- a/CHANGES +++ b/CHANGES @@ -1,4 +1,15 @@ +2.2-235 | 2014-03-13 16:21:19 -0700 + + * The Ascii writer has a new option LogAscii::use_json for writing + out logs as JSON. (Seth Hall) + + * Ascii input reader now supports all config options as per-input + stream "config" values. (Seth Hall) + + * Refactored formatters and updated the the writers a bit. (Seth + Hall) + 2.2-229 | 2014-03-13 14:58:30 -0700 * Refactoring analyzer manager code to reuse diff --git a/VERSION b/VERSION index 9e47c5d61f..71fffc0abf 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.2-229 +2.2-235 diff --git a/scripts/base/frameworks/logging/writers/ascii.bro b/scripts/base/frameworks/logging/writers/ascii.bro index e510874951..50ce450bce 100644 --- a/scripts/base/frameworks/logging/writers/ascii.bro +++ b/scripts/base/frameworks/logging/writers/ascii.bro @@ -5,11 +5,11 @@ ##! ``config``: setting ``tsv`` to the string ``T`` turns the output into ##! "tab-separated-value" mode where only a single header row with the column ##! names is printed out as meta information, with no "# fields" prepended; no -##! other meta data gets included in that mode. -##! +##! other meta data gets included in that mode. +##! ##! Example filter using this:: -##! -##! local my_filter: Log::Filter = [$name = "my-filter", $writer = Log::WRITER_ASCII, $config = table(["tsv"] = "T")]; +##! +##! local my_filter: Log::Filter = [$name = "my-filter", $writer = Log::WRITER_ASCII, $config = table(["tsv"] = "T")]; ##! module LogAscii; @@ -17,27 +17,51 @@ module LogAscii; export { ## If true, output everything to stdout rather than ## into files. This is primarily for debugging purposes. + ## + ## This option is also available as a per-filter ``$config`` option. const output_to_stdout = F &redef; + ## If true, the default will be to write logs in a JSON format. + ## + ## This option is also available as a per-filter ``$config`` option. + const use_json = F &redef; + + ## Format of timestamps when writing out JSON. By default, the JSON formatter will + ## use double values for timestamps which represent the number of seconds from the + ## UNIX epoch. + const json_timestamps: JSON::TimestampFormat = JSON::TS_EPOCH &redef; + ## If true, include lines with log meta information such as column names ## with types, the values of ASCII logging options that are in use, and ## the time when the file was opened and closed (the latter at the end). + ## + ## If writing in JSON format, this is implicitly disabled. const include_meta = T &redef; ## Prefix for lines with meta information. + ## + ## This option is also available as a per-filter ``$config`` option. const meta_prefix = "#" &redef; ## Separator between fields. + ## + ## This option is also available as a per-filter ``$config`` option. const separator = Log::separator &redef; ## Separator between set elements. + ## + ## This option is also available as a per-filter ``$config`` option. const set_separator = Log::set_separator &redef; ## String to use for empty fields. This should be different from - ## *unset_field* to make the output unambiguous. + ## *unset_field* to make the output unambiguous. + ## + ## This option is also available as a per-filter ``$config`` option. const empty_field = Log::empty_field &redef; ## String to use for an unset &optional field. + ## + ## This option is also available as a per-filter ``$config`` option. const unset_field = Log::unset_field &redef; } diff --git a/scripts/base/init-bare.bro b/scripts/base/init-bare.bro index afb89ba1c7..87a7e6fe49 100644 --- a/scripts/base/init-bare.bro +++ b/scripts/base/init-bare.bro @@ -3057,6 +3057,24 @@ const record_all_packets = F &redef; ## .. bro:see:: conn_stats const ignore_keep_alive_rexmit = F &redef; +module JSON; +export { + type TimestampFormat: enum { + ## Timestamps will be formatted as UNIX epoch doubles. This is + ## the format that Bro typically writes out timestamps. + TS_EPOCH, + ## Timestamps will be formatted as unsigned integers that + ## represent the number of milliseconds since the UNIX + ## epoch. + TS_MILLIS, + ## Timestamps will be formatted in the ISO8601 DateTime format. + ## Subseconds are also included which isn't actually part of the + ## standard but most consumers that parse ISO8601 seem to be able + ## to cope with that. + TS_ISO8601, + }; +} + module Tunnel; export { ## The maximum depth of a tunnel to decapsulate until giving up. diff --git a/scripts/policy/tuning/json-logs.bro b/scripts/policy/tuning/json-logs.bro new file mode 100644 index 0000000000..e7daf35063 --- /dev/null +++ b/scripts/policy/tuning/json-logs.bro @@ -0,0 +1,4 @@ +##! Loading this script will cause all logs to be written +##! out as JSON by default. + +redef LogAscii::use_json=T; diff --git a/scripts/test-all-policy.bro b/scripts/test-all-policy.bro index 3a0bd17614..254ad69533 100644 --- a/scripts/test-all-policy.bro +++ b/scripts/test-all-policy.bro @@ -93,6 +93,7 @@ @load tuning/defaults/extracted_file_limits.bro @load tuning/defaults/packet-fragments.bro @load tuning/defaults/warnings.bro +@load tuning/json-logs.bro @load tuning/logs-to-elasticsearch.bro @load tuning/track-all-assets.bro diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index ecf8683ddd..0e990dd37b 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -335,11 +335,13 @@ set(bro_SRCS strsep.c modp_numtoa.c - threading/AsciiFormatter.cc threading/BasicThread.cc + threading/Formatter.cc threading/Manager.cc threading/MsgThread.cc threading/SerialTypes.cc + threading/formatters/Ascii.cc + threading/formatters/JSON.cc logging/Manager.cc logging/WriterBackend.cc diff --git a/src/input/readers/Ascii.cc b/src/input/readers/Ascii.cc index 3fc74480fc..9bef57c264 100644 --- a/src/input/readers/Ascii.cc +++ b/src/input/readers/Ascii.cc @@ -14,6 +14,7 @@ #include using namespace input::reader; +using namespace threading; using threading::Value; using threading::Field; @@ -50,32 +51,12 @@ Ascii::Ascii(ReaderFrontend *frontend) : ReaderBackend(frontend) { file = 0; mtime = 0; - - separator.assign( (const char*) BifConst::InputAscii::separator->Bytes(), - BifConst::InputAscii::separator->Len()); - - if ( separator.size() != 1 ) - Error("separator length has to be 1. Separator will be truncated."); - - set_separator.assign( (const char*) BifConst::InputAscii::set_separator->Bytes(), - BifConst::InputAscii::set_separator->Len()); - - if ( set_separator.size() != 1 ) - Error("set_separator length has to be 1. Separator will be truncated."); - - empty_field.assign( (const char*) BifConst::InputAscii::empty_field->Bytes(), - BifConst::InputAscii::empty_field->Len()); - - unset_field.assign( (const char*) BifConst::InputAscii::unset_field->Bytes(), - BifConst::InputAscii::unset_field->Len()); - - ascii = new AsciiFormatter(this, AsciiFormatter::SeparatorInfo(set_separator, unset_field, empty_field)); -} + } Ascii::~Ascii() { DoClose(); - delete ascii; + delete formatter; } void Ascii::DoClose() @@ -90,7 +71,42 @@ void Ascii::DoClose() bool Ascii::DoInit(const ReaderInfo& info, int num_fields, const Field* const* fields) { - mtime = 0; + separator.assign( (const char*) BifConst::InputAscii::separator->Bytes(), + BifConst::InputAscii::separator->Len()); + + set_separator.assign( (const char*) BifConst::InputAscii::set_separator->Bytes(), + BifConst::InputAscii::set_separator->Len()); + + empty_field.assign( (const char*) BifConst::InputAscii::empty_field->Bytes(), + BifConst::InputAscii::empty_field->Len()); + + unset_field.assign( (const char*) BifConst::InputAscii::unset_field->Bytes(), + BifConst::InputAscii::unset_field->Len()); + + // Set per-filter configuration options. + for ( ReaderInfo::config_map::const_iterator i = info.config.begin(); i != info.config.end(); i++ ) + { + if ( strcmp(i->first, "separator") == 0 ) + separator.assign(i->second); + + else if ( strcmp(i->first, "set_separator") == 0 ) + set_separator.assign(i->second); + + else if ( strcmp(i->first, "empty_field") == 0 ) + empty_field.assign(i->second); + + else if ( strcmp(i->first, "unset_field") == 0 ) + unset_field.assign(i->second); + } + + if ( separator.size() != 1 ) + Error("separator length has to be 1. Separator will be truncated."); + + if ( set_separator.size() != 1 ) + Error("set_separator length has to be 1. Separator will be truncated."); + + formatter::Ascii::SeparatorInfo sep_info(separator, set_separator, unset_field, empty_field); + formatter = new formatter::Ascii(this, sep_info); file = new ifstream(info.source); if ( ! file->is_open() ) @@ -173,7 +189,6 @@ bool Ascii::ReadHeader(bool useCached) return false; } - FieldMapping f(field->name, field->type, field->subtype, ifields[field->name]); if ( field->secondary_name && strlen(field->secondary_name) != 0 ) @@ -200,7 +215,7 @@ bool Ascii::ReadHeader(bool useCached) bool Ascii::GetLine(string& str) { while ( getline(*file, str) ) - { + { if ( str[0] != '#' ) return true; @@ -282,7 +297,7 @@ bool Ascii::DoUpdate() file->sync(); - while ( GetLine(line ) ) + while ( GetLine(line) ) { // split on tabs bool error = false; @@ -332,7 +347,7 @@ bool Ascii::DoUpdate() return false; } - Value* val = ascii->ParseValue(stringfields[(*fit).position], (*fit).name, (*fit).type, (*fit).subtype); + Value* val = formatter->ParseValue(stringfields[(*fit).position], (*fit).name, (*fit).type, (*fit).subtype); if ( val == 0 ) { @@ -347,7 +362,7 @@ bool Ascii::DoUpdate() assert(val->type == TYPE_PORT ); // Error(Fmt("Got type %d != PORT with secondary position!", val->type)); - val->val.port_val.proto = ascii->ParseProto(stringfields[(*fit).secondary_position]); + val->val.port_val.proto = formatter->ParseProto(stringfields[(*fit).secondary_position]); } fields[fpos] = val; @@ -384,8 +399,9 @@ bool Ascii::DoUpdate() } bool Ascii::DoHeartbeat(double network_time, double current_time) -{ - switch ( Info().mode ) { + { + switch ( Info().mode ) + { case MODE_MANUAL: // yay, we do nothing :) break; @@ -398,7 +414,7 @@ bool Ascii::DoHeartbeat(double network_time, double current_time) default: assert(false); - } + } return true; } diff --git a/src/input/readers/Ascii.h b/src/input/readers/Ascii.h index 934ea9a258..5d6bc71d54 100644 --- a/src/input/readers/Ascii.h +++ b/src/input/readers/Ascii.h @@ -7,7 +7,7 @@ #include #include "../ReaderBackend.h" -#include "threading/AsciiFormatter.h" +#include "threading/formatters/Ascii.h" namespace input { namespace reader { @@ -64,7 +64,7 @@ private: string empty_field; string unset_field; - AsciiFormatter* ascii; + threading::formatter::Formatter* formatter; }; diff --git a/src/input/readers/Benchmark.cc b/src/input/readers/Benchmark.cc index ec6b382ebb..de7eae8cc8 100644 --- a/src/input/readers/Benchmark.cc +++ b/src/input/readers/Benchmark.cc @@ -29,7 +29,7 @@ Benchmark::Benchmark(ReaderFrontend *frontend) : ReaderBackend(frontend) heartbeatstarttime = 0; heartbeat_interval = double(BifConst::Threading::heartbeat_interval); - ascii = new AsciiFormatter(this, AsciiFormatter::SeparatorInfo()); + ascii = new threading::formatter::Ascii(this, threading::formatter::Ascii::SeparatorInfo()); } Benchmark::~Benchmark() diff --git a/src/input/readers/Benchmark.h b/src/input/readers/Benchmark.h index ad61b7d2a4..3296f3a85e 100644 --- a/src/input/readers/Benchmark.h +++ b/src/input/readers/Benchmark.h @@ -4,7 +4,7 @@ #define INPUT_READERS_BENCHMARK_H #include "../ReaderBackend.h" -#include "threading/AsciiFormatter.h" +#include "threading/formatters/Ascii.h" namespace input { namespace reader { @@ -40,7 +40,7 @@ private: double timedspread; double heartbeat_interval; - AsciiFormatter* ascii; + threading::formatter::Ascii* ascii; }; diff --git a/src/input/readers/SQLite.cc b/src/input/readers/SQLite.cc index 794b8059ba..d032f934a7 100644 --- a/src/input/readers/SQLite.cc +++ b/src/input/readers/SQLite.cc @@ -36,7 +36,7 @@ SQLite::SQLite(ReaderFrontend *frontend) BifConst::InputSQLite::empty_field->Len() ); - io = new AsciiFormatter(this, AsciiFormatter::SeparatorInfo(set_separator, unset_field, empty_field)); + io = new threading::formatter::Ascii(this, threading::formatter::Ascii::SeparatorInfo(string(), set_separator, unset_field, empty_field)); } SQLite::~SQLite() diff --git a/src/input/readers/SQLite.h b/src/input/readers/SQLite.h index cdea9808c0..a98b3e06b8 100644 --- a/src/input/readers/SQLite.h +++ b/src/input/readers/SQLite.h @@ -10,7 +10,7 @@ #include "../ReaderBackend.h" -#include "threading/AsciiFormatter.h" +#include "threading/formatters/Ascii.h" #include "3rdparty/sqlite3.h" namespace input { namespace reader { @@ -40,7 +40,7 @@ private: string query; sqlite3 *db; sqlite3_stmt *st; - AsciiFormatter* io; + threading::formatter::Ascii* io; string set_separator; string unset_field; diff --git a/src/logging.bif b/src/logging.bif index 1927e149c0..062e4dbe31 100644 --- a/src/logging.bif +++ b/src/logging.bif @@ -77,6 +77,8 @@ const separator: string; const set_separator: string; const empty_field: string; const unset_field: string; +const use_json: bool; +const json_timestamps: JSON::TimestampFormat; # Options for the DataSeries writer. diff --git a/src/logging/writers/Ascii.cc b/src/logging/writers/Ascii.cc index 1a9cc5c4cd..9b95639843 100644 --- a/src/logging/writers/Ascii.cc +++ b/src/logging/writers/Ascii.cc @@ -10,8 +10,8 @@ #include "Ascii.h" -using namespace logging; -using namespace writer; +using namespace logging::writer; +using namespace threading; using threading::Value; using threading::Field; @@ -20,9 +20,46 @@ Ascii::Ascii(WriterFrontend* frontend) : WriterBackend(frontend) fd = 0; ascii_done = false; tsv = false; + } +Ascii::~Ascii() + { + if ( ! ascii_done ) + { + fprintf(stderr, "internal error: finish missing\n"); + abort(); + } + + delete formatter; + } + +bool Ascii::WriteHeaderField(const string& key, const string& val) + { + string str = meta_prefix + key + separator + val + "\n"; + + return safe_write(fd, str.c_str(), str.length()); + } + +void Ascii::CloseFile(double t) + { + if ( ! fd ) + return; + + if ( include_meta && ! tsv ) + WriteHeaderField("close", Timestamp(0)); + + safe_close(fd); + fd = 0; + } + +bool Ascii::DoInit(const WriterInfo& info, int num_fields, const Field* const * fields) + { + assert(! fd); + + // Set some default values. output_to_stdout = BifConst::LogAscii::output_to_stdout; include_meta = BifConst::LogAscii::include_meta; + use_json = BifConst::LogAscii::use_json; separator.assign( (const char*) BifConst::LogAscii::separator->Bytes(), @@ -49,45 +86,104 @@ Ascii::Ascii(WriterFrontend* frontend) : WriterBackend(frontend) BifConst::LogAscii::meta_prefix->Len() ); - desc.EnableEscaping(); - desc.AddEscapeSequence(separator); + ODesc tsfmt; + BifConst::LogAscii::json_timestamps->Describe(&tsfmt); + json_timestamps.assign( + (const char*) tsfmt.Bytes(), + tsfmt.Len() + ); - ascii = new AsciiFormatter(this, AsciiFormatter::SeparatorInfo(set_separator, unset_field, empty_field)); - } - -Ascii::~Ascii() - { - if ( ! ascii_done ) + // Set per-filter configuration options. + for ( WriterInfo::config_map::const_iterator i = info.config.begin(); i != info.config.end(); i++ ) { - fprintf(stderr, "internal error: finish missing\n"); - abort(); + if ( strcmp(i->first, "tsv") == 0 ) + { + if ( strcmp(i->second, "T") == 0 ) + tsv = true; + else if ( strcmp(i->second, "F") == 0 ) + tsv = false; + else + { + Error("invalid value for 'tsv', must be a string and either \"T\" or \"F\""); + return false; + } + } + + else if ( strcmp(i->first, "use_json") == 0 ) + { + if ( strcmp(i->second, "T") == 0 ) + use_json = true; + else if ( strcmp(i->second, "F") == 0 ) + use_json = false; + else + { + Error("invalid value for 'use_json', must be a string and either \"T\" or \"F\""); + return false; + } + } + + else if ( strcmp(i->first, "output_to_stdout") == 0 ) + { + if ( strcmp(i->second, "T") == 0 ) + output_to_stdout = true; + else if ( strcmp(i->second, "F") == 0 ) + output_to_stdout = false; + else + { + Error("invalid value for 'output_to_stdout', must be a string and either \"T\" or \"F\""); + return false; + } + } + + else if ( strcmp(i->first, "separator") == 0 ) + separator.assign(i->second); + + else if ( strcmp(i->first, "set_separator") == 0 ) + set_separator.assign(i->second); + + else if ( strcmp(i->first, "empty_field") == 0 ) + empty_field.assign(i->second); + + else if ( strcmp(i->first, "unset_field") == 0 ) + unset_field.assign(i->second); + + else if ( strcmp(i->first, "meta_prefix") == 0 ) + meta_prefix.assign(i->second); + + else if ( strcmp(i->first, "json_timestamps") == 0 ) + json_timestamps.assign(i->second); } - delete ascii; - } + if ( use_json ) + { + formatter::JSON::TimeFormat tf = formatter::JSON::TS_EPOCH; -bool Ascii::WriteHeaderField(const string& key, const string& val) - { - string str = meta_prefix + key + separator + val + "\n"; + // Write out JSON formatted logs. + if ( strcmp(json_timestamps.c_str(), "JSON::TS_EPOCH") == 0 ) + tf = formatter::JSON::TS_EPOCH; + else if ( strcmp(json_timestamps.c_str(), "JSON::TS_MILLIS") == 0 ) + tf = formatter::JSON::TS_MILLIS; + else if ( strcmp(json_timestamps.c_str(), "JSON::TS_ISO8601") == 0 ) + tf = formatter::JSON::TS_ISO8601; + else + { + Error(Fmt("Invalid JSON timestamp format: %s", json_timestamps.c_str())); + return false; + } - return safe_write(fd, str.c_str(), str.length()); - } + formatter = new formatter::JSON(this, tf); + // Using JSON implicitly turns off the header meta fields. + include_meta = false; + } -void Ascii::CloseFile(double t) - { - if ( ! fd ) - return; - - if ( include_meta && ! tsv ) - WriteHeaderField("close", Timestamp(0)); - - safe_close(fd); - fd = 0; - } - -bool Ascii::DoInit(const WriterInfo& info, int num_fields, const Field* const * fields) - { - assert(! fd); + else + { + // Use the default "Bro logs" format. + desc.EnableEscaping(); + desc.AddEscapeSequence(separator); + formatter::Ascii::SeparatorInfo sep_info(separator, set_separator, unset_field, empty_field); + formatter = new formatter::Ascii(this, sep_info); + } string path = info.path; @@ -106,24 +202,6 @@ bool Ascii::DoInit(const WriterInfo& info, int num_fields, const Field* const * return false; } - for ( WriterInfo::config_map::const_iterator i = info.config.begin(); i != info.config.end(); i++ ) - { - if ( strcmp(i->first, "tsv") == 0 ) - { - if ( strcmp(i->second, "T") == 0 ) - tsv = true; - - else if ( strcmp(i->second, "F") == 0 ) - tsv = false; - - else - { - Error("invalid value for 'tsv', must be a string and either \"T\" or \"F\""); - return false; - } - } - } - if ( include_meta ) { string names; @@ -209,16 +287,10 @@ bool Ascii::DoWrite(int num_fields, const Field* const * fields, desc.Clear(); - for ( int i = 0; i < num_fields; i++ ) - { - if ( i > 0 ) - desc.AddRaw(separator); + if ( ! formatter->Describe(&desc, num_fields, fields, vals) ) + return false; - if ( ! ascii->Describe(&desc, vals[i], fields[i]->name) ) - return false; - } - - desc.AddRaw("\n", 1); + desc.AddRaw("\n"); const char* bytes = (const char*)desc.Bytes(); int len = desc.Len(); diff --git a/src/logging/writers/Ascii.h b/src/logging/writers/Ascii.h index 0ddcada57b..15afdef62f 100644 --- a/src/logging/writers/Ascii.h +++ b/src/logging/writers/Ascii.h @@ -6,7 +6,8 @@ #define LOGGING_WRITER_ASCII_H #include "../WriterBackend.h" -#include "threading/AsciiFormatter.h" +#include "threading/formatters/Ascii.h" +#include "threading/formatters/JSON.h" namespace logging { namespace writer { @@ -53,7 +54,10 @@ private: string unset_field; string meta_prefix; - AsciiFormatter* ascii; + bool use_json; + string json_timestamps; + + threading::formatter::Formatter* formatter; }; } diff --git a/src/logging/writers/DataSeries.cc b/src/logging/writers/DataSeries.cc index 1073e31a08..71a6428950 100644 --- a/src/logging/writers/DataSeries.cc +++ b/src/logging/writers/DataSeries.cc @@ -232,7 +232,8 @@ DataSeries::DataSeries(WriterFrontend* frontend) : WriterBackend(frontend) ds_use_integer_for_time = BifConst::LogDataSeries::use_integer_for_time; ds_set_separator = ","; - ascii = new AsciiFormatter(this, AsciiFormatter::SeparatorInfo()); + threading::formatter::Ascii::SeparatorInfo sep_info; + ascii = new threading::formatter::Ascii(this, sep_info); compress_type = Extent::compress_none; log_file = 0; diff --git a/src/logging/writers/DataSeries.h b/src/logging/writers/DataSeries.h index 4fdc54eaa5..fe095bcb37 100644 --- a/src/logging/writers/DataSeries.h +++ b/src/logging/writers/DataSeries.h @@ -12,7 +12,7 @@ #include #include "../WriterBackend.h" -#include "threading/AsciiFormatter.h" +#include "threading/formatters/Ascii.h" namespace logging { namespace writer { @@ -118,7 +118,7 @@ private: bool ds_use_integer_for_time; string ds_set_separator; - AsciiFormatter* ascii; + threading::formatter::Ascii* ascii; }; } diff --git a/src/logging/writers/ElasticSearch.cc b/src/logging/writers/ElasticSearch.cc index b22c3f4ab6..1096316afd 100644 --- a/src/logging/writers/ElasticSearch.cc +++ b/src/logging/writers/ElasticSearch.cc @@ -16,7 +16,6 @@ #include "BroString.h" #include "NetVar.h" #include "threading/SerialTypes.h" -#include "threading/AsciiFormatter.h" #include #include @@ -53,13 +52,13 @@ ElasticSearch::ElasticSearch(WriterFrontend* frontend) : WriterBackend(frontend) curl_handle = HTTPSetup(); - ascii = new AsciiFormatter(this, AsciiFormatter::SeparatorInfo()); + json = new threading::formatter::JSON(this, threading::formatter::JSON::TS_MILLIS); } ElasticSearch::~ElasticSearch() { delete [] cluster_name; - delete ascii; + delete json; } bool ElasticSearch::DoInit(const WriterInfo& info, int num_fields, const threading::Field* const* fields) @@ -98,134 +97,6 @@ bool ElasticSearch::BatchIndex() return true; } -bool ElasticSearch::AddValueToBuffer(ODesc* b, Value* val) - { - switch ( val->type ) - { - // ES treats 0 as false and any other value as true so bool types go here. - case TYPE_BOOL: - case TYPE_INT: - b->Add(val->val.int_val); - break; - - case TYPE_COUNT: - case TYPE_COUNTER: - { - // ElasticSearch doesn't seem to support unsigned 64bit ints. - if ( val->val.uint_val >= INT64_MAX ) - { - Error(Fmt("count value too large: %" PRIu64, val->val.uint_val)); - b->AddRaw("null", 4); - } - else - b->Add(val->val.uint_val); - break; - } - - case TYPE_PORT: - b->Add(val->val.port_val.port); - break; - - case TYPE_SUBNET: - b->AddRaw("\"", 1); - b->Add(ascii->Render(val->val.subnet_val)); - b->AddRaw("\"", 1); - break; - - case TYPE_ADDR: - b->AddRaw("\"", 1); - b->Add(ascii->Render(val->val.addr_val)); - b->AddRaw("\"", 1); - break; - - case TYPE_DOUBLE: - case TYPE_INTERVAL: - b->Add(val->val.double_val); - break; - - case TYPE_TIME: - { - // ElasticSearch uses milliseconds for timestamps and json only - // supports signed ints (uints can be too large). - uint64_t ts = (uint64_t) (val->val.double_val * 1000); - if ( ts >= INT64_MAX ) - { - Error(Fmt("time value too large: %" PRIu64, ts)); - b->AddRaw("null", 4); - } - else - b->Add(ts); - break; - } - - case TYPE_ENUM: - case TYPE_STRING: - case TYPE_FILE: - case TYPE_FUNC: - { - b->AddRaw("\"", 1); - for ( int i = 0; i < val->val.string_val.length; ++i ) - { - char c = val->val.string_val.data[i]; - // 2byte Unicode escape special characters. - if ( c < 32 || c > 126 || c == '\n' || c == '"' || c == '\'' || c == '\\' || c == '&' ) - { - static const char hex_chars[] = "0123456789abcdef"; - b->AddRaw("\\u00", 4); - b->AddRaw(&hex_chars[(c & 0xf0) >> 4], 1); - b->AddRaw(&hex_chars[c & 0x0f], 1); - } - else - b->AddRaw(&c, 1); - } - b->AddRaw("\"", 1); - break; - } - - case TYPE_TABLE: - { - b->AddRaw("[", 1); - for ( int j = 0; j < val->val.set_val.size; j++ ) - { - if ( j > 0 ) - b->AddRaw(",", 1); - AddValueToBuffer(b, val->val.set_val.vals[j]); - } - b->AddRaw("]", 1); - break; - } - - case TYPE_VECTOR: - { - b->AddRaw("[", 1); - for ( int j = 0; j < val->val.vector_val.size; j++ ) - { - if ( j > 0 ) - b->AddRaw(",", 1); - AddValueToBuffer(b, val->val.vector_val.vals[j]); - } - b->AddRaw("]", 1); - break; - } - - default: - return false; - } - return true; - } - -bool ElasticSearch::AddFieldToBuffer(ODesc *b, Value* val, const Field* field) - { - if ( ! val->present ) - return false; - - b->AddRaw("\"", 1); - b->Add(field->name); - b->AddRaw("\":", 2); - AddValueToBuffer(b, val); - return true; - } - bool ElasticSearch::DoWrite(int num_fields, const Field* const * fields, Value** vals) { @@ -239,14 +110,7 @@ bool ElasticSearch::DoWrite(int num_fields, const Field* const * fields, buffer.Add(Info().path); buffer.AddRaw("\"}}\n", 4); - buffer.AddRaw("{", 1); - for ( int i = 0; i < num_fields; i++ ) - { - if ( i > 0 && buffer.Bytes()[buffer.Len()] != ',' && vals[i]->present ) - buffer.AddRaw(",", 1); - AddFieldToBuffer(&buffer, vals[i], fields[i]); - } - buffer.AddRaw("}\n", 2); + json->Describe(&buffer, num_fields, fields, vals); counter++; if ( counter >= BifConst::LogElasticSearch::max_batch_size || diff --git a/src/logging/writers/ElasticSearch.h b/src/logging/writers/ElasticSearch.h index b7aba09964..283fff2972 100644 --- a/src/logging/writers/ElasticSearch.h +++ b/src/logging/writers/ElasticSearch.h @@ -9,6 +9,7 @@ #define LOGGING_WRITER_ELASTICSEARCH_H #include +#include "threading/formatters/JSON.h" #include "../WriterBackend.h" namespace logging { namespace writer { @@ -73,7 +74,7 @@ private: uint64 batch_size; - AsciiFormatter* ascii; + threading::formatter::JSON* json; }; } diff --git a/src/logging/writers/SQLite.cc b/src/logging/writers/SQLite.cc index 25f5cb012e..44d01ec73f 100644 --- a/src/logging/writers/SQLite.cc +++ b/src/logging/writers/SQLite.cc @@ -35,7 +35,8 @@ SQLite::SQLite(WriterFrontend* frontend) BifConst::LogSQLite::empty_field->Len() ); - io = new AsciiFormatter(this, AsciiFormatter::SeparatorInfo(set_separator, unset_field, empty_field)); + threading::formatter::Ascii::SeparatorInfo sep_info(string(), set_separator, unset_field, empty_field); + io = new threading::formatter::Ascii(this, sep_info); } SQLite::~SQLite() diff --git a/src/logging/writers/SQLite.h b/src/logging/writers/SQLite.h index b70381ebf6..a962e903ff 100644 --- a/src/logging/writers/SQLite.h +++ b/src/logging/writers/SQLite.h @@ -9,7 +9,7 @@ #include "../WriterBackend.h" -#include "threading/AsciiFormatter.h" +#include "threading/formatters/Ascii.h" #include "3rdparty/sqlite3.h" namespace logging { namespace writer { @@ -51,7 +51,7 @@ private: string unset_field; string empty_field; - AsciiFormatter* io; + threading::formatter::Ascii* io; }; } diff --git a/src/threading/AsciiFormatter.h b/src/threading/AsciiFormatter.h deleted file mode 100644 index 1522e46191..0000000000 --- a/src/threading/AsciiFormatter.h +++ /dev/null @@ -1,142 +0,0 @@ -// See the file "COPYING" in the main distribution directory for copyright. - -#ifndef THREADING_ASCII_FORMATTER_H -#define THREADING_ASCII_FORMATTER_H - -#include "../Desc.h" -#include "MsgThread.h" - -/** - * A thread-safe class for converting values into a readable ASCII - * representation, and vice versa. This is a utility class that factors out - * common rendering/parsing code needed by a number of input/output threads. - */ -class AsciiFormatter { -public: - /** - * A struct to pass the necessary configuration values to the - * AsciiFormatter module on initialization. - */ - struct SeparatorInfo - { - string set_separator; // Separator between set elements. - string unset_field; // String marking an unset field. - string empty_field; // String marking an empty (but set) field. - - /** - * Constructor that leaves separators etc unset to dummy - * values. Useful if you use only methods that don't need any - * of them, like StringToAddr, etc. - */ - SeparatorInfo(); - - /** - * Constructor that defines all the configuration options. - * Use if you need either ValToODesc or EntryToVal. - */ - SeparatorInfo(const string& set_separator, const string& unset_field, const string& empty_field); - }; - - /** - * Constructor. - * - * @param t The thread that uses this class instance. The class uses - * some of the thread's methods, e.g., for error reporting and - * internal formatting. - * - * @param info SeparatorInfo structure defining the necessary - * separators. - */ - AsciiFormatter(threading::MsgThread* t, const SeparatorInfo info); - - /** - * Destructor. - */ - ~AsciiFormatter(); - - /** - * Convert a threading value into a corresponding ASCII. - * representation. - * - * @param desc The ODesc object to write to. - * - * @param val the Value to render to the ODesc object. - * - * @param The name of a field associated with the value. Used only - * for error reporting. - * - * @return Returns true on success, false on error. Errors are also - * flagged via the reporter. - */ - bool Describe(ODesc* desc, threading::Value* val, const string& name) const; - - /** - * Convert an IP address into a string. - * - * @param addr The address. - * - * @return An ASCII representation of the address. - */ - string Render(const threading::Value::addr_t& addr) const; - - /** - * Convert an subnet value into a string. - * - * @param addr The address. - * - * @return An ASCII representation of the subnet. - */ - string Render(const threading::Value::subnet_t& subnet) const; - - /** - * Convert a double into a string. This renders the double with Bro's - * standard precision. - * - * @param d The double. - * - * @return An ASCII representation of the double. - */ - string Render(double d) const; - - /** - * Convert the ASCII representation of a field into a value. - * - * @param s The string to parse. - * - * @param The name of a field associated with the value. Used only - * for error reporting. - * - * @return The new value, or null on error. Errors are also flagged - * via the reporter. - */ - threading::Value* ParseValue(string s, string name, TypeTag type, TypeTag subtype = TYPE_ERROR) const; - - /** - * Convert a string into a TransportProto. The string must be one of - * \c tcp, \c udp, \c icmp, or \c unknown. - * - * @param proto The transport protocol - * - * @return The transport protocol, which will be \c TRANSPORT_UNKNOWN - * on error. Errors are also flagged via the reporter. - */ - TransportProto ParseProto(const string &proto) const; - - /** - * Convert a string into a Value::addr_t. - * - * @param addr String containing an IPv4 or IPv6 address. - * - * @return The address, which will be all-zero on error. Errors are - * also flagged via the reporter. - */ - threading::Value::addr_t ParseAddr(const string &addr) const; - -private: - bool CheckNumberError(const string& s, const char * end) const; - - SeparatorInfo separators; - threading::MsgThread* thread; -}; - -#endif /* THREADING_ASCII_FORMATTER_H */ diff --git a/src/threading/Formatter.cc b/src/threading/Formatter.cc new file mode 100644 index 0000000000..f003f37d29 --- /dev/null +++ b/src/threading/Formatter.cc @@ -0,0 +1,113 @@ +// See the file "COPYING" in the main distribution directory for copyright. + +#include "config.h" + +#include +#include + +#include "Formatter.h" +#include "bro_inet_ntop.h" + +using namespace threading; +using namespace formatter; +using threading::Value; +using threading::Field; + +Formatter::Formatter(threading::MsgThread* t) + { + thread = t; + } + +Formatter::~Formatter() + { + } + +string Formatter::Render(const threading::Value::addr_t& addr) const + { + if ( addr.family == IPv4 ) + { + char s[INET_ADDRSTRLEN]; + + if ( ! bro_inet_ntop(AF_INET, &addr.in.in4, s, INET_ADDRSTRLEN) ) + return ""; + else + return s; + } + else + { + char s[INET6_ADDRSTRLEN]; + + if ( ! bro_inet_ntop(AF_INET6, &addr.in.in6, s, INET6_ADDRSTRLEN) ) + return ""; + else + return s; + } + } + +TransportProto Formatter::ParseProto(const string &proto) const + { + if ( proto == "unknown" ) + return TRANSPORT_UNKNOWN; + else if ( proto == "tcp" ) + return TRANSPORT_TCP; + else if ( proto == "udp" ) + return TRANSPORT_UDP; + else if ( proto == "icmp" ) + return TRANSPORT_ICMP; + + thread->Error(thread->Fmt("Tried to parse invalid/unknown protocol: %s", proto.c_str())); + + return TRANSPORT_UNKNOWN; + } + + +// More or less verbose copy from IPAddr.cc -- which uses reporter. +threading::Value::addr_t Formatter::ParseAddr(const string &s) const + { + threading::Value::addr_t val; + + if ( s.find(':') == std::string::npos ) // IPv4. + { + val.family = IPv4; + + if ( inet_aton(s.c_str(), &(val.in.in4)) <= 0 ) + { + thread->Error(thread->Fmt("Bad address: %s", s.c_str())); + memset(&val.in.in4.s_addr, 0, sizeof(val.in.in4.s_addr)); + } + } + + else + { + val.family = IPv6; + if ( inet_pton(AF_INET6, s.c_str(), val.in.in6.s6_addr) <=0 ) + { + thread->Error(thread->Fmt("Bad address: %s", s.c_str())); + memset(val.in.in6.s6_addr, 0, sizeof(val.in.in6.s6_addr)); + } + } + + return val; + } + +string Formatter::Render(const threading::Value::subnet_t& subnet) const + { + char l[16]; + + if ( subnet.prefix.family == IPv4 ) + modp_uitoa10(subnet.length - 96, l); + else + modp_uitoa10(subnet.length, l); + + string s = Render(subnet.prefix) + "/" + l; + + return s; + } + +string Formatter::Render(double d) const + { + char buf[256]; + modp_dtoa(d, buf, 6); + return buf; + } + diff --git a/src/threading/Formatter.h b/src/threading/Formatter.h new file mode 100644 index 0000000000..9f69a3879a --- /dev/null +++ b/src/threading/Formatter.h @@ -0,0 +1,153 @@ +// See the file "COPYING" in the main distribution directory for copyright. + +#ifndef THREADING_FORMATTER_H +#define THREADING_FORMATTER_H + +#include "../Desc.h" +#include "MsgThread.h" + +namespace threading { namespace formatter { + +/** + * A thread-safe class for converting values into some textual format. This + * is a base class that implements the interface for common + * rendering/parsing code needed by a number of input/output threads. + */ +class Formatter { +public: + /** + * Constructor. + * + * @param t The thread that uses this class instance. The class uses + * some of the thread's methods, e.g., for error reporting and + * internal formatting. + * + */ + Formatter(threading::MsgThread* t); + + /** + * Destructor. + */ + virtual ~Formatter(); + + /** + * Convert a list of threading values into an implementation specific + * textual representation. + * + * @param desc The ODesc object to write to. + * + * @param num_fields The number of fields in the logging record. + * + * @param fields Information about the fields for each of the given + * log values. + * + * @param vals The field values. + * + * @return Returns true on success, false on error. Errors must also + * be flagged via the thread. + */ + virtual bool Describe(ODesc* desc, int num_fields, const threading::Field* const * fields, + threading::Value** vals) const = 0; + + /** + * Convert a single threading value into an implementation-specific + * representation. + * + * @param desc The ODesc object to write to. + * + * @param val the Value to render to the ODesc object. + * + * @param The name of a field associated with the value. + * + * @return Returns true on success, false on error. Errors are also + * flagged via the thread. + */ + virtual bool Describe(ODesc* desc, threading::Value* val, const string& name = "") const = 0; + + /** + * Convert an implementation-specific textual representation of a + * field into a value. + * + * @param s The string to parse. + * + * @param The name of a field associated with the value. Used only + * for error reporting. + * + * @return The new value, or null on error. Errors must also be + * flagged via the thread. + */ + virtual threading::Value* ParseValue(string s, string name, TypeTag type, TypeTag subtype = TYPE_ERROR) const = 0; + + /** + * Convert an IP address into a string. + * + * This is a helper function that formatter implementations may use. + * + * @param addr The address. + * + * @return An ASCII representation of the address. + */ + string Render(const threading::Value::addr_t& addr) const; + + /** + * Convert an subnet value into a string. + * + * This is a helper function that formatter implementations may use. + * + * @param addr The address. + * + * @return An ASCII representation of the subnet. + */ + string Render(const threading::Value::subnet_t& subnet) const; + + /** + * Convert a double into a string. This renders the double with Bro's + * standard precision. + * + * This is a helper function that formatter implementations may use. + * + * @param d The double. + * + * @return An ASCII representation of the double. + */ + string Render(double d) const; + + /** + * Convert a string into a TransportProto. The string must be one of + * \c tcp, \c udp, \c icmp, or \c unknown. + * + * This is a helper function that formatter implementations may use. + * + * @param proto The transport protocol + * + * @return The transport protocol, which will be \c TRANSPORT_UNKNOWN + * on error. Errors are also flagged via the thread. + */ + TransportProto ParseProto(const string &proto) const; + + /** + * Convert a string into a Value::addr_t. + * + * This is a helper function that formatter implementations may use. + * + * @param addr String containing an IPv4 or IPv6 address. + * + * @return The address, which will be all-zero on error. Errors are + * also flagged via the thread. + */ + threading::Value::addr_t ParseAddr(const string &addr) const; + +protected: + /** + * Returns the thread associated with the formatter via the + * constructor. + */ + threading::MsgThread* GetThread() const { return thread; } + +private: + threading::MsgThread* thread; +}; + +}} + +#endif /* THREADING_FORMATTER_H */ diff --git a/src/threading/AsciiFormatter.cc b/src/threading/formatters/Ascii.cc similarity index 61% rename from src/threading/AsciiFormatter.cc rename to src/threading/formatters/Ascii.cc index 616abbe2b6..c400f8a7ad 100644 --- a/src/threading/AsciiFormatter.cc +++ b/src/threading/formatters/Ascii.cc @@ -5,35 +5,54 @@ #include #include -#include "AsciiFormatter.h" -#include "bro_inet_ntop.h" +#include "./Ascii.h" -AsciiFormatter::SeparatorInfo::SeparatorInfo() +using namespace threading::formatter; + +Ascii::SeparatorInfo::SeparatorInfo() { - this->set_separator = "SHOULD_NOT_BE_USED"; - this->unset_field = "SHOULD_NOT_BE_USED"; - this->empty_field = "SHOULD_NOT_BE_USED"; + separator = "SHOULD_NOT_BE_USED"; + set_separator = "SHOULD_NOT_BE_USED"; + unset_field = "SHOULD_NOT_BE_USED"; + empty_field = "SHOULD_NOT_BE_USED"; } -AsciiFormatter::SeparatorInfo::SeparatorInfo(const string & set_separator, - const string & unset_field, const string & empty_field) +Ascii::SeparatorInfo::SeparatorInfo(const string& arg_separator, + const string& arg_set_separator, + const string& arg_unset_field, + const string& arg_empty_field) { - this->set_separator = set_separator; - this->unset_field = unset_field; - this->empty_field = empty_field; + separator = arg_separator; + set_separator = arg_set_separator; + unset_field = arg_unset_field; + empty_field = arg_empty_field; } -AsciiFormatter::AsciiFormatter(threading::MsgThread* t, const SeparatorInfo info) +Ascii::Ascii(threading::MsgThread* t, const SeparatorInfo& info) : Formatter(t) { - thread = t; - this->separators = info; + separators = info; } -AsciiFormatter::~AsciiFormatter() +Ascii::~Ascii() { } -bool AsciiFormatter::Describe(ODesc* desc, threading::Value* val, const string& name) const +bool Ascii::Describe(ODesc* desc, int num_fields, const threading::Field* const * fields, + threading::Value** vals) const + { + for ( int i = 0; i < num_fields; i++ ) + { + if ( i > 0 ) + desc->AddRaw(separators.separator); + + if ( ! Describe(desc, vals[i], fields[i]->name) ) + return false; + } + + return true; + } + +bool Ascii::Describe(ODesc* desc, threading::Value* val, const string& name) const { if ( ! val->present ) { @@ -128,17 +147,19 @@ bool AsciiFormatter::Describe(ODesc* desc, threading::Value* val, const string& } desc->AddEscapeSequence(separators.set_separator); + for ( int j = 0; j < val->val.set_val.size; j++ ) { if ( j > 0 ) desc->AddRaw(separators.set_separator); - if ( ! Describe(desc, val->val.set_val.vals[j], name) ) + if ( ! Describe(desc, val->val.set_val.vals[j]) ) { desc->RemoveEscapeSequence(separators.set_separator); return false; } } + desc->RemoveEscapeSequence(separators.set_separator); break; @@ -153,24 +174,26 @@ bool AsciiFormatter::Describe(ODesc* desc, threading::Value* val, const string& } desc->AddEscapeSequence(separators.set_separator); + for ( int j = 0; j < val->val.vector_val.size; j++ ) { if ( j > 0 ) desc->AddRaw(separators.set_separator); - if ( ! Describe(desc, val->val.vector_val.vals[j], name) ) + if ( ! Describe(desc, val->val.vector_val.vals[j]) ) { desc->RemoveEscapeSequence(separators.set_separator); return false; } } + desc->RemoveEscapeSequence(separators.set_separator); break; } default: - thread->Error(thread->Fmt("unsupported field format %d for %s", val->type, name.c_str())); + GetThread()->Error(GetThread()->Fmt("Ascii writer unsupported field format %d", val->type)); return false; } @@ -178,12 +201,13 @@ bool AsciiFormatter::Describe(ODesc* desc, threading::Value* val, const string& } -threading::Value* AsciiFormatter::ParseValue(string s, string name, TypeTag type, TypeTag subtype) const +threading::Value* Ascii::ParseValue(string s, string name, TypeTag type, TypeTag subtype) const { if ( s.compare(separators.unset_field) == 0 ) // field is not set... return new threading::Value(type, false); threading::Value* val = new threading::Value(type, true); + const char* start = s.c_str(); char* end = 0; errno = 0; @@ -202,36 +226,36 @@ threading::Value* AsciiFormatter::ParseValue(string s, string name, TypeTag type val->val.int_val = 0; else { - thread->Error(thread->Fmt("Field: %s Invalid value for boolean: %s", - name.c_str(), s.c_str())); + GetThread()->Error(GetThread()->Fmt("Field: %s Invalid value for boolean: %s", + name.c_str(), start)); goto parse_error; } break; case TYPE_INT: - val->val.int_val = strtoll(s.c_str(), &end, 10); - if ( CheckNumberError(s, end) ) + val->val.int_val = strtoll(start, &end, 10); + if ( CheckNumberError(start, end) ) goto parse_error; break; case TYPE_DOUBLE: case TYPE_TIME: case TYPE_INTERVAL: - val->val.double_val = strtod(s.c_str(), &end); - if ( CheckNumberError(s, end) ) + val->val.double_val = strtod(start, &end); + if ( CheckNumberError(start, end) ) goto parse_error; break; case TYPE_COUNT: case TYPE_COUNTER: - val->val.uint_val = strtoull(s.c_str(), &end, 10); - if ( CheckNumberError(s, end) ) + val->val.uint_val = strtoull(start, &end, 10); + if ( CheckNumberError(start, end) ) goto parse_error; break; case TYPE_PORT: - val->val.port_val.port = strtoull(s.c_str(), &end, 10); - if ( CheckNumberError(s, end) ) + val->val.port_val.port = strtoull(start, &end, 10); + if ( CheckNumberError(start, end) ) goto parse_error; val->val.port_val.proto = TRANSPORT_UNKNOWN; @@ -243,14 +267,14 @@ threading::Value* AsciiFormatter::ParseValue(string s, string name, TypeTag type size_t pos = s.find("/"); if ( pos == s.npos ) { - thread->Error(thread->Fmt("Invalid value for subnet: %s", s.c_str())); + GetThread()->Error(GetThread()->Fmt("Invalid value for subnet: %s", start)); goto parse_error; } string width_str = s.substr(pos + 1); uint8_t width = (uint8_t) strtol(width_str.c_str(), &end, 10); - if ( CheckNumberError(s, end) ) + if ( CheckNumberError(start, end) ) goto parse_error; string addr = s.substr(0, pos); @@ -316,7 +340,7 @@ threading::Value* AsciiFormatter::ParseValue(string s, string name, TypeTag type if ( pos >= length ) { - thread->Error(thread->Fmt("Internal error while parsing set. pos %d >= length %d." + GetThread()->Error(GetThread()->Fmt("Internal error while parsing set. pos %d >= length %d." " Element: %s", pos, length, element.c_str())); error = true; break; @@ -325,7 +349,7 @@ threading::Value* AsciiFormatter::ParseValue(string s, string name, TypeTag type threading::Value* newval = ParseValue(element, name, subtype); if ( newval == 0 ) { - thread->Error("Error while reading set or vector"); + GetThread()->Error("Error while reading set or vector"); error = true; break; } @@ -343,7 +367,7 @@ threading::Value* AsciiFormatter::ParseValue(string s, string name, TypeTag type lvals[pos] = ParseValue("", name, subtype); if ( lvals[pos] == 0 ) { - thread->Error("Error while trying to add empty set element"); + GetThread()->Error("Error while trying to add empty set element"); goto parse_error; } @@ -362,7 +386,7 @@ threading::Value* AsciiFormatter::ParseValue(string s, string name, TypeTag type if ( pos != length ) { - thread->Error(thread->Fmt("Internal error while parsing set: did not find all elements: %s", s.c_str())); + GetThread()->Error(GetThread()->Fmt("Internal error while parsing set: did not find all elements: %s", start)); goto parse_error; } @@ -370,8 +394,8 @@ threading::Value* AsciiFormatter::ParseValue(string s, string name, TypeTag type } default: - thread->Error(thread->Fmt("unsupported field format %d for %s", type, - name.c_str())); + GetThread()->Error(GetThread()->Fmt("unsupported field format %d for %s", type, + name.c_str())); goto parse_error; } @@ -382,128 +406,35 @@ parse_error: return 0; } -bool AsciiFormatter::CheckNumberError(const string& s, const char* end) const +bool Ascii::CheckNumberError(const char* start, const char* end) const { - // Do this check first, before executing s.c_str() or similar. - // otherwise the value to which *end is pointing at the moment might - // be gone ... - bool endnotnull = (*end != '\0'); + threading::MsgThread* thread = GetThread(); - if ( s.length() == 0 ) + if ( end == start && *end != '\0' ) { + thread->Error(thread->Fmt("String '%s' contained no parseable number", start)); + return true; + } + + if ( end - start == 0 && *end == '\0' ) { thread->Error("Got empty string for number field"); return true; } - if ( end == s.c_str() ) { - thread->Error(thread->Fmt("String '%s' contained no parseable number", s.c_str())); - return true; - } - - if ( endnotnull ) - thread->Warning(thread->Fmt("Number '%s' contained non-numeric trailing characters. Ignored trailing characters '%s'", s.c_str(), end)); + if ( (*end != '\0') ) + thread->Warning(thread->Fmt("Number '%s' contained non-numeric trailing characters. Ignored trailing characters '%s'", start, end)); if ( errno == EINVAL ) { - thread->Error(thread->Fmt("String '%s' could not be converted to a number", s.c_str())); + thread->Error(thread->Fmt("String '%s' could not be converted to a number", start)); return true; } else if ( errno == ERANGE ) { - thread->Error(thread->Fmt("Number '%s' out of supported range.", s.c_str())); + thread->Error(thread->Fmt("Number '%s' out of supported range.", start)); return true; } return false; } - -string AsciiFormatter::Render(const threading::Value::addr_t& addr) const - { - if ( addr.family == IPv4 ) - { - char s[INET_ADDRSTRLEN]; - - if ( ! bro_inet_ntop(AF_INET, &addr.in.in4, s, INET_ADDRSTRLEN) ) - return ""; - else - return s; - } - else - { - char s[INET6_ADDRSTRLEN]; - - if ( ! bro_inet_ntop(AF_INET6, &addr.in.in6, s, INET6_ADDRSTRLEN) ) - return ""; - else - return s; - } - } - -TransportProto AsciiFormatter::ParseProto(const string &proto) const - { - if ( proto == "unknown" ) - return TRANSPORT_UNKNOWN; - else if ( proto == "tcp" ) - return TRANSPORT_TCP; - else if ( proto == "udp" ) - return TRANSPORT_UDP; - else if ( proto == "icmp" ) - return TRANSPORT_ICMP; - - thread->Error(thread->Fmt("Tried to parse invalid/unknown protocol: %s", proto.c_str())); - - return TRANSPORT_UNKNOWN; - } - - -// More or less verbose copy from IPAddr.cc -- which uses reporter. -threading::Value::addr_t AsciiFormatter::ParseAddr(const string &s) const - { - threading::Value::addr_t val; - - if ( s.find(':') == std::string::npos ) // IPv4. - { - val.family = IPv4; - - if ( inet_aton(s.c_str(), &(val.in.in4)) <= 0 ) - { - thread->Error(thread->Fmt("Bad address: %s", s.c_str())); - memset(&val.in.in4.s_addr, 0, sizeof(val.in.in4.s_addr)); - } - } - - else - { - val.family = IPv6; - if ( inet_pton(AF_INET6, s.c_str(), val.in.in6.s6_addr) <=0 ) - { - thread->Error(thread->Fmt("Bad address: %s", s.c_str())); - memset(val.in.in6.s6_addr, 0, sizeof(val.in.in6.s6_addr)); - } - } - - return val; - } - -string AsciiFormatter::Render(const threading::Value::subnet_t& subnet) const - { - char l[16]; - - if ( subnet.prefix.family == IPv4 ) - modp_uitoa10(subnet.length - 96, l); - else - modp_uitoa10(subnet.length, l); - - string s = Render(subnet.prefix) + "/" + l; - - return s; - } - -string AsciiFormatter::Render(double d) const - { - char buf[256]; - modp_dtoa(d, buf, 6); - return buf; - } - diff --git a/src/threading/formatters/Ascii.h b/src/threading/formatters/Ascii.h new file mode 100644 index 0000000000..ab5e29c1f0 --- /dev/null +++ b/src/threading/formatters/Ascii.h @@ -0,0 +1,63 @@ +// See the file "COPYING" in the main distribution directory for copyright. + +#ifndef THREADING_FORMATTERS_ASCII_H +#define THREADING_FORMATTERS_ASCII_H + +#include "../Formatter.h" + +namespace threading { namespace formatter { + +class Ascii : public Formatter { +public: + /** + * A struct to pass the necessary configuration values to the + * Ascii module on initialization. + */ + struct SeparatorInfo + { + string separator; // Separator between columns + string set_separator; // Separator between set elements. + string unset_field; // String marking an unset field. + string empty_field; // String marking an empty (but set) field. + + /** + * Constructor that defines all the configuration options. + * Use if you need either ValToODesc or EntryToVal. + */ + SeparatorInfo(const string& separator, const string& set_separator, const string& unset_field, const string& empty_field); + + /** + * Constructor that leaves separators etc unset to dummy + * values. Useful if you use only methods that don't need any + * of them, like StringToAddr, etc. + */ + SeparatorInfo(); + }; + + /** + * Constructor. + * + * @param t The thread that uses this class instance. The class uses + * some of the thread's methods, e.g., for error reporting and + * internal formatting. + * + * @param info SeparatorInfo structure defining the necessary + * separators. + */ + Ascii(threading::MsgThread* t, const SeparatorInfo& info); + virtual ~Ascii(); + + virtual bool Describe(ODesc* desc, threading::Value* val, const string& name = "") const; + virtual bool Describe(ODesc* desc, int num_fields, const threading::Field* const * fields, + threading::Value** vals) const; + virtual threading::Value* ParseValue(string s, string name, TypeTag type, TypeTag subtype = TYPE_ERROR) const; + +private: + bool CheckNumberError(const char* start, const char* end) const; + + SeparatorInfo separators; +}; + +}} + +#endif /* THREADING_FORMATTERS_ASCII_H */ diff --git a/src/threading/formatters/JSON.cc b/src/threading/formatters/JSON.cc new file mode 100644 index 0000000000..7f3321ca96 --- /dev/null +++ b/src/threading/formatters/JSON.cc @@ -0,0 +1,213 @@ +// See the file "COPYING" in the main distribution directory for copyright. + +#include "config.h" + +#include +#include +#include + +#include "./JSON.h" + +using namespace threading::formatter; + +JSON::JSON(MsgThread* t, TimeFormat tf) : Formatter(t) + { + timestamps = tf; + } + +JSON::~JSON() + { + } + +bool JSON::Describe(ODesc* desc, int num_fields, const Field* const * fields, + Value** vals) const + { + desc->AddRaw("{"); + + for ( int i = 0; i < num_fields; i++ ) + { + const u_char* bytes = desc->Bytes(); + int len = desc->Len(); + + if ( i > 0 && len > 0 && bytes[len-1] != ',' && vals[i]->present ) + desc->AddRaw(","); + + if ( ! Describe(desc, vals[i], fields[i]->name) ) + return false; + } + + desc->AddRaw("}"); + + return true; + } + +bool JSON::Describe(ODesc* desc, Value* val, const string& name) const + { + if ( ! val->present ) + return true; + + if ( name.size() ) + { + desc->AddRaw("\"", 1); + desc->Add(name); + desc->AddRaw("\":", 2); + } + + switch ( val->type ) + { + case TYPE_BOOL: + desc->AddRaw(val->val.int_val == 0 ? "false" : "true"); + break; + + case TYPE_INT: + desc->Add(val->val.int_val); + break; + + case TYPE_COUNT: + case TYPE_COUNTER: + { + // JSON doesn't support unsigned 64bit ints. + if ( val->val.uint_val >= INT64_MAX ) + { + GetThread()->Error(GetThread()->Fmt("count value too large for JSON: %" PRIu64, val->val.uint_val)); + desc->AddRaw("null", 4); + } + else + desc->Add(val->val.uint_val); + break; + } + + case TYPE_PORT: + desc->Add(val->val.port_val.port); + break; + + case TYPE_SUBNET: + desc->AddRaw("\"", 1); + desc->Add(Render(val->val.subnet_val)); + desc->AddRaw("\"", 1); + break; + + case TYPE_ADDR: + desc->AddRaw("\"", 1); + desc->Add(Render(val->val.addr_val)); + desc->AddRaw("\"", 1); + break; + + case TYPE_DOUBLE: + case TYPE_INTERVAL: + desc->Add(val->val.double_val); + break; + + case TYPE_TIME: + { + if ( timestamps == TS_ISO8601 ) + { + char buffer[40]; + char buffer2[40]; + time_t t = time_t(val->val.double_val); + + if ( strftime(buffer, sizeof(buffer), "%Y-%m-%dT%H:%M:%S", gmtime(&t)) > 0 ) + { + double integ; + double frac = modf(val->val.double_val, &integ); + snprintf(buffer2, sizeof(buffer2), "%s.%06.0fZ", buffer, frac * 1000000); + desc->AddRaw("\"", 1); + desc->Add(buffer2); + desc->AddRaw("\"", 1); + } + + else + GetThread()->Error(GetThread()->Fmt("strftime error for JSON: %" PRIu64)); + + } + + else if ( timestamps == TS_EPOCH ) + desc->Add(val->val.double_val); + + else if ( timestamps == TS_MILLIS ) + { + // ElasticSearch uses milliseconds for timestamps and json only + // supports signed ints (uints can be too large). + uint64_t ts = (uint64_t) (val->val.double_val * 1000); + if ( ts < INT64_MAX ) + desc->Add(ts); + else + { + GetThread()->Error(GetThread()->Fmt("time value too large for JSON milliseconds: %" PRIu64, ts)); + desc->AddRaw("null", 4); + } + } + + break; + } + + case TYPE_ENUM: + case TYPE_STRING: + case TYPE_FILE: + case TYPE_FUNC: + { + desc->AddRaw("\"", 1); + + for ( int i = 0; i < val->val.string_val.length; ++i ) + { + char c = val->val.string_val.data[i]; + + // 2byte Unicode escape special characters. + if ( c < 32 || c > 126 || c == '\n' || c == '"' || c == '\'' || c == '\\' || c == '&' ) + { + static const char hex_chars[] = "0123456789abcdef"; + desc->AddRaw("\\u00", 4); + desc->AddRaw(&hex_chars[(c & 0xf0) >> 4], 1); + desc->AddRaw(&hex_chars[c & 0x0f], 1); + } + else + desc->AddRaw(&c, 1); + } + + desc->AddRaw("\"", 1); + break; + } + + case TYPE_TABLE: + { + desc->AddRaw("[", 1); + + for ( int j = 0; j < val->val.set_val.size; j++ ) + { + if ( j > 0 ) + desc->AddRaw(",", 1); + + Describe(desc, val->val.set_val.vals[j]); + } + + desc->AddRaw("]", 1); + break; + } + + case TYPE_VECTOR: + { + desc->AddRaw("[", 1); + + for ( int j = 0; j < val->val.vector_val.size; j++ ) + { + if ( j > 0 ) + desc->AddRaw(",", 1); + Describe(desc, val->val.vector_val.vals[j]); + } + + desc->AddRaw("]", 1); + break; + } + + default: + return false; + } + + return true; + } + +threading::Value* JSON::ParseValue(string s, string name, TypeTag type, TypeTag subtype) const + { + GetThread()->Error("JSON formatter does not support parsing yet."); + return NULL; + } diff --git a/src/threading/formatters/JSON.h b/src/threading/formatters/JSON.h new file mode 100644 index 0000000000..86f3472125 --- /dev/null +++ b/src/threading/formatters/JSON.h @@ -0,0 +1,36 @@ +// See the file "COPYING" in the main distribution directory for copyright. + +#ifndef THREADING_FORMATTERS_JSON_H +#define THREADING_FORMATTERS_JSON_H + +#include "../Formatter.h" + +namespace threading { namespace formatter { + +/** + * A thread-safe class for converting values into a JSON representation + * and vice versa. + */ +class JSON : public Formatter { +public: + enum TimeFormat { + TS_EPOCH, // Doubles that represents seconds from the UNIX epoch. + TS_ISO8601, // ISO 8601 defined human readable timestamp format. + TS_MILLIS // Milliseconds from the UNIX epoch. Some consumers need this (e.g., elasticsearch). + }; + + JSON(threading::MsgThread* t, TimeFormat tf); + virtual ~JSON(); + + virtual bool Describe(ODesc* desc, threading::Value* val, const string& name = "") const; + virtual bool Describe(ODesc* desc, int num_fields, const threading::Field* const * fields, + threading::Value** vals) const; + virtual threading::Value* ParseValue(string s, string name, TypeTag type, TypeTag subtype = TYPE_ERROR) const; + +private: + TimeFormat timestamps; +}; + +}} + +#endif /* THREADING_FORMATTERS_JSON_H */ diff --git a/testing/btest/Baseline/scripts.base.frameworks.logging.ascii-json-iso-timestamps/ssh.log b/testing/btest/Baseline/scripts.base.frameworks.logging.ascii-json-iso-timestamps/ssh.log new file mode 100644 index 0000000000..5673a0605a --- /dev/null +++ b/testing/btest/Baseline/scripts.base.frameworks.logging.ascii-json-iso-timestamps/ssh.log @@ -0,0 +1,2 @@ +{"t":"2008-07-09T16:13:30.005432Z"} +{"t":"1986-12-01T01:01:01.900000Z"} diff --git a/testing/btest/Baseline/scripts.base.frameworks.logging.ascii-json/ssh.log b/testing/btest/Baseline/scripts.base.frameworks.logging.ascii-json/ssh.log new file mode 100644 index 0000000000..180d6a4374 --- /dev/null +++ b/testing/btest/Baseline/scripts.base.frameworks.logging.ascii-json/ssh.log @@ -0,0 +1 @@ +{"b":true,"i":-42,"e":"SSH::LOG","c":21,"p":123,"sn":"10.0.0.0/24","a":"1.2.3.4","d":3.14,"t":1215620010.54321,"iv":100.0,"s":"hurz","sc":[2,4,1,3],"ss":["CC","AA","BB"],"se":[],"vc":[10,20,30],"ve":[],"f":"SSH::foo\u000a{ \u000aif (0 < SSH::i) \u000a\u0009return (Foo);\u000aelse\u000a\u0009return (Bar);\u000a\u000a}"} diff --git a/testing/btest/scripts/base/frameworks/logging/ascii-json-iso-timestamps.bro b/testing/btest/scripts/base/frameworks/logging/ascii-json-iso-timestamps.bro new file mode 100644 index 0000000000..fa2a6f1efd --- /dev/null +++ b/testing/btest/scripts/base/frameworks/logging/ascii-json-iso-timestamps.bro @@ -0,0 +1,31 @@ +# +# @TEST-EXEC: bro -b %INPUT +# @TEST-EXEC: btest-diff ssh.log +# +# Testing all possible types. + +redef LogAscii::use_json = T; +redef LogAscii::json_timestamps = JSON::TS_ISO8601; + +module SSH; + +export { + redef enum Log::ID += { LOG }; + + type Log: record { + t: time; + } &log; +} + +event bro_init() +{ + Log::create_stream(SSH::LOG, [$columns=Log]); + Log::write(SSH::LOG, [ + $t=(strptime("%Y-%m-%dT%H:%M:%SZ", "2008-07-09T16:13:30Z") + 0.00543210 secs) + ]); + Log::write(SSH::LOG, [ + $t=(strptime("%Y-%m-%dT%H:%M:%SZ", "1986-12-01T01:01:01Z") + 0.90 secs) + ]); + +} + diff --git a/testing/btest/scripts/base/frameworks/logging/ascii-json.bro b/testing/btest/scripts/base/frameworks/logging/ascii-json.bro new file mode 100644 index 0000000000..2b6055930f --- /dev/null +++ b/testing/btest/scripts/base/frameworks/logging/ascii-json.bro @@ -0,0 +1,70 @@ +# +# @TEST-EXEC: bro -b %INPUT +# @TEST-EXEC: btest-diff ssh.log +# +# Testing all possible types. + +redef LogAscii::use_json = T; + +module SSH; + +export { + redef enum Log::ID += { LOG }; + + type Log: record { + b: bool; + i: int; + e: Log::ID; + c: count; + p: port; + sn: subnet; + a: addr; + d: double; + t: time; + iv: interval; + s: string; + sc: set[count]; + ss: set[string]; + se: set[string]; + vc: vector of count; + ve: vector of string; + f: function(i: count) : string; + } &log; +} + +function foo(i : count) : string + { + if ( i > 0 ) + return "Foo"; + else + return "Bar"; + } + +event bro_init() +{ + Log::create_stream(SSH::LOG, [$columns=Log]); + + local empty_set: set[string]; + local empty_vector: vector of string; + + Log::write(SSH::LOG, [ + $b=T, + $i=-42, + $e=SSH::LOG, + $c=21, + $p=123/tcp, + $sn=10.0.0.1/24, + $a=1.2.3.4, + $d=3.14, + $t=(strptime("%Y-%m-%dT%H:%M:%SZ", "2008-07-09T16:13:30Z") + 0.543210 secs), + $iv=100secs, + $s="hurz", + $sc=set(1,2,3,4), + $ss=set("AA", "BB", "CC"), + $se=empty_set, + $vc=vector(10, 20, 30), + $ve=empty_vector, + $f=foo + ]); +} + diff --git a/testing/external/scripts/testing-setup.bro b/testing/external/scripts/testing-setup.bro index 4b4d110864..5ef35ff3b2 100644 --- a/testing/external/scripts/testing-setup.bro +++ b/testing/external/scripts/testing-setup.bro @@ -10,3 +10,9 @@ #log forwarding to ES. redef LogElasticSearch::server_host = ""; @endif + +@ifdef ( LogAscii::use_json ) + # Don't start logging everything as JSON. + # (json-logs.bro activates this). + redef LogAscii::use_json = F; +@endif