Merge remote-tracking branch 'origin/topic/bernhard/input-logging-commmon-functions'

* origin/topic/bernhard/input-logging-commmon-functions:
  add the last of Robins suggestions (separate info-struct for constructors).
  port memory leak fix from master
  harmonize function naming
  move AsciiInputOutput over to threading
  and thinking about it, ascii-io doesn't need the separator
  change constructors
  and factor stuff out the input framework too.
  factor out ascii input/output.
  std::string accessors to escape_sequence functionality
  intermediate commit - it has been over a month since I touched this...

I cleaned up the AsciiInputOutput class somewhat, including renaming
it to AsciiFormatter, renaming some of its methods, and turning the
static methods into members for consistency.

Closes #929.
This commit is contained in:
Robin Sommer 2013-01-23 16:17:29 -08:00
commit 762c034ec2
25 changed files with 807 additions and 597 deletions

22
CHANGES
View file

@ -1,4 +1,26 @@
2.1-302 | 2013-01-23 16:17:29 -0800
* Refactoring ASCII formatting/parsing from loggers/readers into a
separate AsciiFormatter class. (Bernhard Amann)
* Prevent uninitialized locals in event/hook handlers from having a
value. Addresses #932. (Jon Siwek)
* Add a null value check in CompositeHash::ComputeHash. Addresses
#930. (Jon Siwek)
* Change reporter messages to more reliably print to stderr.
Addressed #930 (and revisits #836). (Jon Siwek)
* Changing test-suite's btest call to use "-j" instead of "-j 5".
(Robin Sommer)
* Require "case" blocks to end with either "break", "return", or a
new "fallthrough" statement that passes control on to the
subsequent case. This gives us the best mix of safety,
readability, and flexibility. Addresses #754. (Jon Siwek)
2.1-279 | 2013-01-18 17:18:22 -0800
* Revert "Trick for parallelizing input framework unit tests." The

View file

@ -1 +1 @@
2.1-279
2.1-302

View file

@ -11,6 +11,24 @@ export {
## The default reader mode used. Defaults to `MANUAL`.
const default_mode = MANUAL &redef;
## Separator between fields.
## Please note that the separator has to be exactly one character long.
## Can be overwritten by individual readers.
const separator = "\t" &redef;
## Separator between set elements.
## Please note that the separator has to be exactly one character long.
## Can be overwritten by individual readers.
const set_separator = "," &redef;
## String to use for empty fields.
## Can be overwritten by individual readers.
const empty_field = "(empty)" &redef;
## String to use for an unset &optional field.
## Can be overwritten by individual readers.
const unset_field = "-" &redef;
## Flag that controls if the input framework accepts records
## that contain types that are not supported (at the moment
## file and function). If true, the input framework will
@ -115,7 +133,7 @@ export {
global add_event: function(description: Input::EventDescription) : bool;
## Remove an input stream. Returns true on success and false if the named stream was
## not found.
## not found.
##
## id: string value identifying the stream to be removed
global remove: function(id: string) : bool;

View file

@ -7,15 +7,15 @@ module InputAscii;
export {
## Separator between fields.
## Please note that the separator has to be exactly one character long
const separator = "\t" &redef;
const separator = Input::separator &redef;
## Separator between set elements.
## Please note that the separator has to be exactly one character long
const set_separator = "," &redef;
const set_separator = Input::set_separator &redef;
## String to use for empty fields.
const empty_field = "(empty)" &redef;
const empty_field = Input::empty_field &redef;
## String to use for an unset &optional field.
const unset_field = "-" &redef;
const unset_field = Input::unset_field &redef;
}

View file

@ -17,6 +17,23 @@ export {
## anything else.
const default_writer = WRITER_ASCII &redef;
## Default separator between fields for logwriters.
## Can be overwritten by individual writers.
const separator = "\t" &redef;
## Separator between set elements.
## Can be overwritten by individual writers.
const set_separator = "," &redef;
## String to use for empty fields. This should be different from
## *unset_field* to make the output unambiguous.
## Can be overwritten by individual writers.
const empty_field = "(empty)" &redef;
## String to use for an unset &optional field.
## Can be overwritten by individual writers.
const unset_field = "-" &redef;
## Type defining the content of a logging stream.
type Stream: record {
## A record type defining the log's columns.

View file

@ -28,17 +28,17 @@ export {
const meta_prefix = "#" &redef;
## Separator between fields.
const separator = "\t" &redef;
const separator = Log::separator &redef;
## Separator between set elements.
const set_separator = "," &redef;
const set_separator = Log::set_separator &redef;
## String to use for empty fields. This should be different from
## *unset_field* to make the output unambiguous.
const empty_field = "(empty)" &redef;
const empty_field = Log::empty_field &redef;
## String to use for an unset &optional field.
const unset_field = "-" &redef;
const unset_field = Log::unset_field &redef;
}
# Default function to postprocess a rotated ASCII log file. It moves the rotated

View file

@ -426,6 +426,7 @@ set(bro_SRCS
strsep.c
modp_numtoa.c
threading/AsciiFormatter.cc
threading/BasicThread.cc
threading/Manager.cc
threading/MsgThread.cc

View file

@ -57,9 +57,13 @@ public:
// Registers an escape sequence by appending it to escape_sequences.
// This overload takes a NUL-terminated C string.
void AddEscapeSequence(const char* s) { escape_sequences.push_back(s); }
// Same as above, but takes exactly n bytes starting at s, so the
// sequence does not have to be NUL-terminated.
void AddEscapeSequence(const char* s, size_t n)
{ escape_sequences.push_back(string(s, n)); }
// Same as above for a string object.
void AddEscapeSequence(const string & s)
{ escape_sequences.push_back(s); }
// Unregisters an escape sequence by calling remove() on escape_sequences.
// This overload takes a NUL-terminated C string.
// NOTE(review): if escape_sequences is a std::list, remove() erases every
// entry equal to s, not just the most recently added one -- confirm that
// callers never register the same sequence twice.
void RemoveEscapeSequence(const char* s) { escape_sequences.remove(s); }
// Same as above, but the sequence is given as n bytes starting at s.
void RemoveEscapeSequence(const char* s, size_t n)
{ escape_sequences.remove(string(s, n)); }
// Same as above for a string object.
void RemoveEscapeSequence(const string & s)
{ escape_sequences.remove(s); }
void PushIndent();
void PopIndent();
@ -114,6 +118,7 @@ public:
// Bypasses the escaping enabled via SetEscape().
// Forwards len bytes starting at s to AddBytesRaw().
void AddRaw(const char* s, int len) { AddBytesRaw(s, len); }
// Same as above for a string object; forwards its data() and size().
void AddRaw(const string &s) { AddBytesRaw(s.data(), s.size()); }
// Returns the description as a string.
const char* Description() const { return (const char*) base; }

View file

@ -281,50 +281,4 @@ bool ReaderBackend::OnHeartbeat(double network_time, double current_time)
return DoHeartbeat(network_time, current_time);
}
TransportProto ReaderBackend::StringToProto(const string &proto)
{
if ( proto == "unknown" )
return TRANSPORT_UNKNOWN;
else if ( proto == "tcp" )
return TRANSPORT_TCP;
else if ( proto == "udp" )
return TRANSPORT_UDP;
else if ( proto == "icmp" )
return TRANSPORT_ICMP;
Error(Fmt("Tried to parse invalid/unknown protocol: %s", proto.c_str()));
return TRANSPORT_UNKNOWN;
}
// More or less verbose copy from IPAddr.cc -- which uses reporter.
Value::addr_t ReaderBackend::StringToAddr(const string &s)
{
Value::addr_t val;
if ( s.find(':') == std::string::npos ) // IPv4.
{
val.family = IPv4;
if ( inet_aton(s.c_str(), &(val.in.in4)) <= 0 )
{
Error(Fmt("Bad address: %s", s.c_str()));
memset(&val.in.in4.s_addr, 0, sizeof(val.in.in4.s_addr));
}
}
else
{
val.family = IPv6;
if ( inet_pton(AF_INET6, s.c_str(), val.in.in6.s6_addr) <=0 )
{
Error(Fmt("Bad address: %s", s.c_str()));
memset(val.in.in6.s6_addr, 0, sizeof(val.in.in6.s6_addr));
}
}
return val;
}
}

View file

@ -315,21 +315,6 @@ protected:
*/
void EndCurrentSend();
/**
* Convert a string into a TransportProto. This is just a utility
* function for Readers.
*
* @param proto the transport protocol
*/
TransportProto StringToProto(const string &proto);
/**
* Convert a string into a Value::addr_t. This is just a utility
* function for Readers.
*
* @param addr containing an ipv4 or ipv6 address
*/
threading::Value::addr_t StringToAddr(const string &addr);
private:
// Frontend that instantiated us. This object must not be accessed

View file

@ -67,11 +67,14 @@ Ascii::Ascii(ReaderFrontend *frontend) : ReaderBackend(frontend)
unset_field.assign( (const char*) BifConst::InputAscii::unset_field->Bytes(),
BifConst::InputAscii::unset_field->Len());
ascii = new AsciiFormatter(this, AsciiFormatter::SeparatorInfo(set_separator, unset_field, empty_field));
}
// Destructor: calls DoClose() and then frees the AsciiFormatter that the
// constructor allocated with new.
Ascii::~Ascii()
{
DoClose();
// The formatter is owned by this reader (created in the constructor).
delete ascii;
}
void Ascii::DoClose()
@ -210,246 +213,6 @@ bool Ascii::GetLine(string& str)
return false;
}
bool Ascii::CheckNumberError(const string& s, const char * end)
{
// Do this check first, before executing s.c_str() or similar.
// otherwise the value to which *end is pointing at the moment might
// be gone ...
bool endnotnull = (*end != '\0');
if ( s.length() == 0 )
{
Error("Got empty string for number field");
return true;
}
if ( end == s.c_str() ) {
Error(Fmt("String '%s' contained no parseable number", s.c_str()));
return true;
}
if ( endnotnull )
Warning(Fmt("Number '%s' contained non-numeric trailing characters. Ignored trailing characters '%s'", s.c_str(), end));
if ( errno == EINVAL )
{
Error(Fmt("String '%s' could not be converted to a number", s.c_str()));
return true;
}
else if ( errno == ERANGE )
{
Error(Fmt("Number '%s' out of supported range.", s.c_str()));
return true;
}
return false;
}
Value* Ascii::EntryToVal(string s, FieldMapping field)
{
if ( s.compare(unset_field) == 0 ) // field is not set...
return new Value(field.type, false);
Value* val = new Value(field.type, true);
char* end = 0;
errno = 0;
switch ( field.type ) {
case TYPE_ENUM:
case TYPE_STRING:
s = get_unescaped_string(s);
val->val.string_val.length = s.size();
val->val.string_val.data = copy_string(s.c_str());
break;
case TYPE_BOOL:
if ( s == "T" )
val->val.int_val = 1;
else if ( s == "F" )
val->val.int_val = 0;
else
{
Error(Fmt("Field: %s Invalid value for boolean: %s",
field.name.c_str(), s.c_str()));
goto parse_error;
}
break;
case TYPE_INT:
val->val.int_val = strtoll(s.c_str(), &end, 10);
if ( CheckNumberError(s, end) )
goto parse_error;
break;
case TYPE_DOUBLE:
case TYPE_TIME:
case TYPE_INTERVAL:
val->val.double_val = strtod(s.c_str(), &end);
if ( CheckNumberError(s, end) )
goto parse_error;
break;
case TYPE_COUNT:
case TYPE_COUNTER:
val->val.uint_val = strtoull(s.c_str(), &end, 10);
if ( CheckNumberError(s, end) )
goto parse_error;
break;
case TYPE_PORT:
val->val.port_val.port = strtoull(s.c_str(), &end, 10);
if ( CheckNumberError(s, end) )
goto parse_error;
val->val.port_val.proto = TRANSPORT_UNKNOWN;
break;
case TYPE_SUBNET:
{
s = get_unescaped_string(s);
size_t pos = s.find("/");
if ( pos == s.npos )
{
Error(Fmt("Invalid value for subnet: %s", s.c_str()));
goto parse_error;
}
uint8_t width = (uint8_t) strtol(s.substr(pos+1).c_str(), &end, 10);
if ( CheckNumberError(s, end) )
goto parse_error;
string addr = s.substr(0, pos);
val->val.subnet_val.prefix = StringToAddr(addr);
val->val.subnet_val.length = width;
break;
}
case TYPE_ADDR:
s = get_unescaped_string(s);
val->val.addr_val = StringToAddr(s);
break;
case TYPE_TABLE:
case TYPE_VECTOR:
// First - common initialization
// Then - initialization for table.
// Then - initialization for vector.
// Then - common stuff
{
// how many entries do we have...
unsigned int length = 1;
for ( unsigned int i = 0; i < s.size(); i++ )
{
if ( s[i] == set_separator[0] )
length++;
}
unsigned int pos = 0;
bool error = false;
if ( s.compare(empty_field) == 0 )
length = 0;
Value** lvals = new Value* [length];
if ( field.type == TYPE_TABLE )
{
val->val.set_val.vals = lvals;
val->val.set_val.size = length;
}
else if ( field.type == TYPE_VECTOR )
{
val->val.vector_val.vals = lvals;
val->val.vector_val.size = length;
}
else
assert(false);
if ( length == 0 )
break; //empty
istringstream splitstream(s);
while ( splitstream )
{
string element;
if ( ! getline(splitstream, element, set_separator[0]) )
break;
if ( pos >= length )
{
Error(Fmt("Internal error while parsing set. pos %d >= length %d."
" Element: %s", pos, length, element.c_str()));
error = true;
break;
}
Value* newval = EntryToVal(element, field.subType());
if ( newval == 0 )
{
Error("Error while reading set or vector");
error = true;
break;
}
lvals[pos] = newval;
pos++;
}
// Test if the string ends with a set_separator... or if the
// complete string is empty. In either of these cases we have
// to push an empty val on top of it.
if ( ! error && (s.empty() || *s.rbegin() == set_separator[0]) )
{
lvals[pos] = EntryToVal("", field.subType());
if ( lvals[pos] == 0 )
{
Error("Error while trying to add empty set element");
goto parse_error;
}
pos++;
}
if ( error ) {
// We had an error while reading a set or a vector.
// Hence we have to clean up the values that have
// been read so far
for ( unsigned int i = 0; i < pos; i++ )
delete lvals[i];
goto parse_error;
}
if ( pos != length )
{
Error(Fmt("Internal error while parsing set: did not find all elements: %s", s.c_str()));
goto parse_error;
}
break;
}
default:
Error(Fmt("unsupported field format %d for %s", field.type,
field.name.c_str()));
goto parse_error;
}
return val;
parse_error:
delete val;
return 0;
}
// read the entire file and send appropriate thingies back to InputMgr
bool Ascii::DoUpdate()
{
@ -560,7 +323,8 @@ bool Ascii::DoUpdate()
return false;
}
Value* val = EntryToVal(stringfields[(*fit).position], *fit);
Value* val = ascii->ParseValue(stringfields[(*fit).position], (*fit).name, (*fit).type, (*fit).subtype);
if ( val == 0 )
{
Error(Fmt("Could not convert line '%s' to Val. Ignoring line.", line.c_str()));
@ -574,7 +338,7 @@ bool Ascii::DoUpdate()
assert(val->type == TYPE_PORT );
// Error(Fmt("Got type %d != PORT with secondary position!", val->type));
val->val.port_val.proto = StringToProto(stringfields[(*fit).secondary_position]);
val->val.port_val.proto = ascii->ParseProto(stringfields[(*fit).secondary_position]);
}
fields[fpos] = val;

View file

@ -7,6 +7,7 @@
#include <vector>
#include "../ReaderBackend.h"
#include "threading/AsciiFormatter.h"
namespace input { namespace reader {
@ -47,8 +48,6 @@ private:
bool ReadHeader(bool useCached);
bool GetLine(string& str);
threading::Value* EntryToVal(string s, FieldMapping type);
bool CheckNumberError(const string& s, const char * end);
ifstream* file;
time_t mtime;
@ -64,6 +63,8 @@ private:
string set_separator;
string empty_field;
string unset_field;
AsciiFormatter* ascii;
};

View file

@ -25,11 +25,15 @@ Benchmark::Benchmark(ReaderFrontend *frontend) : ReaderBackend(frontend)
stopspreadat = int(BifConst::InputBenchmark::stopspreadat);
timedspread = double(BifConst::InputBenchmark::timedspread);
heartbeat_interval = double(BifConst::Threading::heartbeat_interval);
ascii = new AsciiFormatter(this, AsciiFormatter::SeparatorInfo());
}
// Destructor: calls DoClose() and then frees the AsciiFormatter that the
// constructor allocated with new.
Benchmark::~Benchmark()
{
DoClose();
// The formatter is owned by this reader (created in the constructor).
delete ascii;
}
void Benchmark::DoClose()
@ -162,13 +166,13 @@ threading::Value* Benchmark::EntryToVal(TypeTag type, TypeTag subtype)
case TYPE_SUBNET:
{
val->val.subnet_val.prefix = StringToAddr("192.168.17.1");
val->val.subnet_val.prefix = ascii->ParseAddr("192.168.17.1");
val->val.subnet_val.length = 16;
}
break;
case TYPE_ADDR:
val->val.addr_val = StringToAddr("192.168.17.1");
val->val.addr_val = ascii->ParseAddr("192.168.17.1");
break;
case TYPE_TABLE:

View file

@ -4,6 +4,7 @@
#define INPUT_READERS_BENCHMARK_H
#include "../ReaderBackend.h"
#include "threading/AsciiFormatter.h"
namespace input { namespace reader {
@ -38,6 +39,8 @@ private:
double heartbeatstarttime;
double timedspread;
double heartbeat_interval;
AsciiFormatter* ascii;
};

View file

@ -1,7 +1,6 @@
// See the file "COPYING" in the main distribution directory for copyright.
#include "util.h"
#include "bro_inet_ntop.h"
#include "threading/SerialTypes.h"
#include "Manager.h"
@ -328,46 +327,3 @@ bool WriterBackend::OnHeartbeat(double network_time, double current_time)
SendOut(new FlushWriteBufferMessage(frontend));
return DoHeartbeat(network_time, current_time);
}
string WriterBackend::Render(const threading::Value::addr_t& addr) const
{
if ( addr.family == IPv4 )
{
char s[INET_ADDRSTRLEN];
if ( ! bro_inet_ntop(AF_INET, &addr.in.in4, s, INET_ADDRSTRLEN) )
return "<bad IPv4 address conversion>";
else
return s;
}
else
{
char s[INET6_ADDRSTRLEN];
if ( ! bro_inet_ntop(AF_INET6, &addr.in.in6, s, INET6_ADDRSTRLEN) )
return "<bad IPv6 address conversion>";
else
return s;
}
}
string WriterBackend::Render(const threading::Value::subnet_t& subnet) const
{
char l[16];
if ( subnet.prefix.family == IPv4 )
modp_uitoa10(subnet.length - 96, l);
else
modp_uitoa10(subnet.length, l);
string s = Render(subnet.prefix) + "/" + l;
return s;
}
string WriterBackend::Render(double d) const
{
char buf[256];
modp_dtoa(d, buf, 6);
return buf;
}

View file

@ -256,30 +256,6 @@ public:
*/
bool FinishedRotation();
/** Helper method to render an IP address as a string.
*
* @param addr The address.
*
* @return An ASCII representation of the address.
*/
string Render(const threading::Value::addr_t& addr) const;
/** Helper method to render an subnet value as a string.
*
* @param addr The address.
*
* @return An ASCII representation of the address.
*/
string Render(const threading::Value::subnet_t& subnet) const;
/** Helper method to render a double in Bro's standard precision.
*
* @param d The double.
*
* @return An ASCII representation of the double.
*/
string Render(double d) const;
// Overridden from MsgThread.
virtual bool OnHeartbeat(double network_time, double current_time);
virtual bool OnFinish(double network_time);

View file

@ -24,33 +24,35 @@ Ascii::Ascii(WriterFrontend* frontend) : WriterBackend(frontend)
output_to_stdout = BifConst::LogAscii::output_to_stdout;
include_meta = BifConst::LogAscii::include_meta;
separator_len = BifConst::LogAscii::separator->Len();
separator = new char[separator_len];
memcpy(separator, BifConst::LogAscii::separator->Bytes(),
separator_len);
separator.assign(
(const char*) BifConst::LogAscii::separator->Bytes(),
BifConst::LogAscii::separator->Len()
);
set_separator_len = BifConst::LogAscii::set_separator->Len();
set_separator = new char[set_separator_len];
memcpy(set_separator, BifConst::LogAscii::set_separator->Bytes(),
set_separator_len);
set_separator.assign(
(const char*) BifConst::LogAscii::set_separator->Bytes(),
BifConst::LogAscii::set_separator->Len()
);
empty_field_len = BifConst::LogAscii::empty_field->Len();
empty_field = new char[empty_field_len];
memcpy(empty_field, BifConst::LogAscii::empty_field->Bytes(),
empty_field_len);
empty_field.assign(
(const char*) BifConst::LogAscii::empty_field->Bytes(),
BifConst::LogAscii::empty_field->Len()
);
unset_field_len = BifConst::LogAscii::unset_field->Len();
unset_field = new char[unset_field_len];
memcpy(unset_field, BifConst::LogAscii::unset_field->Bytes(),
unset_field_len);
unset_field.assign(
(const char*) BifConst::LogAscii::unset_field->Bytes(),
BifConst::LogAscii::unset_field->Len()
);
meta_prefix_len = BifConst::LogAscii::meta_prefix->Len();
meta_prefix = new char[meta_prefix_len];
memcpy(meta_prefix, BifConst::LogAscii::meta_prefix->Bytes(),
meta_prefix_len);
meta_prefix.assign(
(const char*) BifConst::LogAscii::meta_prefix->Bytes(),
BifConst::LogAscii::meta_prefix->Len()
);
desc.EnableEscaping();
desc.AddEscapeSequence(separator, separator_len);
desc.AddEscapeSequence(separator);
ascii = new AsciiFormatter(this, AsciiFormatter::SeparatorInfo(set_separator, unset_field, empty_field));
}
Ascii::~Ascii()
@ -61,17 +63,12 @@ Ascii::~Ascii()
abort();
}
delete [] separator;
delete [] set_separator;
delete [] empty_field;
delete [] unset_field;
delete [] meta_prefix;
delete ascii;
}
bool Ascii::WriteHeaderField(const string& key, const string& val)
{
string str = string(meta_prefix, meta_prefix_len) +
key + string(separator, separator_len) + val + "\n";
string str = meta_prefix + key + separator + val + "\n";
return safe_write(fd, str.c_str(), str.length());
}
@ -136,8 +133,8 @@ bool Ascii::DoInit(const WriterInfo& info, int num_fields, const Field* const *
{
if ( i > 0 )
{
names += string(separator, separator_len);
types += string(separator, separator_len);
names += separator;
types += separator;
}
names += string(fields[i]->name);
@ -154,20 +151,17 @@ bool Ascii::DoInit(const WriterInfo& info, int num_fields, const Field* const *
return true;
}
string str = string(meta_prefix, meta_prefix_len)
string str = meta_prefix
+ "separator " // Always use space as separator here.
+ get_escaped_string(string(separator, separator_len), false)
+ get_escaped_string(separator, false)
+ "\n";
if ( ! safe_write(fd, str.c_str(), str.length()) )
goto write_error;
if ( ! (WriteHeaderField("set_separator", get_escaped_string(
string(set_separator, set_separator_len), false)) &&
WriteHeaderField("empty_field", get_escaped_string(
string(empty_field, empty_field_len), false)) &&
WriteHeaderField("unset_field", get_escaped_string(
string(unset_field, unset_field_len), false)) &&
if ( ! (WriteHeaderField("set_separator", get_escaped_string(set_separator, false)) &&
WriteHeaderField("empty_field", get_escaped_string(empty_field, false)) &&
WriteHeaderField("unset_field", get_escaped_string(unset_field, false)) &&
WriteHeaderField("path", get_escaped_string(path, false)) &&
WriteHeaderField("open", Timestamp(0))) )
goto write_error;
@ -176,7 +170,7 @@ bool Ascii::DoInit(const WriterInfo& info, int num_fields, const Field* const *
&& WriteHeaderField("types", types)) )
goto write_error;
}
return true;
write_error:
@ -205,151 +199,6 @@ bool Ascii::DoFinish(double network_time)
return true;
}
bool Ascii::DoWriteOne(ODesc* desc, Value* val, const Field* field)
{
if ( ! val->present )
{
desc->AddN(unset_field, unset_field_len);
return true;
}
switch ( val->type ) {
case TYPE_BOOL:
desc->Add(val->val.int_val ? "T" : "F");
break;
case TYPE_INT:
desc->Add(val->val.int_val);
break;
case TYPE_COUNT:
case TYPE_COUNTER:
desc->Add(val->val.uint_val);
break;
case TYPE_PORT:
desc->Add(val->val.port_val.port);
break;
case TYPE_SUBNET:
desc->Add(Render(val->val.subnet_val));
break;
case TYPE_ADDR:
desc->Add(Render(val->val.addr_val));
break;
case TYPE_DOUBLE:
// Rendering via Add() truncates trailing 0s after the
// decimal point. The difference with TIME/INTERVAL is mainly
// to keep the log format consistent.
desc->Add(val->val.double_val);
break;
case TYPE_INTERVAL:
case TYPE_TIME:
// Rendering via Render() keeps trailing 0s after the decimal
// point. The difference with DOUBLEis mainly to keep the log
// format consistent.
desc->Add(Render(val->val.double_val));
break;
case TYPE_ENUM:
case TYPE_STRING:
case TYPE_FILE:
case TYPE_FUNC:
{
int size = val->val.string_val.length;
const char* data = val->val.string_val.data;
if ( ! size )
{
desc->AddN(empty_field, empty_field_len);
break;
}
if ( size == unset_field_len && memcmp(data, unset_field, size) == 0 )
{
// The value we'd write out would match exactly the
// place-holder we use for unset optional fields. We
// escape the first character so that the output
// won't be ambigious.
static const char hex_chars[] = "0123456789abcdef";
char hex[6] = "\\x00";
hex[2] = hex_chars[((*data) & 0xf0) >> 4];
hex[3] = hex_chars[(*data) & 0x0f];
desc->AddRaw(hex, 4);
++data;
--size;
}
if ( size )
desc->AddN(data, size);
break;
}
case TYPE_TABLE:
{
if ( ! val->val.set_val.size )
{
desc->AddN(empty_field, empty_field_len);
break;
}
desc->AddEscapeSequence(set_separator, set_separator_len);
for ( int j = 0; j < val->val.set_val.size; j++ )
{
if ( j > 0 )
desc->AddRaw(set_separator, set_separator_len);
if ( ! DoWriteOne(desc, val->val.set_val.vals[j], field) )
{
desc->RemoveEscapeSequence(set_separator, set_separator_len);
return false;
}
}
desc->RemoveEscapeSequence(set_separator, set_separator_len);
break;
}
case TYPE_VECTOR:
{
if ( ! val->val.vector_val.size )
{
desc->AddN(empty_field, empty_field_len);
break;
}
desc->AddEscapeSequence(set_separator, set_separator_len);
for ( int j = 0; j < val->val.vector_val.size; j++ )
{
if ( j > 0 )
desc->AddRaw(set_separator, set_separator_len);
if ( ! DoWriteOne(desc, val->val.vector_val.vals[j], field) )
{
desc->RemoveEscapeSequence(set_separator, set_separator_len);
return false;
}
}
desc->RemoveEscapeSequence(set_separator, set_separator_len);
break;
}
default:
Error(Fmt("unsupported field format %d for %s", val->type, field->name));
return false;
}
return true;
}
bool Ascii::DoWrite(int num_fields, const Field* const * fields,
Value** vals)
{
@ -361,9 +210,9 @@ bool Ascii::DoWrite(int num_fields, const Field* const * fields,
for ( int i = 0; i < num_fields; i++ )
{
if ( i > 0 )
desc.AddRaw(separator, separator_len);
desc.AddRaw(separator);
if ( ! DoWriteOne(&desc, vals[i], fields[i]) )
if ( ! ascii->Describe(&desc, vals[i], fields[i]->name) )
return false;
}
@ -372,7 +221,7 @@ bool Ascii::DoWrite(int num_fields, const Field* const * fields,
const char* bytes = (const char*)desc.Bytes();
int len = desc.Len();
if ( strncmp(bytes, meta_prefix, meta_prefix_len) == 0 )
if ( strncmp(bytes, meta_prefix.data(), meta_prefix.size()) == 0 )
{
// It would so escape the first character.
char buf[16];

View file

@ -6,6 +6,7 @@
#define LOGGING_WRITER_ASCII_H
#include "../WriterBackend.h"
#include "threading/AsciiFormatter.h"
namespace logging { namespace writer {
@ -32,7 +33,6 @@ protected:
private:
bool IsSpecial(string path) { return path.find("/dev/") == 0; }
bool DoWriteOne(ODesc* desc, threading::Value* val, const threading::Field* field);
bool WriteHeaderField(const string& key, const string& value);
void CloseFile(double t);
string Timestamp(double t); // Uses current time if t is zero.
@ -47,20 +47,13 @@ private:
bool include_meta;
bool tsv;
char* separator;
int separator_len;
string separator;
string set_separator;
string empty_field;
string unset_field;
string meta_prefix;
char* set_separator;
int set_separator_len;
char* empty_field;
int empty_field_len;
char* unset_field;
int unset_field_len;
char* meta_prefix;
int meta_prefix_len;
AsciiFormatter* ascii;
};
}

View file

@ -46,10 +46,10 @@ std::string DataSeries::LogValueToString(threading::Value *val)
}
case TYPE_SUBNET:
return Render(val->val.subnet_val);
return ascii->Render(val->val.subnet_val);
case TYPE_ADDR:
return Render(val->val.addr_val);
return ascii->Render(val->val.addr_val);
// Note: These two cases are relatively special. We need to convert
// these values into their integer equivalents to maximize precision.
@ -69,10 +69,10 @@ std::string DataSeries::LogValueToString(threading::Value *val)
return ostr.str();
}
else
return Render(val->val.double_val);
return ascii->Render(val->val.double_val);
case TYPE_DOUBLE:
return Render(val->val.double_val);
return ascii->Render(val->val.double_val);
case TYPE_ENUM:
case TYPE_STRING:
@ -231,11 +231,14 @@ DataSeries::DataSeries(WriterFrontend* frontend) : WriterBackend(frontend)
ds_num_threads = BifConst::LogDataSeries::num_threads;
ds_use_integer_for_time = BifConst::LogDataSeries::use_integer_for_time;
ds_set_separator = ",";
ascii = new AsciiFormatter(this, AsciiFormatter::SeparatorInfo());
}
DataSeries::~DataSeries()
{
}
{
delete ascii;
}
bool DataSeries::OpenLog(string path)
{

View file

@ -12,6 +12,7 @@
#include <DataSeries/GeneralField.hpp>
#include "../WriterBackend.h"
#include "threading/AsciiFormatter.h"
namespace logging { namespace writer {
@ -116,6 +117,8 @@ private:
bool ds_dump_schema;
bool ds_use_integer_for_time;
string ds_set_separator;
AsciiFormatter* ascii;
};
}

View file

@ -16,6 +16,7 @@
#include "BroString.h"
#include "NetVar.h"
#include "threading/SerialTypes.h"
#include "threading/AsciiFormatter.h"
#include <curl/curl.h>
#include <curl/easy.h>
@ -51,11 +52,14 @@ ElasticSearch::ElasticSearch(WriterFrontend* frontend) : WriterBackend(frontend)
transfer_timeout = static_cast<long>(BifConst::LogElasticSearch::transfer_timeout);
curl_handle = HTTPSetup();
ascii = new AsciiFormatter(this, AsciiFormatter::SeparatorInfo());
}
// Destructor: frees the cluster_name array and the AsciiFormatter that the
// constructor allocated with new.
ElasticSearch::~ElasticSearch()
{
delete [] cluster_name;
// The formatter is owned by this writer (created in the constructor).
delete ascii;
}
bool ElasticSearch::DoInit(const WriterInfo& info, int num_fields, const threading::Field* const* fields)
@ -124,13 +128,13 @@ bool ElasticSearch::AddValueToBuffer(ODesc* b, Value* val)
case TYPE_SUBNET:
b->AddRaw("\"", 1);
b->Add(Render(val->val.subnet_val));
b->Add(ascii->Render(val->val.subnet_val));
b->AddRaw("\"", 1);
break;
case TYPE_ADDR:
b->AddRaw("\"", 1);
b->Add(Render(val->val.addr_val));
b->Add(ascii->Render(val->val.addr_val));
b->AddRaw("\"", 1);
break;

View file

@ -72,6 +72,8 @@ private:
bool failing;
uint64 batch_size;
AsciiFormatter* ascii;
};
}

View file

@ -0,0 +1,508 @@
// See the file "COPYING" in the main distribution directory for copyright.
#include "config.h"
#include <sstream>
#include <errno.h>
#include "AsciiFormatter.h"
#include "bro_inet_ntop.h"
// Default constructor. Sets all three separator strings to the conspicuous
// place-holder text "SHOULD_NOT_BE_USED".
AsciiFormatter::SeparatorInfo::SeparatorInfo()
	: set_separator("SHOULD_NOT_BE_USED"),
	  unset_field("SHOULD_NOT_BE_USED"),
	  empty_field("SHOULD_NOT_BE_USED")
	{
	}
// Constructor that stores copies of the three given strings.
//
// set_separator: separator placed between the elements of a set/vector.
// unset_field: string that represents a value that is not present.
// empty_field: string that represents an empty string or container.
AsciiFormatter::SeparatorInfo::SeparatorInfo(const string & set_separator,
		const string & unset_field, const string & empty_field)
	// In a member-initializer list the name before the parenthesis is the
	// member; the name inside it resolves to the parameter.
	: set_separator(set_separator),
	  unset_field(unset_field),
	  empty_field(empty_field)
	{
	}
// Constructor.
//
// t: thread whose Error()/Fmt() methods are used for reporting; only the
//    pointer is stored.
// info: separator configuration; a copy is kept in the formatter.
AsciiFormatter::AsciiFormatter(threading::MsgThread* t, const SeparatorInfo info)
	: thread(t), separators(info)
	{
	}
// Destructor. The body is empty: in particular, the thread pointer passed
// to the constructor is not deleted here.
AsciiFormatter::~AsciiFormatter()
{
}
// Appends an ASCII rendering of a value to an ODesc.
//
// desc: the descriptor that receives the output.
// val: the value to render. Values that are not present are written as
//      the configured unset_field string.
// name: the field name; only used in the error message for unsupported types.
//
// Returns true on success. Returns false if the value, or any element of a
// set/vector value, has a type that cannot be rendered; in that case an
// error has been reported through the thread given to the constructor.
bool AsciiFormatter::Describe(ODesc* desc, threading::Value* val, const string& name) const
	{
	if ( ! val->present )
		{
		desc->Add(separators.unset_field);
		return true;
		}

	switch ( val->type ) {

	case TYPE_BOOL:
		desc->Add(val->val.int_val ? "T" : "F");
		return true;

	case TYPE_INT:
		desc->Add(val->val.int_val);
		return true;

	case TYPE_COUNT:
	case TYPE_COUNTER:
		desc->Add(val->val.uint_val);
		return true;

	case TYPE_PORT:
		// Only the port number is written here.
		desc->Add(val->val.port_val.port);
		return true;

	case TYPE_SUBNET:
		desc->Add(Render(val->val.subnet_val));
		return true;

	case TYPE_ADDR:
		desc->Add(Render(val->val.addr_val));
		return true;

	case TYPE_DOUBLE:
		// Going through Add() drops trailing 0s after the decimal
		// point. TIME/INTERVAL are handled differently below mainly
		// to keep the log format consistent.
		desc->Add(val->val.double_val);
		return true;

	case TYPE_INTERVAL:
	case TYPE_TIME:
		// Going through Render() keeps trailing 0s after the decimal
		// point. DOUBLE is handled differently above mainly to keep
		// the log format consistent.
		desc->Add(Render(val->val.double_val));
		return true;

	case TYPE_ENUM:
	case TYPE_STRING:
	case TYPE_FILE:
	case TYPE_FUNC:
		{
		int remaining = val->val.string_val.length;
		const char* bytes = val->val.string_val.data;

		if ( remaining == 0 )
			{
			desc->Add(separators.empty_field);
			return true;
			}

		if ( remaining == (int)separators.unset_field.size() &&
		     memcmp(bytes, separators.unset_field.data(), remaining) == 0 )
			{
			// Written verbatim, the string would be indistinguishable
			// from the place-holder for unset optional fields. Emit its
			// first character as a raw "\xNN" hex escape so that the
			// output stays unambiguous.
			static const char hex_chars[] = "0123456789abcdef";
			char hex[6] = "\\x00";
			hex[2] = hex_chars[((*bytes) & 0xf0) >> 4];
			hex[3] = hex_chars[(*bytes) & 0x0f];
			desc->AddRaw(hex, 4);

			++bytes;
			--remaining;
			}

		if ( remaining )
			desc->AddN(bytes, remaining);

		return true;
		}

	case TYPE_TABLE:
		{
		if ( ! val->val.set_val.size )
			{
			desc->Add(separators.empty_field);
			return true;
			}

		// While the elements are written, the set separator is
		// registered as an escape sequence; the separators between the
		// elements are added raw.
		desc->AddEscapeSequence(separators.set_separator);

		bool ok = true;

		for ( int i = 0; ok && i < val->val.set_val.size; i++ )
			{
			if ( i > 0 )
				desc->AddRaw(separators.set_separator);

			ok = Describe(desc, val->val.set_val.vals[i], name);
			}

		// Unregister the separator on success and on failure alike.
		desc->RemoveEscapeSequence(separators.set_separator);
		return ok;
		}

	case TYPE_VECTOR:
		{
		if ( ! val->val.vector_val.size )
			{
			desc->Add(separators.empty_field);
			return true;
			}

		// Same scheme as for TYPE_TABLE above.
		desc->AddEscapeSequence(separators.set_separator);

		bool ok = true;

		for ( int i = 0; ok && i < val->val.vector_val.size; i++ )
			{
			if ( i > 0 )
				desc->AddRaw(separators.set_separator);

			ok = Describe(desc, val->val.vector_val.vals[i], name);
			}

		desc->RemoveEscapeSequence(separators.set_separator);
		return ok;
		}

	default:
		thread->Error(thread->Fmt("unsupported field format %d for %s", val->type, name.c_str()));
		return false;
	}
	}
/**
 * Parses the ASCII representation of a field into a newly allocated
 * threading value.
 *
 * @param s The text to parse. If it equals the configured unset-field
 * marker, a value of the requested type that is flagged as not present is
 * returned.
 *
 * @param name Field name; only used in error messages.
 *
 * @param type The type to parse the text as.
 *
 * @param subtype Element type, used when \a type is TYPE_TABLE or
 * TYPE_VECTOR; each element is parsed recursively with this type.
 *
 * @return The new value (allocated with new), or 0 if parsing failed.
 * Failures are reported through the thread's Error().
 */
threading::Value* AsciiFormatter::ParseValue(string s, string name, TypeTag type, TypeTag subtype) const
	{
	if ( s.compare(separators.unset_field) == 0 ) // field is not set...
		return new threading::Value(type, false);

	threading::Value* val = new threading::Value(type, true);
	char* end = 0;

	// The strto*() calls below only set errno on failure, so it has to
	// be cleared before CheckNumberError() can interpret it.
	errno = 0;

	switch ( type ) {
	case TYPE_ENUM:
	case TYPE_STRING:
		s = get_unescaped_string(s);
		val->val.string_val.length = s.size();
		val->val.string_val.data = copy_string(s.c_str());
		break;

	case TYPE_BOOL:
		if ( s == "T" )
			val->val.int_val = 1;
		else if ( s == "F" )
			val->val.int_val = 0;
		else
			{
			thread->Error(thread->Fmt("Field: %s Invalid value for boolean: %s",
						  name.c_str(), s.c_str()));
			goto parse_error;
			}
		break;

	case TYPE_INT:
		val->val.int_val = strtoll(s.c_str(), &end, 10);
		if ( CheckNumberError(s, end) )
			goto parse_error;
		break;

	case TYPE_DOUBLE:
	case TYPE_TIME:
	case TYPE_INTERVAL:
		val->val.double_val = strtod(s.c_str(), &end);
		if ( CheckNumberError(s, end) )
			goto parse_error;
		break;

	case TYPE_COUNT:
	case TYPE_COUNTER:
		val->val.uint_val = strtoull(s.c_str(), &end, 10);
		if ( CheckNumberError(s, end) )
			goto parse_error;
		break;

	case TYPE_PORT:
		val->val.port_val.port = strtoull(s.c_str(), &end, 10);
		if ( CheckNumberError(s, end) )
			goto parse_error;

		// The text carries only the port number, not the protocol.
		val->val.port_val.proto = TRANSPORT_UNKNOWN;
		break;

	case TYPE_SUBNET:
		{
		s = get_unescaped_string(s);
		size_t pos = s.find("/");
		if ( pos == s.npos )
			{
			thread->Error(thread->Fmt("Invalid value for subnet: %s", s.c_str()));
			goto parse_error;
			}

		// Keep the width text in a named local. strtol() leaves 'end'
		// pointing into the buffer it parsed, so parsing a temporary
		// would leave 'end' dangling by the time CheckNumberError()
		// dereferences it. Validating against this same string also
		// lets the "empty" and "no parseable number" checks work for
		// the width.
		string width_str = s.substr(pos + 1);
		uint8_t width = (uint8_t) strtol(width_str.c_str(), &end, 10);

		if ( CheckNumberError(width_str, end) )
			goto parse_error;

		string addr = s.substr(0, pos);

		val->val.subnet_val.prefix = ParseAddr(addr);
		val->val.subnet_val.length = width;
		break;
		}

	case TYPE_ADDR:
		s = get_unescaped_string(s);
		val->val.addr_val = ParseAddr(s);
		break;

	case TYPE_TABLE:
	case TYPE_VECTOR:
		// Both container types share the same parsing; they differ
		// only in which union member receives the element array.
		{
		// Number of elements: one more than the number of separator
		// characters. Only the first character of the set separator
		// is used for splitting.
		unsigned int length = 1;
		for ( unsigned int i = 0; i < s.size(); i++ )
			{
			if ( s[i] == separators.set_separator[0] )
				length++;
			}

		unsigned int pos = 0;
		bool error = false;

		// The empty-field marker denotes a container without elements.
		if ( separators.empty_field.size() > 0 && s.compare(separators.empty_field) == 0 )
			length = 0;

		threading::Value** lvals = new threading::Value* [length];

		if ( type == TYPE_TABLE )
			{
			val->val.set_val.vals = lvals;
			val->val.set_val.size = length;
			}

		else if ( type == TYPE_VECTOR )
			{
			val->val.vector_val.vals = lvals;
			val->val.vector_val.size = length;
			}

		else
			assert(false);

		if ( length == 0 )
			break; //empty

		istringstream splitstream(s);
		while ( splitstream )
			{
			string element;

			if ( ! getline(splitstream, element, separators.set_separator[0]) )
				break;

			if ( pos >= length )
				{
				thread->Error(thread->Fmt("Internal error while parsing set. pos %d >= length %d."
							  " Element: %s", pos, length, element.c_str()));
				error = true;
				break;
				}

			threading::Value* newval = ParseValue(element, name, subtype);
			if ( newval == 0 )
				{
				thread->Error("Error while reading set or vector");
				error = true;
				break;
				}

			lvals[pos] = newval;

			pos++;
			}

		// getline() yields no final element when the string ends with
		// the separator or is completely empty. In either case an
		// empty element still has to be appended.
		if ( ! error && (s.empty() || *s.rbegin() == separators.set_separator[0]) )
			{
			lvals[pos] = ParseValue("", name, subtype);
			if ( lvals[pos] == 0 )
				{
				thread->Error("Error while trying to add empty set element");
				goto parse_error;
				}

			pos++;
			}

		if ( error ) {
			// Parsing stopped part-way; release the elements that
			// were created so far.
			// NOTE(review): val still records `length` entries at
			// this point -- confirm that threading::Value's
			// destructor does not free lvals[] a second time.
			for ( unsigned int i = 0; i < pos; i++ )
				delete lvals[i];

			goto parse_error;
		}

		if ( pos != length )
			{
			thread->Error(thread->Fmt("Internal error while parsing set: did not find all elements: %s", s.c_str()));
			goto parse_error;
			}

		break;
		}

	default:
		thread->Error(thread->Fmt("unsupported field format %d for %s", type,
					  name.c_str()));
		goto parse_error;
	}

	return val;

parse_error:
	delete val;
	return 0;
	}
/**
 * Checks the outcome of a preceding strto*() conversion.
 *
 * The caller must have reset errno before the conversion and must pass the
 * end pointer that the conversion produced.
 *
 * @param s The string that was converted.
 *
 * @param end End pointer as set by the conversion function; must point into
 * a buffer that is still alive.
 *
 * @return True if the conversion failed (the error is reported through the
 * thread's Error()), false if the number is usable. Trailing non-numeric
 * characters only trigger a warning.
 */
bool AsciiFormatter::CheckNumberError(const string& s, const char* end) const
	{
	// Capture errno before anything else runs: the reporting calls below
	// may modify it, which would hide or fake a conversion error.
	const int conversion_errno = errno;

	// Do this check first, before executing s.c_str() or similar.
	// otherwise the value to which *end is pointing at the moment might
	// be gone ...
	bool endnotnull = (*end != '\0');

	if ( s.length() == 0 )
		{
		thread->Error("Got empty string for number field");
		return true;
		}

	// The conversion consumed nothing at all.
	if ( end == s.c_str() ) {
		thread->Error(thread->Fmt("String '%s' contained no parseable number", s.c_str()));
		return true;
	}

	if ( endnotnull )
		thread->Warning(thread->Fmt("Number '%s' contained non-numeric trailing characters. Ignored trailing characters '%s'", s.c_str(), end));

	if ( conversion_errno == EINVAL )
		{
		thread->Error(thread->Fmt("String '%s' could not be converted to a number", s.c_str()));
		return true;
		}

	else if ( conversion_errno == ERANGE )
		{
		thread->Error(thread->Fmt("Number '%s' out of supported range.", s.c_str()));
		return true;
		}

	return false;
	}
// Renders an address as text. Addresses whose family is not IPv4 are
// formatted as IPv6. If the conversion fails, a fixed placeholder string is
// returned instead.
string AsciiFormatter::Render(const threading::Value::addr_t& addr) const
	{
	if ( addr.family != IPv4 )
		{
		char text[INET6_ADDRSTRLEN];

		if ( bro_inet_ntop(AF_INET6, &addr.in.in6, text, INET6_ADDRSTRLEN) )
			return text;

		return "<bad IPv6 address conversion>";
		}

	char text[INET_ADDRSTRLEN];

	if ( bro_inet_ntop(AF_INET, &addr.in.in4, text, INET_ADDRSTRLEN) )
		return text;

	return "<bad IPv4 address conversion>";
	}
// Maps a protocol name ("unknown", "tcp", "udp", "icmp") to its
// TransportProto value. Any other name is reported through the thread's
// Error() and yields TRANSPORT_UNKNOWN.
TransportProto AsciiFormatter::ParseProto(const string &proto) const
	{
	static const struct
		{
		const char* label;
		TransportProto value;
		} known[] = {
		{ "unknown", TRANSPORT_UNKNOWN },
		{ "tcp", TRANSPORT_TCP },
		{ "udp", TRANSPORT_UDP },
		{ "icmp", TRANSPORT_ICMP }
	};

	for ( size_t i = 0; i < sizeof(known) / sizeof(known[0]); ++i )
		{
		if ( proto == known[i].label )
			return known[i].value;
		}

	thread->Error(thread->Fmt("Tried to parse invalid/unknown protocol: %s", proto.c_str()));

	return TRANSPORT_UNKNOWN;
	}
// More or less verbose copy from IPAddr.cc -- which uses reporter.
//
// A string containing ':' is parsed as IPv6, anything else as IPv4. If
// parsing fails, the error is reported through the thread's Error() and the
// address bytes of the selected family are zeroed.
threading::Value::addr_t AsciiFormatter::ParseAddr(const string &s) const
	{
	threading::Value::addr_t result;

	const bool has_colon = s.find(':') != std::string::npos;

	if ( has_colon )
		{
		result.family = IPv6;

		if ( inet_pton(AF_INET6, s.c_str(), result.in.in6.s6_addr) <= 0 )
			{
			thread->Error(thread->Fmt("Bad address: %s", s.c_str()));
			memset(result.in.in6.s6_addr, 0, sizeof(result.in.in6.s6_addr));
			}

		return result;
		}

	result.family = IPv4;

	if ( inet_aton(s.c_str(), &(result.in.in4)) <= 0 )
		{
		thread->Error(thread->Fmt("Bad address: %s", s.c_str()));
		memset(&result.in.in4.s_addr, 0, sizeof(result.in.in4.s_addr));
		}

	return result;
	}
// Renders a subnet as "<address>/<length>". For IPv4 prefixes, 96 is
// subtracted from the stored length before it is printed.
string AsciiFormatter::Render(const threading::Value::subnet_t& subnet) const
	{
	char digits[16];

	modp_uitoa10(subnet.prefix.family == IPv4 ? subnet.length - 96 : subnet.length, digits);

	string text = Render(subnet.prefix);
	text += "/";
	text += digits;

	return text;
	}
// Renders a double through modp_dtoa() with a precision argument of 6.
string AsciiFormatter::Render(double d) const
	{
	char text[256];
	modp_dtoa(d, text, 6);

	return string(text);
	}

View file

@ -0,0 +1,142 @@
// See the file "COPYING" in the main distribution directory for copyright.
#ifndef THREADING_ASCII_FORMATTER_H
#define THREADING_ASCII_FORMATTER_H
#include "../Desc.h"
#include "MsgThread.h"
/**
* A thread-safe class for converting values into a readable ASCII
* representation, and vice versa. This is a utility class that factors out
* common rendering/parsing code needed by a number of input/output threads.
*
* Errors are reported through the Error()/Warning() methods of the thread
* passed to the constructor.
*/
class AsciiFormatter {
public:
/**
* A struct to pass the necessary configuration values to the
* AsciiFormatter module on initialization.
*/
struct SeparatorInfo
{
string set_separator; // Separator between set elements.
string unset_field; // String marking an unset field.
string empty_field; // String marking an empty (but set) field.
/**
* Constructor that sets the separators etc. to dummy values.
* Useful if you use only methods that don't need any of them,
* like ParseAddr(), ParseProto(), or the Render() methods.
*/
SeparatorInfo();
/**
* Constructor that defines all the configuration options.
* Use if you need either Describe() or ParseValue().
*
* @param set_separator Separator between set elements.
*
* @param unset_field String marking an unset field.
*
* @param empty_field String marking an empty (but set) field.
*/
SeparatorInfo(const string& set_separator, const string& unset_field, const string& empty_field);
};
/**
* Constructor.
*
* @param t The thread that uses this class instance. The class uses
* some of the thread's methods, e.g., for error reporting and
* internal formatting.
*
* @param info SeparatorInfo structure defining the necessary
* separators. The formatter keeps its own copy.
*/
AsciiFormatter(threading::MsgThread* t, const SeparatorInfo info);
/**
* Destructor.
*/
~AsciiFormatter();
/**
* Convert a threading value into a corresponding ASCII
* representation.
*
* @param desc The ODesc object to write to.
*
* @param val The Value to render to the ODesc object.
*
* @param name The name of a field associated with the value. Used only
* for error reporting.
*
* @return Returns true on success, false on error. Errors are also
* reported through the thread passed to the constructor.
*/
bool Describe(ODesc* desc, threading::Value* val, const string& name) const;
/**
* Convert an IP address into a string.
*
* @param addr The address.
*
* @return An ASCII representation of the address.
*/
string Render(const threading::Value::addr_t& addr) const;
/**
* Convert a subnet value into a string of the form
* "<address>/<length>".
*
* @param subnet The subnet.
*
* @return An ASCII representation of the subnet.
*/
string Render(const threading::Value::subnet_t& subnet) const;
/**
* Convert a double into a string. This renders the double with Bro's
* standard precision.
*
* @param d The double.
*
* @return An ASCII representation of the double.
*/
string Render(double d) const;
/**
* Convert the ASCII representation of a field into a value.
*
* @param s The string to parse.
*
* @param name The name of a field associated with the value. Used only
* for error reporting.
*
* @param type The type to parse the string as.
*
* @param subtype The type of the individual elements if \a type is a
* table (set) or vector.
*
* @return The new value, or null on error. Errors are also reported
* through the thread passed to the constructor.
*/
threading::Value* ParseValue(string s, string name, TypeTag type, TypeTag subtype = TYPE_ERROR) const;
/**
* Convert a string into a TransportProto. The string must be one of
* \c tcp, \c udp, \c icmp, or \c unknown.
*
* @param proto The transport protocol
*
* @return The transport protocol, which will be \c TRANSPORT_UNKNOWN
* on error. Errors are also reported through the thread passed to the
* constructor.
*/
TransportProto ParseProto(const string &proto) const;
/**
* Convert a string into a Value::addr_t.
*
* @param addr String containing an IPv4 or IPv6 address.
*
* @return The address, which will be all-zero on error. Errors are
* also reported through the thread passed to the constructor.
*/
threading::Value::addr_t ParseAddr(const string &addr) const;
private:
// Evaluates the result of a preceding strto*() call on \a s, given the
// end pointer that call produced. Returns true if the conversion
// failed.
bool CheckNumberError(const string& s, const char * end) const;
SeparatorInfo separators; // Separator configuration copied at construction.
threading::MsgThread* thread; // Thread used for reporting and formatting.
};
#endif /* THREADING_ASCII_FORMATTER_H */

View file

@ -390,7 +390,7 @@ protected:
* @param name: A descriptive name for the type of message. Used
* mainly for debugging purposes.
*
* @param arg_object: An object to store with the message.
* @param arg_object: An object to store with the message.
*/
InputMessage(const char* name, O* arg_object) : BasicInputMessage(name)
{ object = arg_object; }
@ -400,7 +400,7 @@ private:
};
/**
* A paremeterized OututMessage that stores a pointer to an argument object.
* A parameterized OutputMessage that stores a pointer to an argument object.
* Normally, the objects will be used from the Process() callback.
*/
template<typename O>
@ -419,7 +419,7 @@ protected:
* @param name A descriptive name for the type of message. Used
* mainly for debugging purposes.
*
* @param arg_object An object to store with the message.
* @param arg_object An object to store with the message.
*/
OutputMessage(const char* name, O* arg_object) : BasicOutputMessage(name)
{ object = arg_object; }