mirror of
https://github.com/zeek/zeek.git
synced 2025-10-11 11:08:20 +00:00
Fix for when not producing local output; that hung.
* origin/topic/robin/dataseries: Moving trace for rotation test into traces directory. Fixing a rotation race condition at termination. Portability fixes. Extending DS docs with some examples. Updating doc. Fixing pack_scale and time-as-int. Adding format specifier to DS spec to print out double as %.6f. DataSeries updates and fixes. DataSeries tuning. Tweaking DataSeries support. Extending log post-processor call to include the name of the writer. Removing an unnecessary const cast. DataSeries TODO list with open issues/questions. Starting DataSeries HowTo. Additional test output canonification for ds2txt's timestamps. In threads, an internal error now immediately aborts. DataSeries cleanup. Working on DataSeries support. Merging in DataSeries support from topic/gilbert/logging. Fixing threads' DoFinish() method.
This commit is contained in:
commit
7cc863c5fc
52 changed files with 1844 additions and 133 deletions
|
@ -7,6 +7,7 @@
|
|||
#include "../NetVar.h"
|
||||
#include "../Net.h"
|
||||
|
||||
#include "threading/Manager.h"
|
||||
#include "threading/SerialTypes.h"
|
||||
|
||||
#include "Manager.h"
|
||||
|
@ -16,9 +17,11 @@
|
|||
#include "writers/Ascii.h"
|
||||
#include "writers/None.h"
|
||||
|
||||
#ifdef USE_DATASERIES
|
||||
#include "writers/DataSeries.h"
|
||||
#endif
|
||||
|
||||
using namespace logging;
|
||||
using threading::Value;
|
||||
using threading::Field;
|
||||
|
||||
// Structure describing a log writer type.
|
||||
struct WriterDefinition {
|
||||
|
@ -32,6 +35,9 @@ struct WriterDefinition {
|
|||
WriterDefinition log_writers[] = {
|
||||
{ BifEnum::Log::WRITER_NONE, "None", 0, writer::None::Instantiate },
|
||||
{ BifEnum::Log::WRITER_ASCII, "Ascii", 0, writer::Ascii::Instantiate },
|
||||
#ifdef USE_DATASERIES
|
||||
{ BifEnum::Log::WRITER_DATASERIES, "DataSeries", 0, writer::DataSeries::Instantiate },
|
||||
#endif
|
||||
|
||||
// End marker, don't touch.
|
||||
{ BifEnum::Log::WRITER_DEFAULT, "None", 0, (WriterBackend* (*)(WriterFrontend* frontend))0 }
|
||||
|
@ -51,7 +57,7 @@ struct Manager::Filter {
|
|||
Func* postprocessor;
|
||||
|
||||
int num_fields;
|
||||
Field** fields;
|
||||
threading::Field** fields;
|
||||
|
||||
// Vector indexed by field number. Each element is a list of record
|
||||
// indices defining a path leading to the value across potential
|
||||
|
@ -119,6 +125,7 @@ Manager::Stream::~Stream()
|
|||
|
||||
Manager::Manager()
|
||||
{
|
||||
rotations_pending = 0;
|
||||
}
|
||||
|
||||
Manager::~Manager()
|
||||
|
@ -127,6 +134,16 @@ Manager::~Manager()
|
|||
delete *s;
|
||||
}
|
||||
|
||||
list<string> Manager::SupportedFormats()
|
||||
{
|
||||
list<string> formats;
|
||||
|
||||
for ( WriterDefinition* ld = log_writers; ld->type != BifEnum::Log::WRITER_DEFAULT; ++ld )
|
||||
formats.push_back(ld->name);
|
||||
|
||||
return formats;
|
||||
}
|
||||
|
||||
WriterBackend* Manager::CreateBackend(WriterFrontend* frontend, bro_int_t type)
|
||||
{
|
||||
WriterDefinition* ld = log_writers;
|
||||
|
@ -135,7 +152,7 @@ WriterBackend* Manager::CreateBackend(WriterFrontend* frontend, bro_int_t type)
|
|||
{
|
||||
if ( ld->type == BifEnum::Log::WRITER_DEFAULT )
|
||||
{
|
||||
reporter->Error("unknow writer when creating writer");
|
||||
reporter->Error("unknown writer type requested");
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -159,10 +176,8 @@ WriterBackend* Manager::CreateBackend(WriterFrontend* frontend, bro_int_t type)
|
|||
// function.
|
||||
ld->factory = 0;
|
||||
|
||||
DBG_LOG(DBG_LOGGING, "failed to init writer class %s",
|
||||
ld->name);
|
||||
|
||||
return false;
|
||||
reporter->Error("initialization of writer %s failed", ld->name);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -449,7 +464,7 @@ bool Manager::TraverseRecord(Stream* stream, Filter* filter, RecordType* rt,
|
|||
|
||||
filter->indices.push_back(new_indices);
|
||||
|
||||
filter->fields = (Field**)
|
||||
filter->fields = (threading::Field**)
|
||||
realloc(filter->fields,
|
||||
sizeof(Field) * ++filter->num_fields);
|
||||
|
||||
|
@ -459,7 +474,7 @@ bool Manager::TraverseRecord(Stream* stream, Filter* filter, RecordType* rt,
|
|||
return false;
|
||||
}
|
||||
|
||||
Field* field = new Field();
|
||||
threading::Field* field = new threading::Field();
|
||||
field->name = new_path;
|
||||
field->type = t->Tag();
|
||||
|
||||
|
@ -571,7 +586,7 @@ bool Manager::AddFilter(EnumVal* id, RecordVal* fval)
|
|||
|
||||
for ( int i = 0; i < filter->num_fields; i++ )
|
||||
{
|
||||
Field* field = filter->fields[i];
|
||||
threading::Field* field = filter->fields[i];
|
||||
DBG_LOG(DBG_LOGGING, " field %10s: %s",
|
||||
field->name.c_str(), type_name(field->type));
|
||||
}
|
||||
|
@ -743,10 +758,10 @@ bool Manager::Write(EnumVal* id, RecordVal* columns)
|
|||
|
||||
// Copy the fields for WriterFrontend::Init() as it
|
||||
// will take ownership.
|
||||
Field** arg_fields = new Field*[filter->num_fields];
|
||||
threading::Field** arg_fields = new threading::Field*[filter->num_fields];
|
||||
|
||||
for ( int j = 0; j < filter->num_fields; ++j )
|
||||
arg_fields[j] = new Field(*filter->fields[j]);
|
||||
arg_fields[j] = new threading::Field(*filter->fields[j]);
|
||||
|
||||
writer = CreateWriter(stream->id, filter->writer,
|
||||
path, filter->num_fields,
|
||||
|
@ -897,10 +912,10 @@ threading::Value* Manager::ValToLogVal(Val* val, BroType* ty)
|
|||
return lval;
|
||||
}
|
||||
|
||||
Value** Manager::RecordToFilterVals(Stream* stream, Filter* filter,
|
||||
threading::Value** Manager::RecordToFilterVals(Stream* stream, Filter* filter,
|
||||
RecordVal* columns)
|
||||
{
|
||||
Value** vals = new Value*[filter->num_fields];
|
||||
threading::Value** vals = new threading::Value*[filter->num_fields];
|
||||
|
||||
for ( int i = 0; i < filter->num_fields; ++i )
|
||||
{
|
||||
|
@ -919,7 +934,7 @@ Value** Manager::RecordToFilterVals(Stream* stream, Filter* filter,
|
|||
if ( ! val )
|
||||
{
|
||||
// Value, or any of its parents, is not set.
|
||||
vals[i] = new Value(filter->fields[i]->type, false);
|
||||
vals[i] = new threading::Value(filter->fields[i]->type, false);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -932,7 +947,7 @@ Value** Manager::RecordToFilterVals(Stream* stream, Filter* filter,
|
|||
}
|
||||
|
||||
WriterFrontend* Manager::CreateWriter(EnumVal* id, EnumVal* writer, string path,
|
||||
int num_fields, const Field* const* fields, bool local, bool remote)
|
||||
int num_fields, const threading::Field* const* fields, bool local, bool remote)
|
||||
{
|
||||
Stream* stream = FindStream(id);
|
||||
|
||||
|
@ -996,7 +1011,7 @@ WriterFrontend* Manager::CreateWriter(EnumVal* id, EnumVal* writer, string path,
|
|||
return writer_obj;
|
||||
}
|
||||
|
||||
void Manager::DeleteVals(int num_fields, Value** vals)
|
||||
void Manager::DeleteVals(int num_fields, threading::Value** vals)
|
||||
{
|
||||
// Note this code is duplicated in WriterBackend::DeleteVals().
|
||||
for ( int i = 0; i < num_fields; i++ )
|
||||
|
@ -1006,7 +1021,7 @@ void Manager::DeleteVals(int num_fields, Value** vals)
|
|||
}
|
||||
|
||||
bool Manager::Write(EnumVal* id, EnumVal* writer, string path, int num_fields,
|
||||
Value** vals)
|
||||
threading::Value** vals)
|
||||
{
|
||||
Stream* stream = FindStream(id);
|
||||
|
||||
|
@ -1113,10 +1128,19 @@ bool Manager::Flush(EnumVal* id)
|
|||
|
||||
void Manager::Terminate()
|
||||
{
|
||||
// Make sure we process all the pending rotations.
|
||||
while ( rotations_pending )
|
||||
{
|
||||
thread_mgr->ForceProcessing(); // A blatant layering violation ...
|
||||
usleep(1000);
|
||||
}
|
||||
|
||||
for ( vector<Stream *>::iterator s = streams.begin(); s != streams.end(); ++s )
|
||||
{
|
||||
if ( *s )
|
||||
Flush((*s)->id);
|
||||
if ( ! *s )
|
||||
continue;
|
||||
|
||||
Flush((*s)->id);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1219,11 +1243,19 @@ void Manager::Rotate(WriterInfo* winfo)
|
|||
|
||||
// Trigger the rotation.
|
||||
winfo->writer->Rotate(tmp, winfo->open_time, network_time, terminating);
|
||||
|
||||
++rotations_pending;
|
||||
}
|
||||
|
||||
bool Manager::FinishedRotation(WriterFrontend* writer, string new_name, string old_name,
|
||||
double open, double close, bool terminating)
|
||||
{
|
||||
--rotations_pending;
|
||||
|
||||
if ( ! writer )
|
||||
// Writer didn't produce local output.
|
||||
return true;
|
||||
|
||||
DBG_LOG(DBG_LOGGING, "Finished rotating %s at %.6f, new name %s",
|
||||
writer->Path().c_str(), network_time, new_name.c_str());
|
||||
|
||||
|
|
|
@ -15,7 +15,6 @@ class RotationTimer;
|
|||
|
||||
namespace logging {
|
||||
|
||||
|
||||
class WriterBackend;
|
||||
class WriterFrontend;
|
||||
class RotationFinishedMessage;
|
||||
|
@ -56,7 +55,7 @@ public:
|
|||
* logging.bif, which just forwards here.
|
||||
*/
|
||||
bool EnableStream(EnumVal* id);
|
||||
|
||||
|
||||
/**
|
||||
* Disables a log stream.
|
||||
*
|
||||
|
@ -145,6 +144,11 @@ public:
|
|||
*/
|
||||
void Terminate();
|
||||
|
||||
/**
|
||||
* Returns a list of supported output formats.
|
||||
*/
|
||||
static list<string> SupportedFormats();
|
||||
|
||||
protected:
|
||||
friend class WriterFrontend;
|
||||
friend class RotationFinishedMessage;
|
||||
|
@ -196,6 +200,7 @@ private:
|
|||
WriterInfo* FindWriter(WriterFrontend* writer);
|
||||
|
||||
vector<Stream *> streams; // Indexed by stream enum.
|
||||
int rotations_pending; // Number of rotations not yet finished.
|
||||
};
|
||||
|
||||
}
|
||||
|
|
|
@ -223,17 +223,6 @@ bool WriterBackend::Flush()
|
|||
return true;
|
||||
}
|
||||
|
||||
bool WriterBackend::Finish()
|
||||
{
|
||||
if ( ! DoFlush() )
|
||||
{
|
||||
DisableFrontend();
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool WriterBackend::DoHeartbeat(double network_time, double current_time)
|
||||
{
|
||||
MsgThread::DoHeartbeat(network_time, current_time);
|
||||
|
@ -279,4 +268,9 @@ string WriterBackend::Render(const threading::Value::subnet_t& subnet) const
|
|||
return s;
|
||||
}
|
||||
|
||||
|
||||
string WriterBackend::Render(double d) const
|
||||
{
|
||||
char buf[256];
|
||||
modp_dtoa(d, buf, 6);
|
||||
return buf;
|
||||
}
|
||||
|
|
|
@ -101,15 +101,6 @@ public:
|
|||
*/
|
||||
bool Rotate(string rotated_path, double open, double close, bool terminating);
|
||||
|
||||
/**
|
||||
* Finishes writing to this logger in a regularl fashion. Must not be
|
||||
* called if an error has been indicated earlier. After calling this,
|
||||
* no further writing must be performed.
|
||||
*
|
||||
* @return False if an error occured.
|
||||
*/
|
||||
bool Finish();
|
||||
|
||||
/**
|
||||
* Disables the frontend that has instantiated this backend. Once
|
||||
* disabled,the frontend will not send any further message over.
|
||||
|
@ -174,7 +165,17 @@ public:
|
|||
*/
|
||||
string Render(const threading::Value::subnet_t& subnet) const;
|
||||
|
||||
/** Helper method to render a double in Bro's standard precision.
|
||||
*
|
||||
* @param d The double.
|
||||
*
|
||||
* @return An ASCII representation of the double.
|
||||
*/
|
||||
string Render(double d) const;
|
||||
|
||||
protected:
|
||||
friend class FinishMessage;
|
||||
|
||||
/**
|
||||
* Writer-specific intialization method.
|
||||
*
|
||||
|
@ -272,26 +273,18 @@ protected:
|
|||
bool terminating) = 0;
|
||||
|
||||
/**
|
||||
* Writer-specific method implementing log output finalization at
|
||||
* termination. Not called when any of the other methods has
|
||||
* previously signaled an error, i.e., executing this method signals
|
||||
* a regular shutdown of the writer.
|
||||
* Writer-specific method called just before the threading system is
|
||||
* going to shutdown.
|
||||
*
|
||||
* A writer implementation must override this method but it can just
|
||||
* ignore calls if flushing doesn't align with its semantics.
|
||||
*
|
||||
* If the method returns false, it will be assumed that a fatal error
|
||||
* has occured that prevents the writer from further operation; it
|
||||
* will then be disabled and eventually deleted. When returning
|
||||
* false, an implementation should also call Error() to indicate what
|
||||
* happened.
|
||||
* This method can be overridden but one must call
|
||||
* WriterBackend::DoFinish().
|
||||
*/
|
||||
virtual bool DoFinish() = 0;
|
||||
virtual bool DoFinish() { return MsgThread::DoFinish(); }
|
||||
|
||||
/**
|
||||
* Triggered by regular heartbeat messages from the main thread.
|
||||
*
|
||||
* This method can be overridden but once must call
|
||||
* This method can be overridden but one must call
|
||||
* WriterBackend::DoHeartbeat().
|
||||
*/
|
||||
virtual bool DoHeartbeat(double network_time, double current_time);
|
||||
|
|
|
@ -90,7 +90,7 @@ public:
|
|||
FinishMessage(WriterBackend* backend)
|
||||
: threading::InputMessage<WriterBackend>("Finish", backend) {}
|
||||
|
||||
virtual bool Process() { return Object()->Finish(); }
|
||||
virtual bool Process() { return Object()->DoFinish(); }
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -117,8 +117,9 @@ WriterFrontend::WriterFrontend(EnumVal* arg_stream, EnumVal* arg_writer, bool ar
|
|||
if ( local )
|
||||
{
|
||||
backend = log_mgr->CreateBackend(this, writer->AsEnum());
|
||||
assert(backend);
|
||||
backend->Start();
|
||||
|
||||
if ( backend )
|
||||
backend->Start();
|
||||
}
|
||||
|
||||
else
|
||||
|
@ -256,6 +257,10 @@ void WriterFrontend::Rotate(string rotated_path, double open, double close, bool
|
|||
|
||||
if ( backend )
|
||||
backend->SendIn(new RotateMessage(backend, this, rotated_path, open, close, terminating));
|
||||
else
|
||||
// Still signal log manager that we're done, but signal that
|
||||
// nothing happened by setting the writer to zeri.
|
||||
log_mgr->FinishedRotation(0, "", rotated_path, open, close, terminating);
|
||||
}
|
||||
|
||||
void WriterFrontend::Finish()
|
||||
|
|
|
@ -69,8 +69,7 @@ bool Ascii::WriteHeaderField(const string& key, const string& val)
|
|||
return (fwrite(str.c_str(), str.length(), 1, file) == 1);
|
||||
}
|
||||
|
||||
bool Ascii::DoInit(string path, int num_fields,
|
||||
const Field* const * fields)
|
||||
bool Ascii::DoInit(string path, int num_fields, const Field* const * fields)
|
||||
{
|
||||
if ( output_to_stdout )
|
||||
path = "/dev/stdout";
|
||||
|
@ -87,6 +86,9 @@ bool Ascii::DoInit(string path, int num_fields,
|
|||
|
||||
if ( include_header )
|
||||
{
|
||||
string names;
|
||||
string types;
|
||||
|
||||
string str = string(header_prefix, header_prefix_len)
|
||||
+ "separator " // Always use space as separator here.
|
||||
+ get_escaped_string(string(separator, separator_len), false)
|
||||
|
@ -104,9 +106,6 @@ bool Ascii::DoInit(string path, int num_fields,
|
|||
WriteHeaderField("path", get_escaped_string(path, false))) )
|
||||
goto write_error;
|
||||
|
||||
string names;
|
||||
string types;
|
||||
|
||||
for ( int i = 0; i < num_fields; ++i )
|
||||
{
|
||||
if ( i > 0 )
|
||||
|
@ -115,15 +114,8 @@ bool Ascii::DoInit(string path, int num_fields,
|
|||
types += string(separator, separator_len);
|
||||
}
|
||||
|
||||
const Field* field = fields[i];
|
||||
names += field->name;
|
||||
types += type_name(field->type);
|
||||
if ( (field->type == TYPE_TABLE) || (field->type == TYPE_VECTOR) )
|
||||
{
|
||||
types += "[";
|
||||
types += type_name(field->subtype);
|
||||
types += "]";
|
||||
}
|
||||
names += fields[i]->name;
|
||||
types += fields[i]->TypeName();
|
||||
}
|
||||
|
||||
if ( ! (WriteHeaderField("fields", names)
|
||||
|
@ -146,7 +138,7 @@ bool Ascii::DoFlush()
|
|||
|
||||
bool Ascii::DoFinish()
|
||||
{
|
||||
return true;
|
||||
return WriterBackend::DoFinish();
|
||||
}
|
||||
|
||||
bool Ascii::DoWriteOne(ODesc* desc, Value* val, const Field* field)
|
||||
|
@ -184,15 +176,19 @@ bool Ascii::DoWriteOne(ODesc* desc, Value* val, const Field* field)
|
|||
desc->Add(Render(val->val.addr_val));
|
||||
break;
|
||||
|
||||
case TYPE_TIME:
|
||||
case TYPE_INTERVAL:
|
||||
char buf[256];
|
||||
modp_dtoa(val->val.double_val, buf, 6);
|
||||
desc->Add(buf);
|
||||
case TYPE_DOUBLE:
|
||||
// Rendering via Add() truncates trailing 0s after the
|
||||
// decimal point. The difference with TIME/INTERVAL is mainly
|
||||
// to keep the log format consistent.
|
||||
desc->Add(val->val.double_val);
|
||||
break;
|
||||
|
||||
case TYPE_DOUBLE:
|
||||
desc->Add(val->val.double_val);
|
||||
case TYPE_INTERVAL:
|
||||
case TYPE_TIME:
|
||||
// Rendering via Render() keeps trailing 0s after the decimal
|
||||
// point. The difference with DOUBLEis mainly to keep the log
|
||||
// format consistent.
|
||||
desc->Add(Render(val->val.double_val));
|
||||
break;
|
||||
|
||||
case TYPE_ENUM:
|
||||
|
|
417
src/logging/writers/DataSeries.cc
Normal file
417
src/logging/writers/DataSeries.cc
Normal file
|
@ -0,0 +1,417 @@
|
|||
// See the file "COPYING" in the main distribution directory for copyright.
|
||||
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <errno.h>
|
||||
|
||||
#include <DataSeries/GeneralField.hpp>
|
||||
|
||||
#include "NetVar.h"
|
||||
#include "threading/SerialTypes.h"
|
||||
|
||||
#include "DataSeries.h"
|
||||
|
||||
using namespace logging;
|
||||
using namespace writer;
|
||||
|
||||
std::string DataSeries::LogValueToString(threading::Value *val)
|
||||
{
|
||||
// In some cases, no value is attached. If this is the case, return
|
||||
// an empty string.
|
||||
if( ! val->present )
|
||||
return "";
|
||||
|
||||
switch(val->type) {
|
||||
case TYPE_BOOL:
|
||||
return (val->val.int_val ? "true" : "false");
|
||||
|
||||
case TYPE_INT:
|
||||
{
|
||||
std::ostringstream ostr;
|
||||
ostr << val->val.int_val;
|
||||
return ostr.str();
|
||||
}
|
||||
|
||||
case TYPE_COUNT:
|
||||
case TYPE_COUNTER:
|
||||
case TYPE_PORT:
|
||||
{
|
||||
std::ostringstream ostr;
|
||||
ostr << val->val.uint_val;
|
||||
return ostr.str();
|
||||
}
|
||||
|
||||
case TYPE_SUBNET:
|
||||
return Render(val->val.subnet_val);
|
||||
|
||||
case TYPE_ADDR:
|
||||
return Render(val->val.addr_val);
|
||||
|
||||
// Note: These two cases are relatively special. We need to convert
|
||||
// these values into their integer equivalents to maximize precision.
|
||||
// At the moment, there won't be a noticeable effect (Bro uses the
|
||||
// double format everywhere internally, so we've already lost the
|
||||
// precision we'd gain here), but timestamps may eventually switch to
|
||||
// this representation within Bro.
|
||||
//
|
||||
// In the near-term, this *should* lead to better pack_relative (and
|
||||
// thus smaller output files).
|
||||
case TYPE_TIME:
|
||||
case TYPE_INTERVAL:
|
||||
if ( ds_use_integer_for_time )
|
||||
{
|
||||
std::ostringstream ostr;
|
||||
ostr << (uint64_t)(DataSeries::TIME_SCALE * val->val.double_val);
|
||||
return ostr.str();
|
||||
}
|
||||
else
|
||||
return Render(val->val.double_val);
|
||||
|
||||
case TYPE_DOUBLE:
|
||||
return Render(val->val.double_val);
|
||||
|
||||
case TYPE_ENUM:
|
||||
case TYPE_STRING:
|
||||
case TYPE_FILE:
|
||||
case TYPE_FUNC:
|
||||
if ( ! val->val.string_val->size() )
|
||||
return "";
|
||||
|
||||
return string(val->val.string_val->data(), val->val.string_val->size());
|
||||
|
||||
case TYPE_TABLE:
|
||||
{
|
||||
if ( ! val->val.set_val.size )
|
||||
return "";
|
||||
|
||||
string tmpString = "";
|
||||
|
||||
for ( int j = 0; j < val->val.set_val.size; j++ )
|
||||
{
|
||||
if ( j > 0 )
|
||||
tmpString += ds_set_separator;
|
||||
|
||||
tmpString += LogValueToString(val->val.set_val.vals[j]);
|
||||
}
|
||||
|
||||
return tmpString;
|
||||
}
|
||||
|
||||
case TYPE_VECTOR:
|
||||
{
|
||||
if ( ! val->val.vector_val.size )
|
||||
return "";
|
||||
|
||||
string tmpString = "";
|
||||
|
||||
for ( int j = 0; j < val->val.vector_val.size; j++ )
|
||||
{
|
||||
if ( j > 0 )
|
||||
tmpString += ds_set_separator;
|
||||
|
||||
tmpString += LogValueToString(val->val.vector_val.vals[j]);
|
||||
}
|
||||
|
||||
return tmpString;
|
||||
}
|
||||
|
||||
default:
|
||||
InternalError(Fmt("unknown type %s in DataSeries::LogValueToString", type_name(val->type)));
|
||||
return "cannot be reached";
|
||||
}
|
||||
}
|
||||
|
||||
string DataSeries::GetDSFieldType(const threading::Field *field)
|
||||
{
|
||||
switch(field->type) {
|
||||
case TYPE_BOOL:
|
||||
return "bool";
|
||||
|
||||
case TYPE_COUNT:
|
||||
case TYPE_COUNTER:
|
||||
case TYPE_PORT:
|
||||
case TYPE_INT:
|
||||
return "int64";
|
||||
|
||||
case TYPE_DOUBLE:
|
||||
return "double";
|
||||
|
||||
case TYPE_TIME:
|
||||
case TYPE_INTERVAL:
|
||||
return ds_use_integer_for_time ? "int64" : "double";
|
||||
|
||||
case TYPE_SUBNET:
|
||||
case TYPE_ADDR:
|
||||
case TYPE_ENUM:
|
||||
case TYPE_STRING:
|
||||
case TYPE_FILE:
|
||||
case TYPE_TABLE:
|
||||
case TYPE_VECTOR:
|
||||
case TYPE_FUNC:
|
||||
return "variable32";
|
||||
|
||||
default:
|
||||
InternalError(Fmt("unknown type %s in DataSeries::GetDSFieldType", type_name(field->type)));
|
||||
return "cannot be reached";
|
||||
}
|
||||
}
|
||||
|
||||
string DataSeries::BuildDSSchemaFromFieldTypes(const vector<SchemaValue>& vals, string sTitle)
|
||||
{
|
||||
if( ! sTitle.size() )
|
||||
sTitle = "GenericBroStream";
|
||||
|
||||
string xmlschema = "<ExtentType name=\""
|
||||
+ sTitle
|
||||
+ "\" version=\"1.0\" namespace=\"bro-ids.org\">\n";
|
||||
|
||||
for( size_t i = 0; i < vals.size(); ++i )
|
||||
{
|
||||
xmlschema += "\t<field type=\""
|
||||
+ vals[i].ds_type
|
||||
+ "\" name=\""
|
||||
+ vals[i].field_name
|
||||
+ "\" " + vals[i].field_options
|
||||
+ "/>\n";
|
||||
}
|
||||
|
||||
xmlschema += "</ExtentType>\n";
|
||||
|
||||
for( size_t i = 0; i < vals.size(); ++i )
|
||||
{
|
||||
xmlschema += "<!-- " + vals[i].field_name
|
||||
+ " : " + vals[i].bro_type
|
||||
+ " -->\n";
|
||||
}
|
||||
|
||||
return xmlschema;
|
||||
}
|
||||
|
||||
std::string DataSeries::GetDSOptionsForType(const threading::Field *field)
|
||||
{
|
||||
switch( field->type ) {
|
||||
case TYPE_TIME:
|
||||
case TYPE_INTERVAL:
|
||||
{
|
||||
std::string s;
|
||||
s += "pack_relative=\"" + std::string(field->name) + "\"";
|
||||
|
||||
if ( ! ds_use_integer_for_time )
|
||||
s += " pack_scale=\"1e-6\" print_format=\"%.6f\" pack_scale_warn=\"no\"";
|
||||
else
|
||||
s += string(" units=\"") + TIME_UNIT() + "\" epoch=\"unix\"";
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
case TYPE_SUBNET:
|
||||
case TYPE_ADDR:
|
||||
case TYPE_ENUM:
|
||||
case TYPE_STRING:
|
||||
case TYPE_FILE:
|
||||
case TYPE_TABLE:
|
||||
case TYPE_VECTOR:
|
||||
return "pack_unique=\"yes\"";
|
||||
|
||||
default:
|
||||
return "";
|
||||
}
|
||||
}
|
||||
|
||||
DataSeries::DataSeries(WriterFrontend* frontend) : WriterBackend(frontend)
|
||||
{
|
||||
ds_compression = string((const char *)BifConst::LogDataSeries::compression->Bytes(),
|
||||
BifConst::LogDataSeries::compression->Len());
|
||||
ds_dump_schema = BifConst::LogDataSeries::dump_schema;
|
||||
ds_extent_size = BifConst::LogDataSeries::extent_size;
|
||||
ds_num_threads = BifConst::LogDataSeries::num_threads;
|
||||
ds_use_integer_for_time = BifConst::LogDataSeries::use_integer_for_time;
|
||||
ds_set_separator = ",";
|
||||
}
|
||||
|
||||
DataSeries::~DataSeries()
|
||||
{
|
||||
}
|
||||
|
||||
bool DataSeries::OpenLog(string path)
|
||||
{
|
||||
log_file = new DataSeriesSink(path + ".ds", compress_type);
|
||||
log_file->writeExtentLibrary(log_types);
|
||||
|
||||
for( size_t i = 0; i < schema_list.size(); ++i )
|
||||
extents.insert(std::make_pair(schema_list[i].field_name,
|
||||
GeneralField::create(log_series, schema_list[i].field_name)));
|
||||
|
||||
if ( ds_extent_size < ROW_MIN )
|
||||
{
|
||||
Warning(Fmt("%d is not a valid value for 'rows'. Using min of %d instead", (int)ds_extent_size, (int)ROW_MIN));
|
||||
ds_extent_size = ROW_MIN;
|
||||
}
|
||||
|
||||
else if( ds_extent_size > ROW_MAX )
|
||||
{
|
||||
Warning(Fmt("%d is not a valid value for 'rows'. Using max of %d instead", (int)ds_extent_size, (int)ROW_MAX));
|
||||
ds_extent_size = ROW_MAX;
|
||||
}
|
||||
|
||||
log_output = new OutputModule(*log_file, log_series, log_type, ds_extent_size);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool DataSeries::DoInit(string path, int num_fields, const threading::Field* const * fields)
|
||||
{
|
||||
// We first construct an XML schema thing (and, if ds_dump_schema is
|
||||
// set, dump it to path + ".ds.xml"). Assuming that goes well, we
|
||||
// use that schema to build our output logfile and prepare it to be
|
||||
// written to.
|
||||
|
||||
// Note: compressor count must be set *BEFORE* DataSeriesSink is
|
||||
// instantiated.
|
||||
if( ds_num_threads < THREAD_MIN && ds_num_threads != 0 )
|
||||
{
|
||||
Warning(Fmt("%d is too few threads! Using %d instead", (int)ds_num_threads, (int)THREAD_MIN));
|
||||
ds_num_threads = THREAD_MIN;
|
||||
}
|
||||
|
||||
if( ds_num_threads > THREAD_MAX )
|
||||
{
|
||||
Warning(Fmt("%d is too many threads! Dropping back to %d", (int)ds_num_threads, (int)THREAD_MAX));
|
||||
ds_num_threads = THREAD_MAX;
|
||||
}
|
||||
|
||||
if( ds_num_threads > 0 )
|
||||
DataSeriesSink::setCompressorCount(ds_num_threads);
|
||||
|
||||
for ( int i = 0; i < num_fields; i++ )
|
||||
{
|
||||
const threading::Field* field = fields[i];
|
||||
SchemaValue val;
|
||||
val.ds_type = GetDSFieldType(field);
|
||||
val.field_name = string(field->name);
|
||||
val.field_options = GetDSOptionsForType(field);
|
||||
val.bro_type = field->TypeName();
|
||||
schema_list.push_back(val);
|
||||
}
|
||||
|
||||
string schema = BuildDSSchemaFromFieldTypes(schema_list, path);
|
||||
|
||||
if( ds_dump_schema )
|
||||
{
|
||||
FILE* pFile = fopen ( string(path + ".ds.xml").c_str() , "wb" );
|
||||
|
||||
if( pFile )
|
||||
{
|
||||
fwrite(schema.c_str(), 1, schema.length(), pFile);
|
||||
fclose(pFile);
|
||||
}
|
||||
|
||||
else
|
||||
Error(Fmt("cannot dump schema: %s", strerror(errno)));
|
||||
}
|
||||
|
||||
compress_type = Extent::compress_all;
|
||||
|
||||
if( ds_compression == "lzf" )
|
||||
compress_type = Extent::compress_lzf;
|
||||
|
||||
else if( ds_compression == "lzo" )
|
||||
compress_type = Extent::compress_lzo;
|
||||
|
||||
else if( ds_compression == "gz" )
|
||||
compress_type = Extent::compress_gz;
|
||||
|
||||
else if( ds_compression == "bz2" )
|
||||
compress_type = Extent::compress_bz2;
|
||||
|
||||
else if( ds_compression == "none" )
|
||||
compress_type = Extent::compress_none;
|
||||
|
||||
else if( ds_compression == "any" )
|
||||
compress_type = Extent::compress_all;
|
||||
|
||||
else
|
||||
Warning(Fmt("%s is not a valid compression type. Valid types are: 'lzf', 'lzo', 'gz', 'bz2', 'none', 'any'. Defaulting to 'any'", ds_compression.c_str()));
|
||||
|
||||
log_type = log_types.registerTypePtr(schema);
|
||||
log_series.setType(log_type);
|
||||
|
||||
return OpenLog(path);
|
||||
}
|
||||
|
||||
bool DataSeries::DoFlush()
|
||||
{
|
||||
// Flushing is handled by DataSeries automatically, so this function
|
||||
// doesn't do anything.
|
||||
return true;
|
||||
}
|
||||
|
||||
void DataSeries::CloseLog()
|
||||
{
|
||||
for( ExtentIterator iter = extents.begin(); iter != extents.end(); ++iter )
|
||||
delete iter->second;
|
||||
|
||||
extents.clear();
|
||||
|
||||
// Don't delete the file before you delete the output, or bad things
|
||||
// will happen.
|
||||
delete log_output;
|
||||
delete log_file;
|
||||
|
||||
log_output = 0;
|
||||
log_file = 0;
|
||||
}
|
||||
|
||||
bool DataSeries::DoFinish()
|
||||
{
|
||||
CloseLog();
|
||||
|
||||
return WriterBackend::DoFinish();
|
||||
}
|
||||
|
||||
bool DataSeries::DoWrite(int num_fields, const threading::Field* const * fields,
|
||||
threading::Value** vals)
|
||||
{
|
||||
log_output->newRecord();
|
||||
|
||||
for( size_t i = 0; i < (size_t)num_fields; ++i )
|
||||
{
|
||||
ExtentIterator iter = extents.find(fields[i]->name);
|
||||
assert(iter != extents.end());
|
||||
|
||||
if( iter != extents.end() )
|
||||
{
|
||||
GeneralField *cField = iter->second;
|
||||
|
||||
if( vals[i]->present )
|
||||
cField->set(LogValueToString(vals[i]));
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool DataSeries::DoRotate(string rotated_path, double open, double close, bool terminating)
|
||||
{
|
||||
// Note that if DS files are rotated too often, the aggregate log
|
||||
// size will be (much) larger.
|
||||
CloseLog();
|
||||
|
||||
string dsname = Path() + ".ds";
|
||||
string nname = rotated_path + ".ds";
|
||||
rename(dsname.c_str(), nname.c_str());
|
||||
|
||||
if ( ! FinishedRotation(nname, dsname, open, close, terminating) )
|
||||
{
|
||||
Error(Fmt("error rotating %s to %s", dsname.c_str(), nname.c_str()));
|
||||
return false;
|
||||
}
|
||||
|
||||
return OpenLog(Path());
|
||||
}
|
||||
|
||||
bool DataSeries::DoSetBuf(bool enabled)
|
||||
{
|
||||
// DataSeries is *always* buffered to some degree. This option is ignored.
|
||||
return true;
|
||||
}
|
124
src/logging/writers/DataSeries.h
Normal file
124
src/logging/writers/DataSeries.h
Normal file
|
@ -0,0 +1,124 @@
|
|||
// See the file "COPYING" in the main distribution directory for copyright.
|
||||
//
|
||||
// A binary log writer producing DataSeries output. See doc/data-series.rst
|
||||
// for more information.
|
||||
|
||||
#ifndef LOGGING_WRITER_DATA_SERIES_H
|
||||
#define LOGGING_WRITER_DATA_SERIES_H
|
||||
|
||||
#include <DataSeries/ExtentType.hpp>
|
||||
#include <DataSeries/DataSeriesFile.hpp>
|
||||
#include <DataSeries/DataSeriesModule.hpp>
|
||||
#include <DataSeries/GeneralField.hpp>
|
||||
|
||||
#include "../WriterBackend.h"
|
||||
|
||||
namespace logging { namespace writer {
|
||||
|
||||
class DataSeries : public WriterBackend {
|
||||
public:
|
||||
DataSeries(WriterFrontend* frontend);
|
||||
~DataSeries();
|
||||
|
||||
static WriterBackend* Instantiate(WriterFrontend* frontend)
|
||||
{ return new DataSeries(frontend); }
|
||||
|
||||
protected:
|
||||
// Overidden from WriterBackend.
|
||||
|
||||
virtual bool DoInit(string path, int num_fields,
|
||||
const threading::Field* const * fields);
|
||||
|
||||
virtual bool DoWrite(int num_fields, const threading::Field* const* fields,
|
||||
threading::Value** vals);
|
||||
virtual bool DoSetBuf(bool enabled);
|
||||
virtual bool DoRotate(string rotated_path, double open,
|
||||
double close, bool terminating);
|
||||
virtual bool DoFlush();
|
||||
virtual bool DoFinish();
|
||||
|
||||
private:
|
||||
static const size_t ROW_MIN = 2048; // Minimum extent size.
|
||||
static const size_t ROW_MAX = (1024 * 1024 * 100); // Maximum extent size.
|
||||
static const size_t THREAD_MIN = 1; // Minimum number of compression threads that DataSeries may spawn.
|
||||
static const size_t THREAD_MAX = 128; // Maximum number of compression threads that DataSeries may spawn.
|
||||
static const size_t TIME_SCALE = 1000000; // Fixed-point multiplier for time values when converted to integers.
|
||||
const char* TIME_UNIT() { return "microseconds"; } // DS name for time resolution when converted to integers. Must match TIME_SCALE.
|
||||
|
||||
struct SchemaValue
|
||||
{
|
||||
string ds_type;
|
||||
string bro_type;
|
||||
string field_name;
|
||||
string field_options;
|
||||
};
|
||||
|
||||
/**
|
||||
* Turns a log value into a std::string. Uses an ostringstream to do the
|
||||
* heavy lifting, but still need to switch on the type to know which value
|
||||
* in the union to give to the string string for processing.
|
||||
*
|
||||
* @param val The value we wish to convert to a string
|
||||
* @return the string value of val
|
||||
*/
|
||||
std::string LogValueToString(threading::Value *val);
|
||||
|
||||
/**
|
||||
* Takes a field type and converts it to a relevant DataSeries type.
|
||||
*
|
||||
* @param field We extract the type from this and convert it into a relevant DS type.
|
||||
* @return String representation of type that DataSeries can understand.
|
||||
*/
|
||||
string GetDSFieldType(const threading::Field *field);
|
||||
|
||||
/**
|
||||
* Are there any options we should put into the XML schema?
|
||||
*
|
||||
* @param field We extract the type from this and return any options that make sense for that type.
|
||||
* @return Options that can be added directly to the XML (e.g. "pack_relative=\"yes\"")
|
||||
*/
|
||||
std::string GetDSOptionsForType(const threading::Field *field);
|
||||
|
||||
/**
|
||||
* Takes a list of types, a list of names, and a title, and uses it to construct a valid DataSeries XML schema
|
||||
* thing, which is then returned as a std::string
|
||||
*
|
||||
* @param opts std::vector of strings containing a list of options to be appended to each field (e.g. "pack_relative=yes")
|
||||
* @param sTitle Name of this schema. Ideally, these schemas would be aggregated and re-used.
|
||||
*/
|
||||
string BuildDSSchemaFromFieldTypes(const vector<SchemaValue>& vals, string sTitle);
|
||||
|
||||
/** Closes the currently open file. */
|
||||
void CloseLog();
|
||||
|
||||
/** Opens a new file. */
|
||||
bool OpenLog(string path);
|
||||
|
||||
typedef std::map<string, GeneralField *> ExtentMap;
|
||||
typedef ExtentMap::iterator ExtentIterator;
|
||||
|
||||
// Internal DataSeries structures we need to keep track of.
|
||||
vector<SchemaValue> schema_list;
|
||||
ExtentTypeLibrary log_types;
|
||||
ExtentType::Ptr log_type;
|
||||
ExtentSeries log_series;
|
||||
ExtentMap extents;
|
||||
int compress_type;
|
||||
|
||||
DataSeriesSink* log_file;
|
||||
OutputModule* log_output;
|
||||
|
||||
// Options set from the script-level.
|
||||
uint64 ds_extent_size;
|
||||
uint64 ds_num_threads;
|
||||
string ds_compression;
|
||||
bool ds_dump_schema;
|
||||
bool ds_use_integer_for_time;
|
||||
string ds_set_separator;
|
||||
};
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
Loading…
Add table
Add a link
Reference in a new issue