From 99e3c584942724946f6c54eb80213f4b84d88559 Mon Sep 17 00:00:00 2001 From: Robin Sommer Date: Tue, 3 Apr 2012 22:12:44 -0700 Subject: [PATCH 01/20] Fixing threads' DoFinish() method. It wasn't called reliably. Now, it's always called before the thread is destroyed (assuming processing has gone normally so far). --- src/threading/MsgThread.cc | 47 ++++++++++++++++++++++++++++++++------ src/threading/MsgThread.h | 10 ++++++++ 2 files changed, 50 insertions(+), 7 deletions(-) diff --git a/src/threading/MsgThread.cc b/src/threading/MsgThread.cc index ddcd3df1dd..0b91f8790a 100644 --- a/src/threading/MsgThread.cc +++ b/src/threading/MsgThread.cc @@ -10,13 +10,21 @@ namespace threading { ////// Messages. -// Signals child thread to terminate. This is actually a no-op; its only -// purpose is unblock the current read operation so that the child's Run() -// methods can check the termination status. -class TerminateMessage : public InputMessage +// Signals child thread to shutdown operation. +class FinishMessage : public InputMessage { public: - TerminateMessage(MsgThread* thread) : InputMessage("Terminate", thread) { } + FinishMessage(MsgThread* thread) : InputMessage("Finish", thread) { } + + virtual bool Process() { return Object()->DoFinish(); } +}; + +// A dummy message whose only purpose is to unblock the current read operation +// so that the child's Run() methods can check the termination status. +class UnblockMessage : public InputMessage +{ +public: + UnblockMessage(MsgThread* thread) : InputMessage("Unblock", thread) { } virtual bool Process() { return true; } }; @@ -130,13 +138,30 @@ bool ReporterMessage::Process() MsgThread::MsgThread() : BasicThread() { cnt_sent_in = cnt_sent_out = 0; + finished = false; thread_mgr->AddMsgThread(this); } void MsgThread::OnStop() { - // This is to unblock the current queue read operation. - SendIn(new TerminateMessage(this), true); + // Signal thread to terminate and wait until it has acknowledged. 
+ SendIn(new FinishMessage(this), true); + + int cnt = 0; + while ( ! finished ) + { + if ( ++cnt > 1000 ) // Insurance against broken threads ... + { + abort(); + reporter->Warning("thread %s didn't finish in time", Name().c_str()); + break; + } + + usleep(1000); + } + + // One more message to make sure the current queue read operation unblocks. + SendIn(new UnblockMessage(this), true); } void MsgThread::Heartbeat() @@ -157,6 +182,14 @@ bool MsgThread::DoHeartbeat(double network_time, double current_time) return true; } +bool MsgThread::DoFinish() + { + // This is thread-safe "enough", we're the only one ever writing + // there. + finished = true; + return true; + } + void MsgThread::Info(const char* msg) { SendOut(new ReporterMessage(ReporterMessage::INFO, this, msg)); diff --git a/src/threading/MsgThread.h b/src/threading/MsgThread.h index 5ac1c0f780..16e6a92772 100644 --- a/src/threading/MsgThread.h +++ b/src/threading/MsgThread.h @@ -171,6 +171,8 @@ public: protected: friend class Manager; friend class HeartbeatMessage; + friend class FinishMessage; + friend class FinishedMessage; /** * Pops a message sent by the child from the child-to-main queue. @@ -215,6 +217,12 @@ protected: */ virtual bool DoHeartbeat(double network_time, double current_time); + /** Triggered for execution in the child thread just before shutting threads down. + * The child thread should finish its operations and then *must* + * call this class' implementation. + */ + virtual bool DoFinish(); + private: /** * Pops a message sent by the main thread from the main-to-chold @@ -270,6 +278,8 @@ private: uint64_t cnt_sent_in; // Counts message sent to child. uint64_t cnt_sent_out; // Counts message sent by child. + + bool finished; // Set to true by Finished message. }; /** From 952b6b293a6068ea9892efd61890047206bd60ae Mon Sep 17 00:00:00 2001 From: Robin Sommer Date: Tue, 3 Apr 2012 22:14:56 -0700 Subject: [PATCH 02/20] Merging in DataSeries support from topic/gilbert/logging. 
I copied the code over manually, no merging, because (1) it needed to be adapted to the new threading API, and (2) there's more stuff in the branch that I haven't ported yet. The DS output generally seems to work, but it has seen no further testing yet. No unit tests yet either. --- CMakeLists.txt | 16 + cmake | 2 +- config.h.in | 3 + configure | 9 + scripts/base/frameworks/logging/__load__.bro | 1 + .../frameworks/logging/writers/dataseries.bro | 62 +++ src/CMakeLists.txt | 1 + src/logging.bif | 8 + src/logging/Manager.cc | 58 ++- src/logging/Manager.h | 8 +- src/logging/WriterBackend.cc | 11 - src/logging/WriterBackend.h | 33 +- src/logging/WriterFrontend.cc | 7 +- src/logging/writers/Ascii.cc | 5 +- src/logging/writers/DataSeries.cc | 476 ++++++++++++++++++ src/logging/writers/DataSeries.h | 69 +++ src/main.cc | 21 + src/types.bif | 1 + 18 files changed, 726 insertions(+), 65 deletions(-) create mode 100644 scripts/base/frameworks/logging/writers/dataseries.bro create mode 100644 src/logging/writers/DataSeries.cc create mode 100644 src/logging/writers/DataSeries.h diff --git a/CMakeLists.txt b/CMakeLists.txt index febc2d6ec1..04b28d2c32 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -107,6 +107,21 @@ if (GOOGLEPERFTOOLS_FOUND) endif () endif () +set(USE_DATASERIES false) +find_package(Lintel) +find_package(DataSeries) +find_package(LibXML2) + +if (LINTEL_FOUND AND DATASERIES_FOUND AND LIBXML2_FOUND) + set(USE_DATASERIES true) + include_directories(BEFORE ${Lintel_INCLUDE_DIR}) + include_directories(BEFORE ${DataSeries_INCLUDE_DIR}) + include_directories(BEFORE ${LibXML2_INCLUDE_DIR}) + list(APPEND OPTLIBS ${Lintel_LIBRARIES}) + list(APPEND OPTLIBS ${DataSeries_LIBRARIES}) + list(APPEND OPTLIBS ${LibXML2_LIBRARIES}) +endif() + set(brodeps ${BinPAC_LIBRARY} ${PCAP_LIBRARY} @@ -193,6 +208,7 @@ message( "\nGeoIP: ${USE_GEOIP}" "\nGoogle perftools: ${USE_PERFTOOLS}" "\n debugging: ${USE_PERFTOOLS_DEBUG}" + "\nDataSeries: ${USE_DATASERIES}" "\n" 
"\n================================================================\n" ) diff --git a/cmake b/cmake index 550ab2c8d9..60b2873937 160000 --- a/cmake +++ b/cmake @@ -1 +1 @@ -Subproject commit 550ab2c8d95b1d3e18e40a903152650e6c7a3c45 +Subproject commit 60b28739379da75f26c5c2a312b7886f5209a1cc diff --git a/config.h.in b/config.h.in index e744cb7dbd..558337d1bc 100644 --- a/config.h.in +++ b/config.h.in @@ -111,6 +111,9 @@ /* Use Google's perftools */ #cmakedefine USE_PERFTOOLS +/* Use the DataSeries writer. */ +#cmakedefine USE_DATASERIES + /* Version number of package */ #define VERSION "@VERSION@" diff --git a/configure b/configure index 05aa12815b..fe7db3b06d 100755 --- a/configure +++ b/configure @@ -54,6 +54,8 @@ Usage: $0 [OPTION]... [VAR=VALUE]... --with-ruby-lib=PATH path to ruby library --with-ruby-inc=PATH path to ruby headers --with-swig=PATH path to SWIG executable + --with-dataseries=PATH path to DataSeries and Lintel libraries + --with-xml2=PATH path to libxml2 installation (for DataSeries) Packaging Options (for developers): --binary-package toggle special logic for binary packaging @@ -203,6 +205,13 @@ while [ $# -ne 0 ]; do --with-swig=*) append_cache_entry SWIG_EXECUTABLE PATH $optarg ;; + --with-dataseries=*) + append_cache_entry DataSeries_ROOT_DIR PATH $optarg + append_cache_entry Lintel_ROOT_DIR PATH $optarg + ;; + --with-xml2=*) + append_cache_entry LibXML2_ROOT_DIR PATH $optarg + ;; --binary-package) append_cache_entry BINARY_PACKAGING_MODE BOOL true ;; diff --git a/scripts/base/frameworks/logging/__load__.bro b/scripts/base/frameworks/logging/__load__.bro index 42b2d7c564..17e03e2ef7 100644 --- a/scripts/base/frameworks/logging/__load__.bro +++ b/scripts/base/frameworks/logging/__load__.bro @@ -1,3 +1,4 @@ @load ./main @load ./postprocessors @load ./writers/ascii +@load ./writers/dataseries diff --git a/scripts/base/frameworks/logging/writers/dataseries.bro b/scripts/base/frameworks/logging/writers/dataseries.bro new file mode 100644 index 
0000000000..c8ba922d2a --- /dev/null +++ b/scripts/base/frameworks/logging/writers/dataseries.bro @@ -0,0 +1,62 @@ +##! Interface for the dataseries log writer. + +module LogDataSeries; + +export { + ## Compression to use with the DS output file. Options are: + ## + ## 'none' -- No compression. + ## 'lzf' -- LZF compression. Very quick, but leads to larger output files. + ## 'lzo' -- LZO compression. Very fast decompression times. + ## 'gz' -- GZIP compression. Slower than LZF, but also produces smaller output. + ## 'bz2' -- BZIP2 compression. Slower than GZIP, but also produces smaller output. + const ds_compression = "lzf" &redef; + + ## The extent buffer size. + ## Larger values here lead to better compression and more efficient writes, but + ## also increases the lag between the time events are received and the time they + ## are actually written to disk. + const ds_extent_size = 65536 &redef; + + ## Should we dump the XML schema we use for this ds file to disk? + ## If yes, the XML schema shares the name of the logfile, but has + ## an XML ending. + const ds_dump_schema = T &redef; + + ## How many threads should DataSeries spawn to perform compression? + ## Note that this dictates the number of threads per log stream. If + ## you're using a lot of streams, you may want to keep this number + ## relatively small. + ## + ## Default value is 1, which will spawn one thread / core / stream. + ## + ## MAX is 128, MIN is 1. + const ds_num_threads = 1 &redef; + + ## Should time be stored as an integer or a double? + ## Storing time as a double leads to possible precision issues and + ## could (significantly) increase the size of the resulting DS log. + ## That said, timestamps stored in double form are more consistent + ## with the rest of Bro and are more easily readable / understandable + ## when working with the raw DataSeries format. + ## + ## Double timestamps are used by default. 
+ const ds_use_integer = F &redef; +} + +# Default function to postprocess a rotated DataSeries log file. It moves the +# rotated file to a new name that includes a timestamp with the opening time, and +# then runs the writer's default postprocessor command on it. +function default_rotation_postprocessor_func(info: Log::RotationInfo) : bool + { + # Move file to name including both opening and closing time. + local dst = fmt("%s.%s.ds", info$path, + strftime(Log::default_rotation_date_format, info$open)); + + system(fmt("/bin/mv %s %s", info$fname, dst)); + + # Run default postprocessor. + return Log::run_rotation_postprocessor_cmd(info, dst); + } + +redef Log::default_rotation_postprocessors += { [Log::WRITER_DATASERIES] = default_rotation_postprocessor_func }; diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index ce1b25dd42..ad40fc377c 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -419,6 +419,7 @@ set(bro_SRCS logging/WriterBackend.cc logging/WriterFrontend.cc logging/writers/Ascii.cc + logging/writers/DataSeries.cc logging/writers/None.cc ${dns_SRCS} diff --git a/src/logging.bif b/src/logging.bif index c8960b4e38..6e66de8772 100644 --- a/src/logging.bif +++ b/src/logging.bif @@ -72,3 +72,11 @@ const set_separator: string; const empty_field: string; const unset_field: string; +# Options for the DataSeries writer. + +module LogDataSeries; + +const ds_compression: string; +const ds_extent_size: count; +const ds_dump_schema: bool; +const ds_num_threads: count; diff --git a/src/logging/Manager.cc b/src/logging/Manager.cc index 74220ecde4..04b4ef4b86 100644 --- a/src/logging/Manager.cc +++ b/src/logging/Manager.cc @@ -16,9 +16,11 @@ #include "writers/Ascii.h" #include "writers/None.h" +#ifdef USE_DATASERIES +#include "writers/DataSeries.h" +#endif + using namespace logging; -using threading::Value; -using threading::Field; // Structure describing a log writer type. 
struct WriterDefinition { @@ -32,6 +34,9 @@ struct WriterDefinition { WriterDefinition log_writers[] = { { BifEnum::Log::WRITER_NONE, "None", 0, writer::None::Instantiate }, { BifEnum::Log::WRITER_ASCII, "Ascii", 0, writer::Ascii::Instantiate }, +#ifdef USE_DATASERIES + { BifEnum::Log::WRITER_DATASERIES, "DataSeries", 0, writer::DataSeries::Instantiate }, +#endif // End marker, don't touch. { BifEnum::Log::WRITER_DEFAULT, "None", 0, (WriterBackend* (*)(WriterFrontend* frontend))0 } @@ -51,7 +56,7 @@ struct Manager::Filter { Func* postprocessor; int num_fields; - Field** fields; + threading::Field** fields; // Vector indexed by field number. Each element is a list of record // indices defining a path leading to the value across potential @@ -127,6 +132,17 @@ Manager::~Manager() delete *s; } +list Manager::SupportedFormats() + { + list formats; + + for ( WriterDefinition* ld = log_writers; ld->type != BifEnum::Log::WRITER_DEFAULT; ++ld ) + formats.push_back(ld->name); + + return formats; + } + + WriterBackend* Manager::CreateBackend(WriterFrontend* frontend, bro_int_t type) { WriterDefinition* ld = log_writers; @@ -135,7 +151,7 @@ WriterBackend* Manager::CreateBackend(WriterFrontend* frontend, bro_int_t type) { if ( ld->type == BifEnum::Log::WRITER_DEFAULT ) { - reporter->Error("unknow writer when creating writer"); + reporter->Error("unknown writer type requested"); return 0; } @@ -159,10 +175,8 @@ WriterBackend* Manager::CreateBackend(WriterFrontend* frontend, bro_int_t type) // function. 
ld->factory = 0; - DBG_LOG(DBG_LOGGING, "failed to init writer class %s", - ld->name); - - return false; + reporter->Error("initialization of writer %s failed", ld->name); + return 0; } } @@ -449,7 +463,7 @@ bool Manager::TraverseRecord(Stream* stream, Filter* filter, RecordType* rt, filter->indices.push_back(new_indices); - filter->fields = (Field**) + filter->fields = (threading::Field**) realloc(filter->fields, sizeof(Field) * ++filter->num_fields); @@ -459,7 +473,7 @@ bool Manager::TraverseRecord(Stream* stream, Filter* filter, RecordType* rt, return false; } - Field* field = new Field(); + threading::Field* field = new threading::Field(); field->name = new_path; field->type = t->Tag(); if ( field->type == TYPE_TABLE ) @@ -572,7 +586,7 @@ bool Manager::AddFilter(EnumVal* id, RecordVal* fval) for ( int i = 0; i < filter->num_fields; i++ ) { - Field* field = filter->fields[i]; + threading::Field* field = filter->fields[i]; DBG_LOG(DBG_LOGGING, " field %10s: %s", field->name.c_str(), type_name(field->type)); } @@ -744,10 +758,10 @@ bool Manager::Write(EnumVal* id, RecordVal* columns) // Copy the fields for WriterFrontend::Init() as it // will take ownership. 
- Field** arg_fields = new Field*[filter->num_fields]; + threading::Field** arg_fields = new threading::Field*[filter->num_fields]; for ( int j = 0; j < filter->num_fields; ++j ) - arg_fields[j] = new Field(*filter->fields[j]); + arg_fields[j] = new threading::Field(*filter->fields[j]); writer = CreateWriter(stream->id, filter->writer, path, filter->num_fields, @@ -898,10 +912,10 @@ threading::Value* Manager::ValToLogVal(Val* val, BroType* ty) return lval; } -Value** Manager::RecordToFilterVals(Stream* stream, Filter* filter, +threading::Value** Manager::RecordToFilterVals(Stream* stream, Filter* filter, RecordVal* columns) { - Value** vals = new Value*[filter->num_fields]; + threading::Value** vals = new threading::Value*[filter->num_fields]; for ( int i = 0; i < filter->num_fields; ++i ) { @@ -920,7 +934,7 @@ Value** Manager::RecordToFilterVals(Stream* stream, Filter* filter, if ( ! val ) { // Value, or any of its parents, is not set. - vals[i] = new Value(filter->fields[i]->type, false); + vals[i] = new threading::Value(filter->fields[i]->type, false); break; } } @@ -933,7 +947,7 @@ Value** Manager::RecordToFilterVals(Stream* stream, Filter* filter, } WriterFrontend* Manager::CreateWriter(EnumVal* id, EnumVal* writer, string path, - int num_fields, const Field* const* fields, bool local, bool remote) + int num_fields, const threading::Field* const* fields, bool local, bool remote) { Stream* stream = FindStream(id); @@ -997,7 +1011,7 @@ WriterFrontend* Manager::CreateWriter(EnumVal* id, EnumVal* writer, string path, return writer_obj; } -void Manager::DeleteVals(int num_fields, Value** vals) +void Manager::DeleteVals(int num_fields, threading::Value** vals) { // Note this code is duplicated in WriterBackend::DeleteVals(). 
for ( int i = 0; i < num_fields; i++ ) @@ -1007,7 +1021,7 @@ void Manager::DeleteVals(int num_fields, Value** vals) } bool Manager::Write(EnumVal* id, EnumVal* writer, string path, int num_fields, - Value** vals) + threading::Value** vals) { Stream* stream = FindStream(id); @@ -1116,8 +1130,10 @@ void Manager::Terminate() { for ( vector::iterator s = streams.begin(); s != streams.end(); ++s ) { - if ( *s ) - Flush((*s)->id); + if ( ! *s ) + continue; + + Flush((*s)->id); } } diff --git a/src/logging/Manager.h b/src/logging/Manager.h index bf097c5e1a..5af3e55b4a 100644 --- a/src/logging/Manager.h +++ b/src/logging/Manager.h @@ -15,7 +15,6 @@ class RotationTimer; namespace logging { - class WriterBackend; class WriterFrontend; class RotationFinishedMessage; @@ -56,7 +55,7 @@ public: * logging.bif, which just forwards here. */ bool EnableStream(EnumVal* id); - + /** * Disables a log stream. * @@ -145,6 +144,11 @@ public: */ void Terminate(); + /** + * Returns a list of supported output formats. + */ + static list SupportedFormats(); + protected: friend class WriterFrontend; friend class RotationFinishedMessage; diff --git a/src/logging/WriterBackend.cc b/src/logging/WriterBackend.cc index 7c71c09604..28b623988c 100644 --- a/src/logging/WriterBackend.cc +++ b/src/logging/WriterBackend.cc @@ -222,17 +222,6 @@ bool WriterBackend::Flush() return true; } -bool WriterBackend::Finish() - { - if ( ! DoFlush() ) - { - DisableFrontend(); - return false; - } - - return true; - } - bool WriterBackend::DoHeartbeat(double network_time, double current_time) { MsgThread::DoHeartbeat(network_time, current_time); diff --git a/src/logging/WriterBackend.h b/src/logging/WriterBackend.h index efb3b5d95e..cf58430e9a 100644 --- a/src/logging/WriterBackend.h +++ b/src/logging/WriterBackend.h @@ -101,15 +101,6 @@ public: */ bool Rotate(string rotated_path, double open, double close, bool terminating); - /** - * Finishes writing to this logger in a regularl fashion. 
Must not be - * called if an error has been indicated earlier. After calling this, - * no further writing must be performed. - * - * @return False if an error occured. - */ - bool Finish(); - /** * Disables the frontend that has instantiated this backend. Once * disabled,the frontend will not send any further message over. @@ -175,6 +166,8 @@ public: string Render(const threading::Value::subnet_t& subnet) const; protected: + friend class FinishMessage; + /** * Writer-specific intialization method. * @@ -272,26 +265,18 @@ protected: bool terminating) = 0; /** - * Writer-specific method implementing log output finalization at - * termination. Not called when any of the other methods has - * previously signaled an error, i.e., executing this method signals - * a regular shutdown of the writer. - * - * A writer implementation must override this method but it can just - * ignore calls if flushing doesn't align with its semantics. - * - * If the method returns false, it will be assumed that a fatal error - * has occured that prevents the writer from further operation; it - * will then be disabled and eventually deleted. When returning - * false, an implementation should also call Error() to indicate what - * happened. + * Writer-specific method called just before the threading system is + * going to shutdown. + * + * This method can be overridden but one must call + * WriterBackend::DoFinish(). */ - virtual bool DoFinish() = 0; + virtual bool DoFinish() { return MsgThread::DoFinish(); } /** * Triggered by regular heartbeat messages from the main thread. * - * This method can be overridden but once must call + * This method can be overridden but one must call * WriterBackend::DoHeartbeat(). 
*/ virtual bool DoHeartbeat(double network_time, double current_time); diff --git a/src/logging/WriterFrontend.cc b/src/logging/WriterFrontend.cc index 26e8eaf22e..c6a90c1fa5 100644 --- a/src/logging/WriterFrontend.cc +++ b/src/logging/WriterFrontend.cc @@ -90,7 +90,7 @@ public: FinishMessage(WriterBackend* backend) : threading::InputMessage("Finish", backend) {} - virtual bool Process() { return Object()->Finish(); } + virtual bool Process() { return Object()->DoFinish(); } }; } @@ -117,8 +117,9 @@ WriterFrontend::WriterFrontend(EnumVal* arg_stream, EnumVal* arg_writer, bool ar if ( local ) { backend = log_mgr->CreateBackend(this, writer->AsEnum()); - assert(backend); - backend->Start(); + + if ( backend ) + backend->Start(); } else diff --git a/src/logging/writers/Ascii.cc b/src/logging/writers/Ascii.cc index 0759e60a82..2f25ac418f 100644 --- a/src/logging/writers/Ascii.cc +++ b/src/logging/writers/Ascii.cc @@ -69,8 +69,7 @@ bool Ascii::WriteHeaderField(const string& key, const string& val) return (fwrite(str.c_str(), str.length(), 1, file) == 1); } -bool Ascii::DoInit(string path, int num_fields, - const Field* const * fields) +bool Ascii::DoInit(string path, int num_fields, const Field* const * fields) { if ( output_to_stdout ) path = "/dev/stdout"; @@ -146,7 +145,7 @@ bool Ascii::DoFlush() bool Ascii::DoFinish() { - return true; + return WriterBackend::DoFinish(); } bool Ascii::DoWriteOne(ODesc* desc, Value* val, const Field* field) diff --git a/src/logging/writers/DataSeries.cc b/src/logging/writers/DataSeries.cc new file mode 100644 index 0000000000..27c4cd6009 --- /dev/null +++ b/src/logging/writers/DataSeries.cc @@ -0,0 +1,476 @@ +// See the file "COPYING" in the main distribution directory for copyright. 
+ +#include +#include +#include + +#include + +#include "NetVar.h" +#include "threading/SerialTypes.h" + +#include "DataSeries.h" + +using namespace logging; +using namespace writer; + +// NOTE: Naming conventions are a little bit scattershot at the moment. +// Within the scope of this file, a function name prefixed by '_' denotes a +// static function. + +// ************************ LOCAL PROTOTYPES ********************************* + +struct SchemaValue; + +/** + * Turns a log value into a std::string. Uses an ostringstream to do the + * heavy lifting, but still need to switch on the type to know which value + * in the union to give to the string string for processing. + * + * @param val The value we wish to convert to a string + * @return the string value of val + */ +static std::string _LogValueToString(threading::Value* val); + +/** + * Takes a field type and converts it to a relevant DataSeries type. + * + * @param field We extract the type from this and convert it into a relevant DS type. + * @return String representation of type that DataSeries can understand. + */ +static string _GetDSFieldType(const threading::Field* field); + +/** + * Takes a field type and converts it to a readable string. + * + * @param field We extract the type from this and convert it into a readable string. + * @return String representation of the field's type + */ +static string _GetBroTypeString(const threading::Field *field); + +/** + * Takes a list of types, a list of names, and a title, and uses it to construct a valid DataSeries XML schema + * thing, which is then returned as a std::string + * + * @param opts std::vector of strings containing a list of options to be appended to each field (e.g. "pack_relative=yes") + * @param sTitle Name of this schema. Ideally, these schemas would be aggregated and re-used. + */ +static string _BuildDSSchemaFromFieldTypes(const vector& vals, string sTitle); + +/** + * Are there any options we should put into the XML schema? 
+ * + * @param field We extract the type from this and return any options that make sense for that type. + * @return Options that can be added directly to the XML (e.g. "pack_relative=\"yes\"") + */ +static std::string _GetDSOptionsForType(const threading::Field *field); + +/** + * Internal helper structure; populate a vector of these which is passed to the XML generator for its use. + */ +struct SchemaValue +{ + string ds_type; + string bro_type; + string field_name; + string field_options; + + SchemaValue(const threading::Field *field) + { + ds_type = _GetDSFieldType(field); + field_name = string(field->name); + field_options = _GetDSOptionsForType(field); + bro_type = _GetBroTypeString(field); + } +}; + +// ************************ LOCAL IMPL ********************************* + +std::string DataSeries::LogValueToString(threading::Value *val) +{ + const int strsz = 1024; + char strbuf[strsz]; + + // In some cases, no value is attached. If this is the case, return an empty string. + if(!val->present) + return ""; + + std::ostringstream ostr; + switch(val->type) + { + case TYPE_BOOL: + return (val->val.int_val ? "true" : "false"); + + case TYPE_INT: + ostr << val->val.int_val; + return ostr.str(); + + case TYPE_COUNT: + case TYPE_COUNTER: + case TYPE_PORT: + ostr << val->val.uint_val; + return ostr.str(); + + case TYPE_SUBNET: + ostr << Render(val->val.subnet_val); + return ostr.str(); + + case TYPE_ADDR: + ostr << Render(val->val.addr_val); + return ostr.str(); + + // Note: These two cases are relatively special. We need to convert these values into their integer equivalents + // to maximize precision. At the moment, there won't be a noticeable effect (Bro uses the double format everywhere + // internally, so we've already lost the precision we'd gain here), but timestamps may eventually switch to this + // representation within Bro. + // + // in the near-term, this *should* lead to better pack_relative (and thus smaller output files). 
+ case TYPE_TIME: + case TYPE_INTERVAL: + ostr << (unsigned long)(DataSeries::TIME_SCALE * val->val.double_val); + return ostr.str(); + + case TYPE_DOUBLE: + ostr << val->val.double_val; + return ostr.str(); + + case TYPE_ENUM: + case TYPE_STRING: + case TYPE_FILE: + { + int size = val->val.string_val->size(); + string tmpString = ""; + if(size) + tmpString = string(val->val.string_val->data(), val->val.string_val->size()); + else + tmpString = string(""); + return tmpString; + } + case TYPE_TABLE: + { + if ( ! val->val.set_val.size ) + { + return ""; + } + + string tmpString = ""; + for ( int j = 0; j < val->val.set_val.size; j++ ) + { + if ( j > 0 ) + tmpString += ":"; //TODO: Specify set separator char in configuration. + + tmpString += LogValueToString(val->val.set_val.vals[j]); + } + return tmpString; + } + case TYPE_VECTOR: + { + if ( ! val->val.vector_val.size ) + { + return ""; + } + + string tmpString = ""; + for ( int j = 0; j < val->val.vector_val.size; j++ ) + { + if ( j > 0 ) + tmpString += ":"; //TODO: Specify set separator char in configuration. 
+ + tmpString += LogValueToString(val->val.vector_val.vals[j]); + } + + return tmpString; + } + default: + return "???"; + } +} + +static string _GetDSFieldType(const threading::Field *field) +{ + switch(field->type) + { + case TYPE_BOOL: + return "bool"; + + case TYPE_COUNT: + case TYPE_COUNTER: + case TYPE_PORT: + case TYPE_INT: + case TYPE_TIME: + case TYPE_INTERVAL: + return "int64"; + + case TYPE_DOUBLE: + return "double"; + + case TYPE_SUBNET: + case TYPE_ADDR: + case TYPE_ENUM: + case TYPE_STRING: + case TYPE_FILE: + case TYPE_TABLE: + case TYPE_VECTOR: + default: + return "variable32"; + + } +} + +static string _GetBroTypeString(const threading::Field *field) +{ + switch(field->type) + { + case TYPE_BOOL: + return "bool"; + case TYPE_COUNT: + return "count"; + case TYPE_COUNTER: + return "counter"; + case TYPE_PORT: + return "port"; + case TYPE_INT: + return "int"; + case TYPE_TIME: + return "time"; + case TYPE_INTERVAL: + return "interval"; + case TYPE_DOUBLE: + return "double"; + case TYPE_SUBNET: + return "subnet"; + case TYPE_ADDR: + return "addr"; + case TYPE_ENUM: + return "enum"; + case TYPE_STRING: + return "string"; + case TYPE_FILE: + return "file"; + case TYPE_TABLE: + return "table"; + case TYPE_VECTOR: + return "vector"; + default: + return "???"; + } +} + +static string _BuildDSSchemaFromFieldTypes(const vector& vals, string sTitle) +{ + if("" == sTitle) + { + sTitle = "GenericBroStream"; + } + string xmlschema; + xmlschema = "\n"; + for(size_t i = 0; i < vals.size(); ++i) + { + xmlschema += "\t\n"; + } + xmlschema += "\n"; + for(size_t i = 0; i < vals.size(); ++i) + { + xmlschema += "\n"; + } + return xmlschema; +} + +static std::string _GetDSOptionsForType(const threading::Field *field) +{ + switch(field->type) + { + case TYPE_TIME: + case TYPE_INTERVAL: + return "pack_relative=\"" + std::string(field->name) + "\""; + case TYPE_SUBNET: + case TYPE_ADDR: + case TYPE_ENUM: + case TYPE_STRING: + case TYPE_FILE: + case TYPE_TABLE: + case 
TYPE_VECTOR: + return "pack_unique=\"yes\""; + default: + return ""; + } +} + +// ************************ CLASS IMPL ********************************* + +DataSeries::DataSeries(WriterFrontend* frontend) : WriterBackend(frontend) +{ + ds_compression = string((const char *)BifConst::LogDataSeries::ds_compression->Bytes(), BifConst::LogDataSeries::ds_compression->Len()); + ds_dump_schema = BifConst::LogDataSeries::ds_dump_schema; + ds_extent_size = BifConst::LogDataSeries::ds_extent_size; + ds_num_threads = BifConst::LogDataSeries::ds_num_threads; +} + +DataSeries::~DataSeries() +{ +} + +bool DataSeries::DoInit(string path, int num_fields, const threading::Field* const * fields) + { + // We first construct an XML schema thing (and, if ds_dump_schema is + // set, dump it to path + ".ds.xml"). Assuming that goes well, we + // use that schema to build our output logfile and prepare it to be + // written to. + + // Note: compressor count must be set *BEFORE* DataSeriesSink is instantiated. + if(ds_num_threads < THREAD_MIN && ds_num_threads != 0) + { + fprintf(stderr, "%d is too few threads! Using %d instead\n", (int)ds_num_threads, (int)THREAD_MIN); + ds_num_threads = THREAD_MIN; + } + if(ds_num_threads > THREAD_MAX) + { + fprintf(stderr, "%d is too many threads! 
Dropping back to %d\n", (int)ds_num_threads, (int)THREAD_MAX); + ds_num_threads = THREAD_MAX; + } + + if(ds_num_threads > 0) + { + DataSeriesSink::setCompressorCount(ds_num_threads); + } + vector schema_list; + for ( int i = 0; i < num_fields; i++ ) + { + const threading::Field* field = fields[i]; + SchemaValue val(field); + schema_list.push_back(val); + } + string schema = _BuildDSSchemaFromFieldTypes(schema_list, path); + if(ds_dump_schema) + { + FILE * pFile; + pFile = fopen ( string(path + ".ds.xml").c_str() , "wb" ); + if(NULL == pFile) + { + perror("Could not dump schema"); + } + fwrite (schema.c_str(), 1 , schema.length() , pFile ); + fclose (pFile); + } + + int compress_type = Extent::compress_all; + + if(ds_compression == "lzf") + { + compress_type = Extent::compress_lzf; + } + else if(ds_compression == "lzo") + { + compress_type = Extent::compress_lzo; + } + else if(ds_compression == "gz") + { + compress_type = Extent::compress_gz; + } + else if(ds_compression == "bz2") + { + compress_type = Extent::compress_bz2; + } + else if(ds_compression == "none") + { + compress_type = Extent::compress_none; + } + else if(ds_compression == "any") + { + compress_type = Extent::compress_all; + } + else + { + fprintf(stderr, "%s is not a valid compression type. Valid types are: 'lzf', 'lzo', 'gz', 'bz2', 'none', 'any'\n", ds_compression.c_str()); + fprintf(stderr, "Defaulting to 'any'\n"); + } + + log_type = const_cast(log_types.registerType(schema)); + + log_series.setType(*log_type); + log_file = new DataSeriesSink(path + ".ds", compress_type); + log_file->writeExtentLibrary(log_types); + + for(size_t i = 0; i < schema_list.size(); ++i) + extents.insert(std::make_pair(schema_list[i].field_name, GeneralField::create(log_series, schema_list[i].field_name))); + + if(ds_extent_size < ROW_MIN) + { + fprintf(stderr, "%d is not a valid value for 'rows'. 
Using min of %d instead.\n", (int)ds_extent_size, (int)ROW_MIN); + ds_extent_size = ROW_MIN; + } + else if(ds_extent_size > ROW_MAX) + { + fprintf(stderr, "%d is not a valid value for 'rows'. Using max of %d instead.\n", (int)ds_extent_size, (int)ROW_MAX); + ds_extent_size = ROW_MAX; + } + log_output = new OutputModule(*log_file, log_series, log_type, ds_extent_size); + + return true; + + } + +bool DataSeries::DoFlush() +{ + // Flushing is handled by DataSeries automatically, so this function doesn't do anything. + return true; +} + +bool DataSeries::DoFinish() +{ + for(ExtentIterator iter = extents.begin(); + iter != extents.end(); ++iter) + { + delete iter->second; + } + extents.clear(); + // Don't delete the file before you delete the output, or bad things happen. + delete log_output; + delete log_file; + + return WriterBackend::DoFinish(); +} + +bool DataSeries::DoWrite(int num_fields, const threading::Field* const * fields, + threading::Value** vals) +{ + log_output->newRecord(); + for(size_t i = 0; i < (size_t)num_fields; ++i) + { + ExtentIterator iter = extents.find(fields[i]->name); + assert(iter != extents.end()); + if( iter != extents.end() ) + { + GeneralField *cField = iter->second; + if(vals[i]->present) + cField->set(LogValueToString(vals[i])); + } + } + + return true; +} + +bool DataSeries::DoRotate(string rotated_path, double open, double close, bool terminating) +{ + // Note that if DS files are rotated too often, the aggregate log size will be (much) larger. + + DoFinish(); + + string dsname = Path() + ".ds"; + string nname = rotated_path + ".ds"; + rename(dsname.c_str(), nname.c_str()); + + if ( ! FinishedRotation(nname, dsname, open, close, terminating) ) + { + Error(Fmt("error rotating %s to %s", dsname.c_str(), nname.c_str())); + return false; + } + + return DoInit(Path(), NumFields(), Fields()); +} + +bool DataSeries::DoSetBuf(bool enabled) +{ + // DataSeries is *always* buffered to some degree. This option is ignored. 
+ return true; +} diff --git a/src/logging/writers/DataSeries.h b/src/logging/writers/DataSeries.h new file mode 100644 index 0000000000..5331975937 --- /dev/null +++ b/src/logging/writers/DataSeries.h @@ -0,0 +1,69 @@ +// See the file "COPYING" in the main distribution directory for copyright. +// +// A binary log writer producing DataSeries output. See doc/data-series.rst +// for more information. + +#ifndef LOGGING_WRITER_DATA_SERIES_H +#define LOGGING_WRITER_DATA_SERIES_H + +#include "../WriterBackend.h" + +#include +#include +#include +#include + +namespace logging { namespace writer { + +class DataSeries : public WriterBackend { +public: + DataSeries(WriterFrontend* frontend); + ~DataSeries(); + + static WriterBackend* Instantiate(WriterFrontend* frontend) + { return new DataSeries(frontend); } + +protected: + virtual bool DoInit(string path, int num_fields, + const threading::Field* const * fields); + + virtual bool DoWrite(int num_fields, const threading::Field* const* fields, + threading::Value** vals); + virtual bool DoSetBuf(bool enabled); + virtual bool DoRotate(string rotated_path, double open, + double close, bool terminating); + virtual bool DoFlush(); + virtual bool DoFinish(); + +private: + static const size_t ROW_MIN = 2048; // Minimum extent size. + static const size_t ROW_MAX = (1024 * 1024 * 100); // Maximum extent size. + static const size_t THREAD_MIN = 1; // Minimum number of compression threads that DataSeries may spawn. + static const size_t THREAD_MAX = 128; // Maximum number of compression threads that DataSeries may spawn. + static const size_t TIME_SCALE = 1000000; // Fixed-point multiplier for time values when converted to integers. + + std::string LogValueToString(threading::Value *val); + + typedef std::map ExtentMap; + typedef ExtentMap::iterator ExtentIterator; + + // Internal DataSeries structures we need to keep track of. 
+ DataSeriesSink* log_file; + ExtentTypeLibrary log_types; + ExtentType *log_type; + ExtentSeries log_series; + OutputModule* log_output; + ExtentMap extents; + + // Options set from the script-level. + uint64 ds_extent_size; + uint64 ds_num_threads; + string ds_compression; + bool ds_dump_schema; +}; + +} +} + +#endif + diff --git a/src/main.cc b/src/main.cc index ff33a3859d..f604d379ac 100644 --- a/src/main.cc +++ b/src/main.cc @@ -201,6 +201,27 @@ void usage() fprintf(stderr, " $BRO_LOG_SUFFIX | ASCII log file extension (.%s)\n", logging::writer::Ascii::LogExt().c_str()); fprintf(stderr, " $BRO_PROFILER_FILE | Output file for script execution statistics (not set)\n"); + fprintf(stderr, "\n"); + fprintf(stderr, " Supported log formats: "); + + bool first = true; + list fmts = logging::Manager::SupportedFormats(); + + for ( list::const_iterator i = fmts.begin(); i != fmts.end(); ++i ) + { + if ( *i == "None" ) + // Skip, it's uninteresting. + continue; + + if ( ! first ) + fprintf(stderr, ","); + + fprintf(stderr, "%s", (*i).c_str()); + first = false; + } + + fprintf(stderr, "\n"); + exit(1); } diff --git a/src/types.bif b/src/types.bif index 4657584a90..fe2e6ff861 100644 --- a/src/types.bif +++ b/src/types.bif @@ -162,6 +162,7 @@ enum Writer %{ WRITER_DEFAULT, WRITER_NONE, WRITER_ASCII, + WRITER_DATASERIES, %} enum ID %{ From 7131feefbc5164c7e92fbba938531fef0d913514 Mon Sep 17 00:00:00 2001 From: Robin Sommer Date: Mon, 9 Apr 2012 17:30:57 -0700 Subject: [PATCH 03/20] Working on DataSeries support. - The option to use integers insteads of double was ignored. - Renaming script-level options to remove the ds_ prefix. - Log rotation didn't work. - A set of simple unit tests. 
--- .../frameworks/logging/writers/dataseries.bro | 10 +- src/logging.bif | 9 +- src/logging/writers/DataSeries.cc | 194 ++++----- src/logging/writers/DataSeries.h | 63 ++- .../ssh.ds.xml | 16 + .../out | 380 ++++++++++++++++++ .../ssh.ds.txt | 43 ++ .../conn.ds.txt | 96 +++++ .../conn.ds.txt | 96 +++++ .../http.ds.txt | 90 +++++ .../frameworks/logging/dataseries/options.bro | 43 ++ .../frameworks/logging/dataseries/rotate.bro | 33 ++ .../logging/dataseries/test-logging.bro | 34 ++ .../logging/dataseries/time-as-int.bro | 8 + .../logging/dataseries/wikipedia.bro | 8 + testing/scripts/has-writer | 6 + 16 files changed, 1001 insertions(+), 128 deletions(-) create mode 100644 testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.options/ssh.ds.xml create mode 100644 testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.rotate/out create mode 100644 testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.test-logging/ssh.ds.txt create mode 100644 testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.time-as-int/conn.ds.txt create mode 100644 testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.wikipedia/conn.ds.txt create mode 100644 testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.wikipedia/http.ds.txt create mode 100644 testing/btest/scripts/base/frameworks/logging/dataseries/options.bro create mode 100644 testing/btest/scripts/base/frameworks/logging/dataseries/rotate.bro create mode 100644 testing/btest/scripts/base/frameworks/logging/dataseries/test-logging.bro create mode 100644 testing/btest/scripts/base/frameworks/logging/dataseries/time-as-int.bro create mode 100644 testing/btest/scripts/base/frameworks/logging/dataseries/wikipedia.bro create mode 100755 testing/scripts/has-writer diff --git a/scripts/base/frameworks/logging/writers/dataseries.bro b/scripts/base/frameworks/logging/writers/dataseries.bro index c8ba922d2a..daf59ebf42 100644 --- 
a/scripts/base/frameworks/logging/writers/dataseries.bro +++ b/scripts/base/frameworks/logging/writers/dataseries.bro @@ -10,18 +10,18 @@ export { ## 'lzo' -- LZO compression. Very fast decompression times. ## 'gz' -- GZIP compression. Slower than LZF, but also produces smaller output. ## 'bz2' -- BZIP2 compression. Slower than GZIP, but also produces smaller output. - const ds_compression = "lzf" &redef; + const compression = "lzf" &redef; ## The extent buffer size. ## Larger values here lead to better compression and more efficient writes, but ## also increases the lag between the time events are received and the time they ## are actually written to disk. - const ds_extent_size = 65536 &redef; + const extent_size = 65536 &redef; ## Should we dump the XML schema we use for this ds file to disk? ## If yes, the XML schema shares the name of the logfile, but has ## an XML ending. - const ds_dump_schema = T &redef; + const dump_schema = F &redef; ## How many threads should DataSeries spawn to perform compression? ## Note that this dictates the number of threads per log stream. If @@ -31,7 +31,7 @@ export { ## Default value is 1, which will spawn one thread / core / stream. ## ## MAX is 128, MIN is 1. - const ds_num_threads = 1 &redef; + const num_threads = 1 &redef; ## Should time be stored as an integer or a double? ## Storing time as a double leads to possible precision issues and @@ -41,7 +41,7 @@ export { ## when working with the raw DataSeries format. ## ## Double timestamps are used by default. - const ds_use_integer = F &redef; + const use_integer_for_time = F &redef; } # Default function to postprocess a rotated DataSeries log file. 
It moves the diff --git a/src/logging.bif b/src/logging.bif index 6e66de8772..efc6ed0b4b 100644 --- a/src/logging.bif +++ b/src/logging.bif @@ -76,7 +76,8 @@ const unset_field: string; module LogDataSeries; -const ds_compression: string; -const ds_extent_size: count; -const ds_dump_schema: bool; -const ds_num_threads: count; +const compression: string; +const extent_size: count; +const dump_schema: bool; +const use_integer_for_time: bool; +const num_threads: count; diff --git a/src/logging/writers/DataSeries.cc b/src/logging/writers/DataSeries.cc index 27c4cd6009..5ee8a812da 100644 --- a/src/logging/writers/DataSeries.cc +++ b/src/logging/writers/DataSeries.cc @@ -14,78 +14,6 @@ using namespace logging; using namespace writer; -// NOTE: Naming conventions are a little bit scattershot at the moment. -// Within the scope of this file, a function name prefixed by '_' denotes a -// static function. - -// ************************ LOCAL PROTOTYPES ********************************* - -struct SchemaValue; - -/** - * Turns a log value into a std::string. Uses an ostringstream to do the - * heavy lifting, but still need to switch on the type to know which value - * in the union to give to the string string for processing. - * - * @param val The value we wish to convert to a string - * @return the string value of val - */ -static std::string _LogValueToString(threading::Value* val); - -/** - * Takes a field type and converts it to a relevant DataSeries type. - * - * @param field We extract the type from this and convert it into a relevant DS type. - * @return String representation of type that DataSeries can understand. - */ -static string _GetDSFieldType(const threading::Field* field); - -/** - * Takes a field type and converts it to a readable string. - * - * @param field We extract the type from this and convert it into a readable string. 
- * @return String representation of the field's type - */ -static string _GetBroTypeString(const threading::Field *field); - -/** - * Takes a list of types, a list of names, and a title, and uses it to construct a valid DataSeries XML schema - * thing, which is then returned as a std::string - * - * @param opts std::vector of strings containing a list of options to be appended to each field (e.g. "pack_relative=yes") - * @param sTitle Name of this schema. Ideally, these schemas would be aggregated and re-used. - */ -static string _BuildDSSchemaFromFieldTypes(const vector& vals, string sTitle); - -/** - * Are there any options we should put into the XML schema? - * - * @param field We extract the type from this and return any options that make sense for that type. - * @return Options that can be added directly to the XML (e.g. "pack_relative=\"yes\"") - */ -static std::string _GetDSOptionsForType(const threading::Field *field); - -/** - * Internal helper structure; populate a vector of these which is passed to the XML generator for its use. - */ -struct SchemaValue -{ - string ds_type; - string bro_type; - string field_name; - string field_options; - - SchemaValue(const threading::Field *field) - { - ds_type = _GetDSFieldType(field); - field_name = string(field->name); - field_options = _GetDSOptionsForType(field); - bro_type = _GetBroTypeString(field); - } -}; - -// ************************ LOCAL IMPL ********************************* - std::string DataSeries::LogValueToString(threading::Value *val) { const int strsz = 1024; @@ -127,7 +55,11 @@ std::string DataSeries::LogValueToString(threading::Value *val) // in the near-term, this *should* lead to better pack_relative (and thus smaller output files). 
case TYPE_TIME: case TYPE_INTERVAL: - ostr << (unsigned long)(DataSeries::TIME_SCALE * val->val.double_val); + if ( ds_use_integer_for_time ) + ostr << (unsigned long)(DataSeries::TIME_SCALE * val->val.double_val); + else + ostr << val->val.double_val; + return ostr.str(); case TYPE_DOUBLE: @@ -186,7 +118,7 @@ std::string DataSeries::LogValueToString(threading::Value *val) } } -static string _GetDSFieldType(const threading::Field *field) +string DataSeries::GetDSFieldType(const threading::Field *field) { switch(field->type) { @@ -197,13 +129,15 @@ static string _GetDSFieldType(const threading::Field *field) case TYPE_COUNTER: case TYPE_PORT: case TYPE_INT: - case TYPE_TIME: - case TYPE_INTERVAL: return "int64"; case TYPE_DOUBLE: return "double"; + case TYPE_TIME: + case TYPE_INTERVAL: + return ds_use_integer_for_time ? "int64" : "double"; + case TYPE_SUBNET: case TYPE_ADDR: case TYPE_ENUM: @@ -217,7 +151,7 @@ static string _GetDSFieldType(const threading::Field *field) } } -static string _GetBroTypeString(const threading::Field *field) +string DataSeries::GetBroTypeString(const threading::Field *field) { switch(field->type) { @@ -256,7 +190,7 @@ static string _GetBroTypeString(const threading::Field *field) } } -static string _BuildDSSchemaFromFieldTypes(const vector& vals, string sTitle) +string DataSeries::BuildDSSchemaFromFieldTypes(const vector& vals, string sTitle) { if("" == sTitle) { @@ -276,13 +210,21 @@ static string _BuildDSSchemaFromFieldTypes(const vector& vals, stri return xmlschema; } -static std::string _GetDSOptionsForType(const threading::Field *field) +std::string DataSeries::GetDSOptionsForType(const threading::Field *field) { switch(field->type) { case TYPE_TIME: case TYPE_INTERVAL: - return "pack_relative=\"" + std::string(field->name) + "\""; + { + std::string s = "pack_relative=\"" + std::string(field->name) + "\""; + + if ( ! 
ds_use_integer_for_time ) + s += " pack_scale=\"1000000\""; + + return s; + } + case TYPE_SUBNET: case TYPE_ADDR: case TYPE_ENUM: @@ -300,16 +242,40 @@ static std::string _GetDSOptionsForType(const threading::Field *field) DataSeries::DataSeries(WriterFrontend* frontend) : WriterBackend(frontend) { - ds_compression = string((const char *)BifConst::LogDataSeries::ds_compression->Bytes(), BifConst::LogDataSeries::ds_compression->Len()); - ds_dump_schema = BifConst::LogDataSeries::ds_dump_schema; - ds_extent_size = BifConst::LogDataSeries::ds_extent_size; - ds_num_threads = BifConst::LogDataSeries::ds_num_threads; + ds_compression = string((const char *)BifConst::LogDataSeries::compression->Bytes(), BifConst::LogDataSeries::compression->Len()); + ds_dump_schema = BifConst::LogDataSeries::dump_schema; + ds_extent_size = BifConst::LogDataSeries::extent_size; + ds_num_threads = BifConst::LogDataSeries::num_threads; + ds_use_integer_for_time = BifConst::LogDataSeries::use_integer_for_time; } DataSeries::~DataSeries() { } +bool DataSeries::OpenLog(string path) + { + log_file = new DataSeriesSink(path + ".ds", compress_type); + log_file->writeExtentLibrary(log_types); + + for(size_t i = 0; i < schema_list.size(); ++i) + extents.insert(std::make_pair(schema_list[i].field_name, GeneralField::create(log_series, schema_list[i].field_name))); + + if(ds_extent_size < ROW_MIN) + { + fprintf(stderr, "%d is not a valid value for 'rows'. Using min of %d instead.\n", (int)ds_extent_size, (int)ROW_MIN); + ds_extent_size = ROW_MIN; + } + else if(ds_extent_size > ROW_MAX) + { + fprintf(stderr, "%d is not a valid value for 'rows'. 
Using max of %d instead.\n", (int)ds_extent_size, (int)ROW_MAX); + ds_extent_size = ROW_MAX; + } + log_output = new OutputModule(*log_file, log_series, log_type, ds_extent_size); + + return true; + } + bool DataSeries::DoInit(string path, int num_fields, const threading::Field* const * fields) { // We first construct an XML schema thing (and, if ds_dump_schema is @@ -333,14 +299,18 @@ bool DataSeries::DoInit(string path, int num_fields, const threading::Field* con { DataSeriesSink::setCompressorCount(ds_num_threads); } - vector schema_list; + for ( int i = 0; i < num_fields; i++ ) { const threading::Field* field = fields[i]; - SchemaValue val(field); + SchemaValue val; + val.ds_type = GetDSFieldType(field); + val.field_name = string(field->name); + val.field_options = GetDSOptionsForType(field); + val.bro_type = GetBroTypeString(field); schema_list.push_back(val); } - string schema = _BuildDSSchemaFromFieldTypes(schema_list, path); + string schema = BuildDSSchemaFromFieldTypes(schema_list, path); if(ds_dump_schema) { FILE * pFile; @@ -353,7 +323,7 @@ bool DataSeries::DoInit(string path, int num_fields, const threading::Field* con fclose (pFile); } - int compress_type = Extent::compress_all; + compress_type = Extent::compress_all; if(ds_compression == "lzf") { @@ -385,28 +355,11 @@ bool DataSeries::DoInit(string path, int num_fields, const threading::Field* con fprintf(stderr, "Defaulting to 'any'\n"); } - log_type = const_cast(log_types.registerType(schema)); + log_type = const_cast(log_types.registerType(schema)); log_series.setType(*log_type); - log_file = new DataSeriesSink(path + ".ds", compress_type); - log_file->writeExtentLibrary(log_types); - for(size_t i = 0; i < schema_list.size(); ++i) - extents.insert(std::make_pair(schema_list[i].field_name, GeneralField::create(log_series, schema_list[i].field_name))); - - if(ds_extent_size < ROW_MIN) - { - fprintf(stderr, "%d is not a valid value for 'rows'. 
Using min of %d instead.\n", (int)ds_extent_size, (int)ROW_MIN); - ds_extent_size = ROW_MIN; - } - else if(ds_extent_size > ROW_MAX) - { - fprintf(stderr, "%d is not a valid value for 'rows'. Using max of %d instead.\n", (int)ds_extent_size, (int)ROW_MAX); - ds_extent_size = ROW_MAX; - } - log_output = new OutputModule(*log_file, log_series, log_type, ds_extent_size); - - return true; + return OpenLog(path); } @@ -416,18 +369,26 @@ bool DataSeries::DoFlush() return true; } -bool DataSeries::DoFinish() -{ - for(ExtentIterator iter = extents.begin(); - iter != extents.end(); ++iter) - { +void DataSeries::CloseLog() + { + for( ExtentIterator iter = extents.begin(); iter != extents.end(); ++iter ) delete iter->second; - } + extents.clear(); - // Don't delete the file before you delete the output, or bad things happen. + + // Don't delete the file before you delete the output, or bad things + // happen. delete log_output; delete log_file; + log_output = 0; + log_file = 0; + } + +bool DataSeries::DoFinish() +{ + CloseLog(); + return WriterBackend::DoFinish(); } @@ -453,8 +414,7 @@ bool DataSeries::DoWrite(int num_fields, const threading::Field* const * fields, bool DataSeries::DoRotate(string rotated_path, double open, double close, bool terminating) { // Note that if DS files are rotated too often, the aggregate log size will be (much) larger. 
- - DoFinish(); + CloseLog(); string dsname = Path() + ".ds"; string nname = rotated_path + ".ds"; @@ -466,7 +426,7 @@ bool DataSeries::DoRotate(string rotated_path, double open, double close, bool t return false; } - return DoInit(Path(), NumFields(), Fields()); + return OpenLog(Path()); } bool DataSeries::DoSetBuf(bool enabled) diff --git a/src/logging/writers/DataSeries.h b/src/logging/writers/DataSeries.h index 5331975937..319cb72ec5 100644 --- a/src/logging/writers/DataSeries.h +++ b/src/logging/writers/DataSeries.h @@ -42,24 +42,83 @@ private: static const size_t THREAD_MAX = 128; // Maximum number of compression threads that DataSeries may spawn. static const size_t TIME_SCALE = 1000000; // Fixed-point multiplier for time values when converted to integers. + struct SchemaValue + { + string ds_type; + string bro_type; + string field_name; + string field_options; + }; + + /** + * Turns a log value into a std::string. Uses an ostringstream to do the + * heavy lifting, but we still need to switch on the type to know which value + * in the union to give to the string stream for processing. + * + * @param val The value we wish to convert to a string + * @return the string value of val + */ std::string LogValueToString(threading::Value *val); + + /** + * Takes a field type and converts it to a relevant DataSeries type. + * + * @param field We extract the type from this and convert it into a relevant DS type. + * @return String representation of type that DataSeries can understand. + */ + string GetDSFieldType(const threading::Field *field); + + /** + * Are there any options we should put into the XML schema? + * + * @param field We extract the type from this and return any options that make sense for that type. + * @return Options that can be added directly to the XML (e.g. 
"pack_relative=\"yes\"") + */ + std::string GetDSOptionsForType(const threading::Field *field); + + /** + * Takes a list of types, a list of names, and a title, and uses it to construct a valid DataSeries XML schema + * thing, which is then returned as a std::string + * + * @param opts std::vector of strings containing a list of options to be appended to each field (e.g. "pack_relative=yes") + * @param sTitle Name of this schema. Ideally, these schemas would be aggregated and re-used. + */ + string BuildDSSchemaFromFieldTypes(const vector& vals, string sTitle); + + /** + * Takes a field type and converts it to a readable string. + * + * @param field We extract the type from this and convert it into a readable string. + * @return String representation of the field's type + */ + string GetBroTypeString(const threading::Field *field); + + /** Closes the currently open file. */ + void CloseLog(); + + /** XXX */ + bool OpenLog(string path); + typedef std::map ExtentMap; typedef ExtentMap::iterator ExtentIterator; // Internal DataSeries structures we need to keep track of. - DataSeriesSink* log_file; + vector schema_list; ExtentTypeLibrary log_types; ExtentType *log_type; ExtentSeries log_series; - OutputModule* log_output; ExtentMap extents; + int compress_type; + + DataSeriesSink* log_file; + OutputModule* log_output; // Options set from the script-level. 
uint64 ds_extent_size; uint64 ds_num_threads; string ds_compression; bool ds_dump_schema; + bool ds_use_integer_for_time; }; } diff --git a/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.options/ssh.ds.xml b/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.options/ssh.ds.xml new file mode 100644 index 0000000000..71ad5d70a0 --- /dev/null +++ b/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.options/ssh.ds.xml @@ -0,0 +1,16 @@ + + + + + + + + + + + + + + + + diff --git a/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.rotate/out b/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.rotate/out new file mode 100644 index 0000000000..b6f05003f3 --- /dev/null +++ b/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.rotate/out @@ -0,0 +1,380 @@ +test.2011-03-07-03-00-05.ds test 11-03-07_03.00.05 11-03-07_04.00.05 0 +test.2011-03-07-04-00-05.ds test 11-03-07_04.00.05 11-03-07_05.00.05 0 +test.2011-03-07-05-00-05.ds test 11-03-07_05.00.05 11-03-07_06.00.05 0 +test.2011-03-07-06-00-05.ds test 11-03-07_06.00.05 11-03-07_07.00.05 0 +test.2011-03-07-07-00-05.ds test 11-03-07_07.00.05 11-03-07_08.00.05 0 +test.2011-03-07-08-00-05.ds test 11-03-07_08.00.05 11-03-07_09.00.05 0 +test.2011-03-07-09-00-05.ds test 11-03-07_09.00.05 11-03-07_10.00.05 0 +test.2011-03-07-10-00-05.ds test 11-03-07_10.00.05 11-03-07_11.00.05 0 +test.2011-03-07-11-00-05.ds test 11-03-07_11.00.05 11-03-07_12.00.05 0 +test.2011-03-07-12-00-05.ds test 11-03-07_12.00.05 11-03-07_12.59.55 1 +> test.2011-03-07-03-00-05.ds +# Extent Types ... 
+ + + + + + + + + + + + + + + + + + + + + + +extent offset ExtentType +40 DataSeries: XmlType +360 test +468 DataSeries: ExtentIndex +# Extent, type='test' +t id.orig_h id.orig_p id.resp_h id.resp_p +1.299e+09 10.0.0.1 20 10.0.0.2 1024 +1.299e+09 10.0.0.2 20 10.0.0.3 0 +# Extent, type='DataSeries: ExtentIndex' +offset extenttype +40 DataSeries: XmlType +360 test +468 DataSeries: ExtentIndex +> test.2011-03-07-04-00-05.ds +# Extent Types ... + + + + + + + + + + + + + + + + + + + + + + +extent offset ExtentType +40 DataSeries: XmlType +360 test +468 DataSeries: ExtentIndex +# Extent, type='test' +t id.orig_h id.orig_p id.resp_h id.resp_p +1.299e+09 10.0.0.1 20 10.0.0.2 1025 +1.299e+09 10.0.0.2 20 10.0.0.3 1 +# Extent, type='DataSeries: ExtentIndex' +offset extenttype +40 DataSeries: XmlType +360 test +468 DataSeries: ExtentIndex +> test.2011-03-07-05-00-05.ds +# Extent Types ... + + + + + + + + + + + + + + + + + + + + + + +extent offset ExtentType +40 DataSeries: XmlType +360 test +468 DataSeries: ExtentIndex +# Extent, type='test' +t id.orig_h id.orig_p id.resp_h id.resp_p +1.299e+09 10.0.0.1 20 10.0.0.2 1026 +1.299e+09 10.0.0.2 20 10.0.0.3 2 +# Extent, type='DataSeries: ExtentIndex' +offset extenttype +40 DataSeries: XmlType +360 test +468 DataSeries: ExtentIndex +> test.2011-03-07-06-00-05.ds +# Extent Types ... + + + + + + + + + + + + + + + + + + + + + + +extent offset ExtentType +40 DataSeries: XmlType +360 test +468 DataSeries: ExtentIndex +# Extent, type='test' +t id.orig_h id.orig_p id.resp_h id.resp_p +1.299e+09 10.0.0.1 20 10.0.0.2 1027 +1.299e+09 10.0.0.2 20 10.0.0.3 3 +# Extent, type='DataSeries: ExtentIndex' +offset extenttype +40 DataSeries: XmlType +360 test +468 DataSeries: ExtentIndex +> test.2011-03-07-07-00-05.ds +# Extent Types ... 
+ + + + + + + + + + + + + + + + + + + + + + +extent offset ExtentType +40 DataSeries: XmlType +360 test +468 DataSeries: ExtentIndex +# Extent, type='test' +t id.orig_h id.orig_p id.resp_h id.resp_p +1.299e+09 10.0.0.1 20 10.0.0.2 1028 +1.299e+09 10.0.0.2 20 10.0.0.3 4 +# Extent, type='DataSeries: ExtentIndex' +offset extenttype +40 DataSeries: XmlType +360 test +468 DataSeries: ExtentIndex +> test.2011-03-07-08-00-05.ds +# Extent Types ... + + + + + + + + + + + + + + + + + + + + + + +extent offset ExtentType +40 DataSeries: XmlType +360 test +468 DataSeries: ExtentIndex +# Extent, type='test' +t id.orig_h id.orig_p id.resp_h id.resp_p +1.299e+09 10.0.0.1 20 10.0.0.2 1029 +1.299e+09 10.0.0.2 20 10.0.0.3 5 +# Extent, type='DataSeries: ExtentIndex' +offset extenttype +40 DataSeries: XmlType +360 test +468 DataSeries: ExtentIndex +> test.2011-03-07-09-00-05.ds +# Extent Types ... + + + + + + + + + + + + + + + + + + + + + + +extent offset ExtentType +40 DataSeries: XmlType +360 test +468 DataSeries: ExtentIndex +# Extent, type='test' +t id.orig_h id.orig_p id.resp_h id.resp_p +1.299e+09 10.0.0.1 20 10.0.0.2 1030 +1.299e+09 10.0.0.2 20 10.0.0.3 6 +# Extent, type='DataSeries: ExtentIndex' +offset extenttype +40 DataSeries: XmlType +360 test +468 DataSeries: ExtentIndex +> test.2011-03-07-10-00-05.ds +# Extent Types ... + + + + + + + + + + + + + + + + + + + + + + +extent offset ExtentType +40 DataSeries: XmlType +360 test +468 DataSeries: ExtentIndex +# Extent, type='test' +t id.orig_h id.orig_p id.resp_h id.resp_p +1.299e+09 10.0.0.1 20 10.0.0.2 1031 +1.299e+09 10.0.0.2 20 10.0.0.3 7 +# Extent, type='DataSeries: ExtentIndex' +offset extenttype +40 DataSeries: XmlType +360 test +468 DataSeries: ExtentIndex +> test.2011-03-07-11-00-05.ds +# Extent Types ... 
+ + + + + + + + + + + + + + + + + + + + + + +extent offset ExtentType +40 DataSeries: XmlType +360 test +468 DataSeries: ExtentIndex +# Extent, type='test' +t id.orig_h id.orig_p id.resp_h id.resp_p +1.3e+09 10.0.0.1 20 10.0.0.2 1032 +1.3e+09 10.0.0.2 20 10.0.0.3 8 +# Extent, type='DataSeries: ExtentIndex' +offset extenttype +40 DataSeries: XmlType +360 test +468 DataSeries: ExtentIndex +> test.2011-03-07-12-00-05.ds +# Extent Types ... + + + + + + + + + + + + + + + + + + + + + + +extent offset ExtentType +40 DataSeries: XmlType +360 test +468 DataSeries: ExtentIndex +# Extent, type='test' +t id.orig_h id.orig_p id.resp_h id.resp_p +1.3e+09 10.0.0.1 20 10.0.0.2 1033 +1.3e+09 10.0.0.2 20 10.0.0.3 9 +# Extent, type='DataSeries: ExtentIndex' +offset extenttype +40 DataSeries: XmlType +360 test +468 DataSeries: ExtentIndex diff --git a/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.test-logging/ssh.ds.txt b/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.test-logging/ssh.ds.txt new file mode 100644 index 0000000000..f66f40b701 --- /dev/null +++ b/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.test-logging/ssh.ds.txt @@ -0,0 +1,43 @@ +# Extent Types ... 
+ + + + + + + + + + + + + + + + + + + + + + + + + + +extent offset ExtentType +40 DataSeries: XmlType +400 ssh +604 DataSeries: ExtentIndex +# Extent, type='ssh' +t id.orig_h id.orig_p id.resp_h id.resp_p status country +1.334e+09 1.2.3.4 1234 2.3.4.5 80 success unknown +1.334e+09 1.2.3.4 1234 2.3.4.5 80 failure US +1.334e+09 1.2.3.4 1234 2.3.4.5 80 failure UK +1.334e+09 1.2.3.4 1234 2.3.4.5 80 success BR +1.334e+09 1.2.3.4 1234 2.3.4.5 80 failure MX +# Extent, type='DataSeries: ExtentIndex' +offset extenttype +40 DataSeries: XmlType +400 ssh +604 DataSeries: ExtentIndex diff --git a/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.time-as-int/conn.ds.txt b/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.time-as-int/conn.ds.txt new file mode 100644 index 0000000000..e6294b1d71 --- /dev/null +++ b/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.time-as-int/conn.ds.txt @@ -0,0 +1,96 @@ +# Extent Types ... + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +extent offset ExtentType +40 DataSeries: XmlType +636 conn +2912 DataSeries: ExtentIndex +# Extent, type='conn' +ts uid id.orig_h id.orig_p id.resp_h id.resp_p proto service duration orig_bytes resp_bytes conn_state local_orig missed_bytes history orig_pkts orig_ip_bytes resp_pkts resp_ip_bytes +1300475167096535 UWkUyAuUGXf 141.142.220.202 5353 224.0.0.251 5353 udp dns 0 0 0 S0 F 0 D 1 73 0 0 +1300475167097012 arKYeMETxOg fe80::217:f2ff:fed7:cf65 5353 ff02::fb 5353 udp 0 0 0 S0 F 0 D 1 199 0 0 +1300475167099816 k6kgXLOoSKl 141.142.220.50 5353 224.0.0.251 5353 udp 0 0 0 S0 F 0 D 1 179 0 0 +1300475168853899 TEfuqmmG4bh 141.142.220.118 43927 141.142.2.2 53 udp dns 435 0 89 SHR F 0 Cd 0 0 1 117 +1300475168854378 FrJExwHcSal 141.142.220.118 37676 141.142.2.2 53 udp dns 420 0 99 SHR F 0 Cd 0 0 1 127 +1300475168854837 5OKnoww6xl4 141.142.220.118 40526 141.142.2.2 53 udp dns 391 0 183 SHR F 0 Cd 0 0 1 211 +1300475168857956 
3PKsZ2Uye21 141.142.220.118 32902 141.142.2.2 53 udp dns 317 0 89 SHR F 0 Cd 0 0 1 117 +1300475168858306 VW0XPVINV8a 141.142.220.118 59816 141.142.2.2 53 udp dns 343 0 99 SHR F 0 Cd 0 0 1 127 +1300475168858713 fRFu0wcOle6 141.142.220.118 59714 141.142.2.2 53 udp dns 375 0 183 SHR F 0 Cd 0 0 1 211 +1300475168891644 qSsw6ESzHV4 141.142.220.118 58206 141.142.2.2 53 udp dns 339 0 89 SHR F 0 Cd 0 0 1 117 +1300475168892037 iE6yhOq3SF 141.142.220.118 38911 141.142.2.2 53 udp dns 334 0 99 SHR F 0 Cd 0 0 1 127 +1300475168892414 GSxOnSLghOa 141.142.220.118 59746 141.142.2.2 53 udp dns 420 0 183 SHR F 0 Cd 0 0 1 211 +1300475168893988 qCaWGmzFtM5 141.142.220.118 45000 141.142.2.2 53 udp dns 384 0 89 SHR F 0 Cd 0 0 1 117 +1300475168894422 70MGiRM1Qf4 141.142.220.118 48479 141.142.2.2 53 udp dns 316 0 99 SHR F 0 Cd 0 0 1 127 +1300475168894787 h5DsfNtYzi1 141.142.220.118 48128 141.142.2.2 53 udp dns 422 0 183 SHR F 0 Cd 0 0 1 211 +1300475168901749 P654jzLoe3a 141.142.220.118 56056 141.142.2.2 53 udp dns 402 0 131 SHR F 0 Cd 0 0 1 159 +1300475168902195 Tw8jXtpTGu6 141.142.220.118 55092 141.142.2.2 53 udp dns 374 0 198 SHR F 0 Cd 0 0 1 226 +1300475169899438 BWaU4aSuwkc 141.142.220.44 5353 224.0.0.251 5353 udp dns 0 0 0 S0 F 0 D 1 85 0 0 +1300475170862384 10XodEwRycf 141.142.220.226 137 141.142.220.255 137 udp dns 2613016 350 0 S0 F 0 D 7 546 0 0 +1300475171675372 zno26fFZkrh fe80::3074:17d5:2052:c324 65373 ff02::1:3 5355 udp dns 100096 66 0 S0 F 0 D 2 162 0 0 +1300475171677081 v5rgkJBig5l 141.142.220.226 55131 224.0.0.252 5355 udp dns 100020 66 0 S0 F 0 D 2 122 0 0 +1300475173116749 eWZCH7OONC1 fe80::3074:17d5:2052:c324 54213 ff02::1:3 5355 udp dns 99801 66 0 S0 F 0 D 2 162 0 0 +1300475173117362 0Pwk3ntf8O3 141.142.220.226 55671 224.0.0.252 5355 udp dns 99848 66 0 S0 F 0 D 2 122 0 0 +1300475173153679 0HKorjr8Zp7 141.142.220.238 56641 141.142.220.255 137 udp dns 0 0 0 S0 F 0 D 1 78 0 0 +1300475168859163 GvmoxJFXdTa 141.142.220.118 49998 208.80.152.3 80 tcp 215893 1130 734 S1 F 1130 
ShACad 4 216 4 950 +1300475168652003 nQcgTWjvg4c 141.142.220.118 35634 208.80.152.2 80 tcp 61328 0 350 OTH F 0 CdA 1 52 1 402 +1300475168895267 UfGkYA2HI2g 141.142.220.118 50001 208.80.152.3 80 tcp 227283 1178 734 S1 F 1178 ShACad 4 216 4 950 +1300475168902635 i2rO3KD1Syg 141.142.220.118 35642 208.80.152.2 80 tcp 120040 534 412 S1 F 534 ShACad 3 164 3 576 +1300475168892936 0Q4FH8sESw5 141.142.220.118 50000 208.80.152.3 80 tcp 229603 1148 734 S1 F 1148 ShACad 4 216 4 950 +1300475168855305 EAr0uf4mhq 141.142.220.118 49996 208.80.152.3 80 tcp 218501 1171 733 S1 F 1171 ShACad 4 216 4 949 +1300475168892913 slFea8xwSmb 141.142.220.118 49999 208.80.152.3 80 tcp 220960 1137 733 S1 F 1137 ShACad 4 216 4 949 +1300475169780331 2cx26uAvUPl 141.142.220.235 6705 173.192.163.128 80 tcp 0 0 0 OTH F 0 h 0 0 1 48 +1300475168724007 j4u32Pc5bif 141.142.220.118 48649 208.80.152.118 80 tcp 119904 525 232 S1 F 525 ShACad 3 164 3 396 +1300475168855330 c4Zw9TmAE05 141.142.220.118 49997 208.80.152.3 80 tcp 219720 1125 734 S1 F 1125 ShACad 4 216 4 950 +# Extent, type='DataSeries: ExtentIndex' +offset extenttype +40 DataSeries: XmlType +636 conn +2912 DataSeries: ExtentIndex diff --git a/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.wikipedia/conn.ds.txt b/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.wikipedia/conn.ds.txt new file mode 100644 index 0000000000..e85cf9337e --- /dev/null +++ b/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.wikipedia/conn.ds.txt @@ -0,0 +1,96 @@ +# Extent Types ... 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +extent offset ExtentType +40 DataSeries: XmlType +660 conn +2564 DataSeries: ExtentIndex +# Extent, type='conn' +ts uid id.orig_h id.orig_p id.resp_h id.resp_p proto service duration orig_bytes resp_bytes conn_state local_orig missed_bytes history orig_pkts orig_ip_bytes resp_pkts resp_ip_bytes +1.3e+09 UWkUyAuUGXf 141.142.220.202 5353 224.0.0.251 5353 udp dns 0 0 0 S0 F 0 D 1 73 0 0 +1.3e+09 arKYeMETxOg fe80::217:f2ff:fed7:cf65 5353 ff02::fb 5353 udp 0 0 0 S0 F 0 D 1 199 0 0 +1.3e+09 k6kgXLOoSKl 141.142.220.50 5353 224.0.0.251 5353 udp 0 0 0 S0 F 0 D 1 179 0 0 +1.3e+09 TEfuqmmG4bh 141.142.220.118 43927 141.142.2.2 53 udp dns 0 0 89 SHR F 0 Cd 0 0 1 117 +1.3e+09 FrJExwHcSal 141.142.220.118 37676 141.142.2.2 53 udp dns 0 0 99 SHR F 0 Cd 0 0 1 127 +1.3e+09 5OKnoww6xl4 141.142.220.118 40526 141.142.2.2 53 udp dns 0 0 183 SHR F 0 Cd 0 0 1 211 +1.3e+09 3PKsZ2Uye21 141.142.220.118 32902 141.142.2.2 53 udp dns 0 0 89 SHR F 0 Cd 0 0 1 117 +1.3e+09 VW0XPVINV8a 141.142.220.118 59816 141.142.2.2 53 udp dns 0 0 99 SHR F 0 Cd 0 0 1 127 +1.3e+09 fRFu0wcOle6 141.142.220.118 59714 141.142.2.2 53 udp dns 0 0 183 SHR F 0 Cd 0 0 1 211 +1.3e+09 qSsw6ESzHV4 141.142.220.118 58206 141.142.2.2 53 udp dns 0 0 89 SHR F 0 Cd 0 0 1 117 +1.3e+09 iE6yhOq3SF 141.142.220.118 38911 141.142.2.2 53 udp dns 0 0 99 SHR F 0 Cd 0 0 1 127 +1.3e+09 GSxOnSLghOa 141.142.220.118 59746 141.142.2.2 53 udp dns 0 0 183 SHR F 0 Cd 0 0 1 211 +1.3e+09 qCaWGmzFtM5 141.142.220.118 45000 141.142.2.2 53 udp dns 0 0 89 SHR F 0 Cd 0 0 1 117 +1.3e+09 70MGiRM1Qf4 141.142.220.118 48479 141.142.2.2 53 udp dns 0 0 99 SHR F 0 Cd 0 0 1 127 +1.3e+09 h5DsfNtYzi1 141.142.220.118 48128 141.142.2.2 53 udp dns 0 0 183 SHR F 0 Cd 0 0 1 211 +1.3e+09 P654jzLoe3a 141.142.220.118 56056 141.142.2.2 53 udp dns 0 0 131 SHR F 0 Cd 0 0 1 159 +1.3e+09 Tw8jXtpTGu6 141.142.220.118 55092 141.142.2.2 53 udp dns 0 0 198 SHR F 0 Cd 0 0 1 226 +1.3e+09 
BWaU4aSuwkc 141.142.220.44 5353 224.0.0.251 5353 udp dns 0 0 0 S0 F 0 D 1 85 0 0 +1.3e+09 10XodEwRycf 141.142.220.226 137 141.142.220.255 137 udp dns 0 350 0 S0 F 0 D 7 546 0 0 +1.3e+09 zno26fFZkrh fe80::3074:17d5:2052:c324 65373 ff02::1:3 5355 udp dns 0 66 0 S0 F 0 D 2 162 0 0 +1.3e+09 v5rgkJBig5l 141.142.220.226 55131 224.0.0.252 5355 udp dns 0 66 0 S0 F 0 D 2 122 0 0 +1.3e+09 eWZCH7OONC1 fe80::3074:17d5:2052:c324 54213 ff02::1:3 5355 udp dns 0 66 0 S0 F 0 D 2 162 0 0 +1.3e+09 0Pwk3ntf8O3 141.142.220.226 55671 224.0.0.252 5355 udp dns 0 66 0 S0 F 0 D 2 122 0 0 +1.3e+09 0HKorjr8Zp7 141.142.220.238 56641 141.142.220.255 137 udp dns 0 0 0 S0 F 0 D 1 78 0 0 +1.3e+09 GvmoxJFXdTa 141.142.220.118 49998 208.80.152.3 80 tcp 0 1130 734 S1 F 1130 ShACad 4 216 4 950 +1.3e+09 nQcgTWjvg4c 141.142.220.118 35634 208.80.152.2 80 tcp 0 0 350 OTH F 0 CdA 1 52 1 402 +1.3e+09 UfGkYA2HI2g 141.142.220.118 50001 208.80.152.3 80 tcp 0 1178 734 S1 F 1178 ShACad 4 216 4 950 +1.3e+09 i2rO3KD1Syg 141.142.220.118 35642 208.80.152.2 80 tcp 0 534 412 S1 F 534 ShACad 3 164 3 576 +1.3e+09 0Q4FH8sESw5 141.142.220.118 50000 208.80.152.3 80 tcp 0 1148 734 S1 F 1148 ShACad 4 216 4 950 +1.3e+09 EAr0uf4mhq 141.142.220.118 49996 208.80.152.3 80 tcp 0 1171 733 S1 F 1171 ShACad 4 216 4 949 +1.3e+09 slFea8xwSmb 141.142.220.118 49999 208.80.152.3 80 tcp 0 1137 733 S1 F 1137 ShACad 4 216 4 949 +1.3e+09 2cx26uAvUPl 141.142.220.235 6705 173.192.163.128 80 tcp 0 0 0 OTH F 0 h 0 0 1 48 +1.3e+09 j4u32Pc5bif 141.142.220.118 48649 208.80.152.118 80 tcp 0 525 232 S1 F 525 ShACad 3 164 3 396 +1.3e+09 c4Zw9TmAE05 141.142.220.118 49997 208.80.152.3 80 tcp 0 1125 734 S1 F 1125 ShACad 4 216 4 950 +# Extent, type='DataSeries: ExtentIndex' +offset extenttype +40 DataSeries: XmlType +660 conn +2564 DataSeries: ExtentIndex diff --git a/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.wikipedia/http.ds.txt b/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.wikipedia/http.ds.txt new file mode 
100644 index 0000000000..49e431085c --- /dev/null +++ b/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.wikipedia/http.ds.txt @@ -0,0 +1,90 @@ +# Extent Types ... + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +extent offset ExtentType +40 DataSeries: XmlType +756 http +1144 DataSeries: ExtentIndex +# Extent, type='http' +ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth method host uri referrer user_agent request_body_len response_body_len status_code status_msg info_code info_msg filename tags username password proxied mime_type md5 extraction_file +1.3e+09 j4u32Pc5bif 141.142.220.118 48649 208.80.152.118 80 0 0 0 304 Not Modified 0 +1.3e+09 c4Zw9TmAE05 141.142.220.118 49997 208.80.152.3 80 0 0 0 304 Not Modified 0 +1.3e+09 EAr0uf4mhq 141.142.220.118 49996 208.80.152.3 80 0 0 0 304 Not Modified 0 +1.3e+09 GvmoxJFXdTa 141.142.220.118 49998 208.80.152.3 80 0 0 0 304 Not Modified 0 +1.3e+09 0Q4FH8sESw5 141.142.220.118 50000 208.80.152.3 80 0 0 0 304 Not Modified 0 +1.3e+09 slFea8xwSmb 141.142.220.118 49999 208.80.152.3 80 0 0 0 304 Not Modified 0 +1.3e+09 UfGkYA2HI2g 141.142.220.118 50001 208.80.152.3 80 0 0 0 304 Not Modified 0 +1.3e+09 i2rO3KD1Syg 141.142.220.118 35642 208.80.152.2 80 0 0 0 304 Not Modified 0 +1.3e+09 c4Zw9TmAE05 141.142.220.118 49997 208.80.152.3 80 0 0 0 304 Not Modified 0 +1.3e+09 EAr0uf4mhq 141.142.220.118 49996 208.80.152.3 80 0 0 0 304 Not Modified 0 +1.3e+09 GvmoxJFXdTa 141.142.220.118 49998 208.80.152.3 80 0 0 0 304 Not Modified 0 +1.3e+09 0Q4FH8sESw5 141.142.220.118 50000 208.80.152.3 80 0 0 0 304 Not Modified 0 +1.3e+09 slFea8xwSmb 141.142.220.118 49999 208.80.152.3 80 0 0 0 304 Not Modified 0 +1.3e+09 UfGkYA2HI2g 141.142.220.118 50001 208.80.152.3 80 0 0 0 304 Not Modified 0 +# Extent, type='DataSeries: ExtentIndex' +offset extenttype +40 DataSeries: XmlType +756 http +1144 DataSeries: ExtentIndex diff --git 
a/testing/btest/scripts/base/frameworks/logging/dataseries/options.bro b/testing/btest/scripts/base/frameworks/logging/dataseries/options.bro new file mode 100644 index 0000000000..77ea32908a --- /dev/null +++ b/testing/btest/scripts/base/frameworks/logging/dataseries/options.bro @@ -0,0 +1,43 @@ +# +# @TEST-REQUIRES: has-writer DataSeries && which ds2txt +# +# @TEST-EXEC: bro -b %INPUT Log::default_writer=Log::WRITER_DATASERIES +# @TEST-EXEC: test -e ssh.ds.xml +# @TEST-EXEC: btest-diff ssh.ds.xml + +module SSH; + +redef LogDataSeries::dump_schema = T; + +# Haven't yet found a way to check for the effect of these. +redef LogDataSeries::compression = "bz2"; +redef LogDataSeries::extent_size = 1000; +redef LogDataSeries::num_threads = 5; + +# LogDataSeries::use_integer_for_time is tested separately. + +export { + redef enum Log::ID += { LOG }; + + type Log: record { + t: time; + id: conn_id; # Will be rolled out into individual columns. + status: string &optional; + country: string &default="unknown"; + } &log; +} + +event bro_init() +{ + Log::create_stream(SSH::LOG, [$columns=Log]); + + local cid = [$orig_h=1.2.3.4, $orig_p=1234/tcp, $resp_h=2.3.4.5, $resp_p=80/tcp]; + + Log::write(SSH::LOG, [$t=network_time(), $id=cid, $status="success"]); + Log::write(SSH::LOG, [$t=network_time(), $id=cid, $status="failure", $country="US"]); + Log::write(SSH::LOG, [$t=network_time(), $id=cid, $status="failure", $country="UK"]); + Log::write(SSH::LOG, [$t=network_time(), $id=cid, $status="success", $country="BR"]); + Log::write(SSH::LOG, [$t=network_time(), $id=cid, $status="failure", $country="MX"]); + +} + diff --git a/testing/btest/scripts/base/frameworks/logging/dataseries/rotate.bro b/testing/btest/scripts/base/frameworks/logging/dataseries/rotate.bro new file mode 100644 index 0000000000..639c7f3562 --- /dev/null +++ b/testing/btest/scripts/base/frameworks/logging/dataseries/rotate.bro @@ -0,0 +1,33 @@ +# +# @TEST-REQUIRES: has-writer DataSeries && which ds2txt +# +# 
@TEST-EXEC: bro -b -r %DIR/../rotation.trace %INPUT 2>&1 Log::default_writer=Log::WRITER_DATASERIES | grep "test" >out +# @TEST-EXEC: for i in test.*.ds; do printf '> %s\n' $i; ds2txt $i; done >>out +# @TEST-EXEC: btest-diff out + +module Test; + +export { + # Create a new ID for our log stream + redef enum Log::ID += { LOG }; + + # Define a record with all the columns the log file can have. + # (I'm using a subset of fields from ssh-ext for demonstration.) + type Log: record { + t: time; + id: conn_id; # Will be rolled out into individual columns. + } &log; +} + +redef Log::default_rotation_interval = 1hr; +redef Log::default_rotation_postprocessor_cmd = "echo"; + +event bro_init() +{ + Log::create_stream(Test::LOG, [$columns=Log]); +} + +event new_connection(c: connection) + { + Log::write(Test::LOG, [$t=network_time(), $id=c$id]); + } diff --git a/testing/btest/scripts/base/frameworks/logging/dataseries/test-logging.bro b/testing/btest/scripts/base/frameworks/logging/dataseries/test-logging.bro new file mode 100644 index 0000000000..c7f8a5618f --- /dev/null +++ b/testing/btest/scripts/base/frameworks/logging/dataseries/test-logging.bro @@ -0,0 +1,34 @@ +# +# @TEST-REQUIRES: has-writer DataSeries && which ds2txt +# +# @TEST-EXEC: bro -b %INPUT Log::default_writer=Log::WRITER_DATASERIES +# @TEST-EXEC: ds2txt ssh.ds >ssh.ds.txt +# @TEST-EXEC: btest-diff ssh.ds.txt + +module SSH; + +export { + redef enum Log::ID += { LOG }; + + type Log: record { + t: time; + id: conn_id; # Will be rolled out into individual columns. 
+ status: string &optional; + country: string &default="unknown"; + } &log; +} + +event bro_init() +{ + Log::create_stream(SSH::LOG, [$columns=Log]); + + local cid = [$orig_h=1.2.3.4, $orig_p=1234/tcp, $resp_h=2.3.4.5, $resp_p=80/tcp]; + + Log::write(SSH::LOG, [$t=network_time(), $id=cid, $status="success"]); + Log::write(SSH::LOG, [$t=network_time(), $id=cid, $status="failure", $country="US"]); + Log::write(SSH::LOG, [$t=network_time(), $id=cid, $status="failure", $country="UK"]); + Log::write(SSH::LOG, [$t=network_time(), $id=cid, $status="success", $country="BR"]); + Log::write(SSH::LOG, [$t=network_time(), $id=cid, $status="failure", $country="MX"]); + +} + diff --git a/testing/btest/scripts/base/frameworks/logging/dataseries/time-as-int.bro b/testing/btest/scripts/base/frameworks/logging/dataseries/time-as-int.bro new file mode 100644 index 0000000000..3a072998c0 --- /dev/null +++ b/testing/btest/scripts/base/frameworks/logging/dataseries/time-as-int.bro @@ -0,0 +1,8 @@ +# +# @TEST-REQUIRES: has-writer DataSeries && which ds2txt +# +# @TEST-EXEC: bro -r $TRACES/wikipedia.trace %INPUT Log::default_writer=Log::WRITER_DATASERIES +# @TEST-EXEC: ds2txt conn.ds >conn.ds.txt +# @TEST-EXEC: btest-diff conn.ds.txt + +redef LogDataSeries::use_integer_for_time = T; diff --git a/testing/btest/scripts/base/frameworks/logging/dataseries/wikipedia.bro b/testing/btest/scripts/base/frameworks/logging/dataseries/wikipedia.bro new file mode 100644 index 0000000000..4a4b70afc2 --- /dev/null +++ b/testing/btest/scripts/base/frameworks/logging/dataseries/wikipedia.bro @@ -0,0 +1,8 @@ +# +# @TEST-REQUIRES: has-writer DataSeries && which ds2txt +# +# @TEST-EXEC: bro -r $TRACES/wikipedia.trace Log::default_writer=Log::WRITER_DATASERIES +# @TEST-EXEC: ds2txt conn.ds >conn.ds.txt +# @TEST-EXEC: ds2txt http.ds >http.ds.txt +# @TEST-EXEC: btest-diff conn.ds.txt +# @TEST-EXEC: btest-diff http.ds.txt diff --git a/testing/scripts/has-writer b/testing/scripts/has-writer new file mode 100755 
index 0000000000..683d31041f --- /dev/null +++ b/testing/scripts/has-writer @@ -0,0 +1,6 @@ +#! /usr/bin/env bash +# +# Returns true if Bro has been compiled with support for writer type +# $1. The type name must match what "bro --help" prints. + +bro --helper 2>&1 | grep -qi "Supported log formats:.*$1" From 891c53277501ab3e6c2dfa555859f4fda1a40486 Mon Sep 17 00:00:00 2001 From: Robin Sommer Date: Mon, 16 Apr 2012 14:48:33 -0700 Subject: [PATCH 04/20] DataSeries cleanup. --- src/Type.cc | 8 +- src/logging/writers/Ascii.cc | 17 +- src/logging/writers/DataSeries.cc | 267 ++++++++++++++---------------- src/logging/writers/DataSeries.h | 27 ++- src/threading/SerialTypes.cc | 14 ++ src/threading/SerialTypes.h | 10 +- 6 files changed, 162 insertions(+), 181 deletions(-) diff --git a/src/Type.cc b/src/Type.cc index 82221303af..d688b15376 100644 --- a/src/Type.cc +++ b/src/Type.cc @@ -15,10 +15,9 @@ extern int generate_documentation; +// Note: This function must be thread-safe. const char* type_name(TypeTag t) { - static char errbuf[512]; - static const char* type_names[int(NUM_TYPES)] = { "void", "bool", "int", "count", "counter", @@ -37,10 +36,7 @@ const char* type_name(TypeTag t) }; if ( int(t) >= NUM_TYPES ) - { - snprintf(errbuf, sizeof(errbuf), "%d: not a type tag", int(t)); - return errbuf; - } + return "type_name(): not a type tag"; return type_names[int(t)]; } diff --git a/src/logging/writers/Ascii.cc b/src/logging/writers/Ascii.cc index 2f25ac418f..3a35eea380 100644 --- a/src/logging/writers/Ascii.cc +++ b/src/logging/writers/Ascii.cc @@ -86,6 +86,9 @@ bool Ascii::DoInit(string path, int num_fields, const Field* const * fields) if ( include_header ) { + string names; + string types; + string str = string(header_prefix, header_prefix_len) + "separator " // Always use space as separator here. 
+ get_escaped_string(string(separator, separator_len), false) @@ -103,9 +106,6 @@ bool Ascii::DoInit(string path, int num_fields, const Field* const * fields) WriteHeaderField("path", get_escaped_string(path, false))) ) goto write_error; - string names; - string types; - for ( int i = 0; i < num_fields; ++i ) { if ( i > 0 ) @@ -114,15 +114,8 @@ bool Ascii::DoInit(string path, int num_fields, const Field* const * fields) types += string(separator, separator_len); } - const Field* field = fields[i]; - names += field->name; - types += type_name(field->type); - if ( (field->type == TYPE_TABLE) || (field->type == TYPE_VECTOR) ) - { - types += "["; - types += type_name(field->subtype); - types += "]"; - } + names += fields[i]->name; + types += fields[i]->TypeName(); } if ( ! (WriteHeaderField("fields", names) diff --git a/src/logging/writers/DataSeries.cc b/src/logging/writers/DataSeries.cc index 5ee8a812da..f6b26dc494 100644 --- a/src/logging/writers/DataSeries.cc +++ b/src/logging/writers/DataSeries.cc @@ -15,17 +15,15 @@ using namespace logging; using namespace writer; std::string DataSeries::LogValueToString(threading::Value *val) -{ - const int strsz = 1024; - char strbuf[strsz]; - - // In some cases, no value is attached. If this is the case, return an empty string. - if(!val->present) + { + // In some cases, no value is attached. If this is the case, return + // an empty string. + if( ! val->present ) return ""; std::ostringstream ostr; - switch(val->type) - { + + switch(val->type) { case TYPE_BOOL: return (val->val.int_val ? "true" : "false"); @@ -40,19 +38,22 @@ std::string DataSeries::LogValueToString(threading::Value *val) return ostr.str(); case TYPE_SUBNET: - ostr << Render(val->val.subnet_val); + ostr << Render(val->val.subnet_val); return ostr.str(); case TYPE_ADDR: - ostr << Render(val->val.addr_val); + ostr << Render(val->val.addr_val); return ostr.str(); - // Note: These two cases are relatively special. 
We need to convert these values into their integer equivalents - // to maximize precision. At the moment, there won't be a noticeable effect (Bro uses the double format everywhere - // internally, so we've already lost the precision we'd gain here), but timestamps may eventually switch to this - // representation within Bro. + // Note: These two cases are relatively special. We need to convert + // these values into their integer equivalents to maximize precision. + // At the moment, there won't be a noticeable effect (Bro uses the + // double format everywhere internally, so we've already lost the + // precision we'd gain here), but timestamps may eventually switch to + // this representation within Bro. // - // in the near-term, this *should* lead to better pack_relative (and thus smaller output files). + // In the near-term, this *should* lead to better pack_relative (and + // thus smaller output files). case TYPE_TIME: case TYPE_INTERVAL: if ( ds_use_integer_for_time ) @@ -69,59 +70,57 @@ std::string DataSeries::LogValueToString(threading::Value *val) case TYPE_ENUM: case TYPE_STRING: case TYPE_FILE: - { - int size = val->val.string_val->size(); - string tmpString = ""; - if(size) - tmpString = string(val->val.string_val->data(), val->val.string_val->size()); - else - tmpString = string(""); - return tmpString; - } - case TYPE_TABLE: - { - if ( ! val->val.set_val.size ) - { + case TYPE_FUNC: + if ( ! val->val.string_val->size() ) + return ""; + + return string(val->val.string_val->data(), val->val.string_val->size()); + + case TYPE_TABLE: + { + if ( ! val->val.set_val.size ) return ""; - } string tmpString = ""; + for ( int j = 0; j < val->val.set_val.size; j++ ) { if ( j > 0 ) - tmpString += ":"; //TODO: Specify set separator char in configuration. + tmpString += ds_set_separator; tmpString += LogValueToString(val->val.set_val.vals[j]); } + return tmpString; - } + } + case TYPE_VECTOR: - { + { if ( ! 
val->val.vector_val.size ) - { return ""; - } string tmpString = ""; + for ( int j = 0; j < val->val.vector_val.size; j++ ) { if ( j > 0 ) - tmpString += ":"; //TODO: Specify set separator char in configuration. + tmpString += ds_set_separator; tmpString += LogValueToString(val->val.vector_val.vals[j]); } return tmpString; - } + } + default: - return "???"; + InternalError(Fmt("unknown type %s in DataSeries::LogValueToString", type_name(val->type))); + return "cannot be reached"; } } string DataSeries::GetDSFieldType(const threading::Field *field) { - switch(field->type) - { + switch(field->type) { case TYPE_BOOL: return "bool"; @@ -145,75 +144,49 @@ string DataSeries::GetDSFieldType(const threading::Field *field) case TYPE_FILE: case TYPE_TABLE: case TYPE_VECTOR: - default: + case TYPE_FUNC: return "variable32"; - } -} - -string DataSeries::GetBroTypeString(const threading::Field *field) -{ - switch(field->type) - { - case TYPE_BOOL: - return "bool"; - case TYPE_COUNT: - return "count"; - case TYPE_COUNTER: - return "counter"; - case TYPE_PORT: - return "port"; - case TYPE_INT: - return "int"; - case TYPE_TIME: - return "time"; - case TYPE_INTERVAL: - return "interval"; - case TYPE_DOUBLE: - return "double"; - case TYPE_SUBNET: - return "subnet"; - case TYPE_ADDR: - return "addr"; - case TYPE_ENUM: - return "enum"; - case TYPE_STRING: - return "string"; - case TYPE_FILE: - return "file"; - case TYPE_TABLE: - return "table"; - case TYPE_VECTOR: - return "vector"; default: - return "???"; + InternalError(Fmt("unknown type %s in DataSeries::GetDSFieldType", type_name(field->type))); + return "cannot be reached"; } } string DataSeries::BuildDSSchemaFromFieldTypes(const vector& vals, string sTitle) -{ - if("" == sTitle) - { + { + if( ! 
sTitle.size() ) sTitle = "GenericBroStream"; - } - string xmlschema; - xmlschema = "\n"; - for(size_t i = 0; i < vals.size(); ++i) + + string xmlschema = "\n"; + + for( size_t i = 0; i < vals.size(); ++i ) { - xmlschema += "\t\n"; + xmlschema += "\t\n"; } + xmlschema += "\n"; - for(size_t i = 0; i < vals.size(); ++i) + + for( size_t i = 0; i < vals.size(); ++i ) { - xmlschema += "\n"; + xmlschema += "\n"; } + return xmlschema; } std::string DataSeries::GetDSOptionsForType(const threading::Field *field) { - switch(field->type) - { + switch( field->type ) { case TYPE_TIME: case TYPE_INTERVAL: { @@ -233,6 +206,7 @@ std::string DataSeries::GetDSOptionsForType(const threading::Field *field) case TYPE_TABLE: case TYPE_VECTOR: return "pack_unique=\"yes\""; + default: return ""; } @@ -242,11 +216,13 @@ std::string DataSeries::GetDSOptionsForType(const threading::Field *field) DataSeries::DataSeries(WriterFrontend* frontend) : WriterBackend(frontend) { - ds_compression = string((const char *)BifConst::LogDataSeries::compression->Bytes(), BifConst::LogDataSeries::compression->Len()); + ds_compression = string((const char *)BifConst::LogDataSeries::compression->Bytes(), + BifConst::LogDataSeries::compression->Len()); ds_dump_schema = BifConst::LogDataSeries::dump_schema; ds_extent_size = BifConst::LogDataSeries::extent_size; ds_num_threads = BifConst::LogDataSeries::num_threads; ds_use_integer_for_time = BifConst::LogDataSeries::use_integer_for_time; + ds_set_separator = ","; } DataSeries::~DataSeries() @@ -258,20 +234,23 @@ bool DataSeries::OpenLog(string path) log_file = new DataSeriesSink(path + ".ds", compress_type); log_file->writeExtentLibrary(log_types); - for(size_t i = 0; i < schema_list.size(); ++i) - extents.insert(std::make_pair(schema_list[i].field_name, GeneralField::create(log_series, schema_list[i].field_name))); + for( size_t i = 0; i < schema_list.size(); ++i ) + extents.insert(std::make_pair(schema_list[i].field_name, + GeneralField::create(log_series, 
schema_list[i].field_name))); - if(ds_extent_size < ROW_MIN) + if ( ds_extent_size < ROW_MIN ) { - fprintf(stderr, "%d is not a valid value for 'rows'. Using min of %d instead.\n", (int)ds_extent_size, (int)ROW_MIN); - ds_extent_size = ROW_MIN; + Warning(Fmt("%d is not a valid value for 'rows'. Using min of %d instead", (int)ds_extent_size, (int)ROW_MIN)); + ds_extent_size = ROW_MIN; } - else if(ds_extent_size > ROW_MAX) + + else if( ds_extent_size > ROW_MAX ) { - fprintf(stderr, "%d is not a valid value for 'rows'. Using max of %d instead.\n", (int)ds_extent_size, (int)ROW_MAX); - ds_extent_size = ROW_MAX; + Warning(Fmt("%d is not a valid value for 'rows'. Using max of %d instead", (int)ds_extent_size, (int)ROW_MAX)); + ds_extent_size = ROW_MAX; } - log_output = new OutputModule(*log_file, log_series, log_type, ds_extent_size); + + log_output = new OutputModule(*log_file, log_series, *log_type, ds_extent_size); return true; } @@ -283,22 +262,22 @@ bool DataSeries::DoInit(string path, int num_fields, const threading::Field* con // use that schema to build our output logfile and prepare it to be // written to. - // Note: compressor count must be set *BEFORE* DataSeriesSink is instantiated. - if(ds_num_threads < THREAD_MIN && ds_num_threads != 0) + // Note: compressor count must be set *BEFORE* DataSeriesSink is + // instantiated. + if( ds_num_threads < THREAD_MIN && ds_num_threads != 0 ) { - fprintf(stderr, "%d is too few threads! Using %d instead\n", (int)ds_num_threads, (int)THREAD_MIN); + Warning(Fmt("%d is too few threads! Using %d instead", (int)ds_num_threads, (int)THREAD_MIN)); ds_num_threads = THREAD_MIN; } - if(ds_num_threads > THREAD_MAX) + + if( ds_num_threads > THREAD_MAX ) { - fprintf(stderr, "%d is too many threads! Dropping back to %d\n", (int)ds_num_threads, (int)THREAD_MAX); + Warning(Fmt("%d is too many threads! 
Dropping back to %d", (int)ds_num_threads, (int)THREAD_MAX)); ds_num_threads = THREAD_MAX; } - if(ds_num_threads > 0) - { + if( ds_num_threads > 0 ) DataSeriesSink::setCompressorCount(ds_num_threads); - } for ( int i = 0; i < num_fields; i++ ) { @@ -307,65 +286,59 @@ bool DataSeries::DoInit(string path, int num_fields, const threading::Field* con val.ds_type = GetDSFieldType(field); val.field_name = string(field->name); val.field_options = GetDSOptionsForType(field); - val.bro_type = GetBroTypeString(field); + val.bro_type = field->TypeName(); schema_list.push_back(val); } + string schema = BuildDSSchemaFromFieldTypes(schema_list, path); - if(ds_dump_schema) + + if( ds_dump_schema ) { - FILE * pFile; - pFile = fopen ( string(path + ".ds.xml").c_str() , "wb" ); - if(NULL == pFile) + FILE* pFile = fopen ( string(path + ".ds.xml").c_str() , "wb" ); + + if( pFile ) { - perror("Could not dump schema"); + fwrite(schema.c_str(), 1, schema.length(), pFile); + fclose(pFile); } - fwrite (schema.c_str(), 1 , schema.length() , pFile ); - fclose (pFile); + + else + Error(Fmt("cannot dump schema: %s", strerror(errno))); } compress_type = Extent::compress_all; - if(ds_compression == "lzf") - { + if( ds_compression == "lzf" ) compress_type = Extent::compress_lzf; - } - else if(ds_compression == "lzo") - { + + else if( ds_compression == "lzo" ) compress_type = Extent::compress_lzo; - } - else if(ds_compression == "gz") - { + + else if( ds_compression == "gz" ) compress_type = Extent::compress_gz; - } - else if(ds_compression == "bz2") - { + + else if( ds_compression == "bz2" ) compress_type = Extent::compress_bz2; - } - else if(ds_compression == "none") - { + + else if( ds_compression == "none" ) compress_type = Extent::compress_none; - } - else if(ds_compression == "any") - { + + else if( ds_compression == "any" ) compress_type = Extent::compress_all; - } + else - { - fprintf(stderr, "%s is not a valid compression type. 
Valid types are: 'lzf', 'lzo', 'gz', 'bz2', 'none', 'any'\n", ds_compression.c_str()); - fprintf(stderr, "Defaulting to 'any'\n"); - } + Warning(Fmt("%s is not a valid compression type. Valid types are: 'lzf', 'lzo', 'gz', 'bz2', 'none', 'any'. Defaulting to 'any'", ds_compression.c_str())); log_type = const_cast(log_types.registerType(schema)); - log_series.setType(*log_type); return OpenLog(path); - } bool DataSeries::DoFlush() { - // Flushing is handled by DataSeries automatically, so this function doesn't do anything. + // Flushing is handled by DataSeries automatically, so this function + // doesn't do anything. return true; } @@ -377,7 +350,7 @@ void DataSeries::CloseLog() extents.clear(); // Don't delete the file before you delete the output, or bad things - // happen. + // will happen. delete log_output; delete log_file; @@ -396,14 +369,17 @@ bool DataSeries::DoWrite(int num_fields, const threading::Field* const * fields, threading::Value** vals) { log_output->newRecord(); - for(size_t i = 0; i < (size_t)num_fields; ++i) + + for( size_t i = 0; i < (size_t)num_fields; ++i ) { ExtentIterator iter = extents.find(fields[i]->name); assert(iter != extents.end()); + if( iter != extents.end() ) { GeneralField *cField = iter->second; - if(vals[i]->present) + + if( vals[i]->present ) cField->set(LogValueToString(vals[i])); } } @@ -413,7 +389,8 @@ bool DataSeries::DoWrite(int num_fields, const threading::Field* const * fields, bool DataSeries::DoRotate(string rotated_path, double open, double close, bool terminating) { - // Note that if DS files are rotated too often, the aggregate log size will be (much) larger. + // Note that if DS files are rotated too often, the aggregate log + // size will be (much) larger. 
CloseLog(); string dsname = Path() + ".ds"; diff --git a/src/logging/writers/DataSeries.h b/src/logging/writers/DataSeries.h index 319cb72ec5..5faa87e1b2 100644 --- a/src/logging/writers/DataSeries.h +++ b/src/logging/writers/DataSeries.h @@ -6,13 +6,13 @@ #ifndef LOGGING_WRITER_DATA_SERIES_H #define LOGGING_WRITER_DATA_SERIES_H -#include "../WriterBackend.h" - #include #include #include #include +#include "../WriterBackend.h" + namespace logging { namespace writer { class DataSeries : public WriterBackend { @@ -24,6 +24,8 @@ public: { return new DataSeries(frontend); } protected: + // Overidden from WriterBackend. + virtual bool DoInit(string path, int num_fields, const threading::Field* const * fields); @@ -36,11 +38,11 @@ protected: virtual bool DoFinish(); private: - static const size_t ROW_MIN = 2048; // Minimum extent size. - static const size_t ROW_MAX = (1024 * 1024 * 100); // Maximum extent size. - static const size_t THREAD_MIN = 1; // Minimum number of compression threads that DataSeries may spawn. - static const size_t THREAD_MAX = 128; // Maximum number of compression threads that DataSeries may spawn. - static const size_t TIME_SCALE = 1000000; // Fixed-point multiplier for time values when converted to integers. + static const size_t ROW_MIN = 2048; // Minimum extent size. + static const size_t ROW_MAX = (1024 * 1024 * 100); // Maximum extent size. + static const size_t THREAD_MIN = 1; // Minimum number of compression threads that DataSeries may spawn. + static const size_t THREAD_MAX = 128; // Maximum number of compression threads that DataSeries may spawn. + static const size_t TIME_SCALE = 1000000; // Fixed-point multiplier for time values when converted to integers. struct SchemaValue { @@ -85,18 +87,10 @@ private: */ string BuildDSSchemaFromFieldTypes(const vector& vals, string sTitle); - /** - * Takes a field type and converts it to a readable string. - * - * @param field We extract the type from this and convert it into a readable string. 
- * @return String representation of the field's type - */ - string GetBroTypeString(const threading::Field *field); - /** Closes the currently open file. */ void CloseLog(); - /** XXX */ + /** Opens a new file. */ bool OpenLog(string path); typedef std::map ExtentMap; @@ -119,6 +113,7 @@ private: string ds_compression; bool ds_dump_schema; bool ds_use_integer_for_time; + string ds_set_separator; }; } diff --git a/src/threading/SerialTypes.cc b/src/threading/SerialTypes.cc index a5692b2ffd..5ab61b0d41 100644 --- a/src/threading/SerialTypes.cc +++ b/src/threading/SerialTypes.cc @@ -24,6 +24,20 @@ bool Field::Write(SerializationFormat* fmt) const return (fmt->Write(name, "name") && fmt->Write((int)type, "type") && fmt->Write((int)subtype, "subtype")); } +string Field::TypeName() const + { + string n = type_name(type); + + if ( (type == TYPE_TABLE) || (type == TYPE_VECTOR) ) + { + n += "["; + n += type_name(subtype); + n += "]"; + } + + return n; + } + Value::~Value() { if ( (type == TYPE_ENUM || type == TYPE_STRING || type == TYPE_FILE || type == TYPE_FUNC) diff --git a/src/threading/SerialTypes.h b/src/threading/SerialTypes.h index db7dc837bd..eee3b750fe 100644 --- a/src/threading/SerialTypes.h +++ b/src/threading/SerialTypes.h @@ -53,6 +53,12 @@ struct Field { * @return False if an error occured. */ bool Write(SerializationFormat* fmt) const; + + /** + * Returns a textual description of the field's type. This method is + * thread-safe. + */ + string TypeName() const; }; /** @@ -132,8 +138,8 @@ struct Value { /** * Returns true if the type can be represented by a Value. If - * `atomic_only` is true, will not permit composite types. - */ + * `atomic_only` is true, will not permit composite types. This + * method is thread-safe. 
*/ static bool IsCompatibleType(BroType* t, bool atomic_only=false); private: From 08593c5147157511c5ca54872ad58c15dfd87431 Mon Sep 17 00:00:00 2001 From: Robin Sommer Date: Mon, 16 Apr 2012 15:19:14 -0700 Subject: [PATCH 05/20] In threads, an internal error now immediately aborts. Otherwise, the error won't make it back to the main thread for a while and subsequent code in the thread would still execute. --- src/threading/MsgThread.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/threading/MsgThread.cc b/src/threading/MsgThread.cc index 0b91f8790a..c5777042f3 100644 --- a/src/threading/MsgThread.cc +++ b/src/threading/MsgThread.cc @@ -222,7 +222,9 @@ void MsgThread::InternalWarning(const char* msg) void MsgThread::InternalError(const char* msg) { - SendOut(new ReporterMessage(ReporterMessage::INTERNAL_ERROR, this, msg)); + // This one aborts immediately. + fprintf(stderr, "internal error in thread: %s\n", msg); + abort(); } #ifdef DEBUG From 91a3ce951812083dc017116f080fbdd7c3d2ea1b Mon Sep 17 00:00:00 2001 From: Robin Sommer Date: Mon, 16 Apr 2012 15:20:10 -0700 Subject: [PATCH 06/20] Additional test output canonification for ds2txt's timestamps. 
--- .../ssh.ds.txt | 10 +++++----- .../http.ds.txt | 12 ++++++------ .../frameworks/logging/dataseries/test-logging.bro | 2 +- testing/external/subdir-btest.cfg | 2 +- testing/scripts/diff-remove-timestamps-dataseries | 6 ++++++ 5 files changed, 19 insertions(+), 13 deletions(-) create mode 100755 testing/scripts/diff-remove-timestamps-dataseries diff --git a/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.test-logging/ssh.ds.txt b/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.test-logging/ssh.ds.txt index f66f40b701..05026a24ef 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.test-logging/ssh.ds.txt +++ b/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.test-logging/ssh.ds.txt @@ -31,11 +31,11 @@ extent offset ExtentType 604 DataSeries: ExtentIndex # Extent, type='ssh' t id.orig_h id.orig_p id.resp_h id.resp_p status country -1.334e+09 1.2.3.4 1234 2.3.4.5 80 success unknown -1.334e+09 1.2.3.4 1234 2.3.4.5 80 failure US -1.334e+09 1.2.3.4 1234 2.3.4.5 80 failure UK -1.334e+09 1.2.3.4 1234 2.3.4.5 80 success BR -1.334e+09 1.2.3.4 1234 2.3.4.5 80 failure MX +X.XXXe+09 1.2.3.4 1234 2.3.4.5 80 success unknown +X.XXXe+09 1.2.3.4 1234 2.3.4.5 80 failure US +X.XXXe+09 1.2.3.4 1234 2.3.4.5 80 failure UK +X.XXXe+09 1.2.3.4 1234 2.3.4.5 80 success BR +X.XXXe+09 1.2.3.4 1234 2.3.4.5 80 failure MX # Extent, type='DataSeries: ExtentIndex' offset extenttype 40 DataSeries: XmlType diff --git a/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.wikipedia/http.ds.txt b/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.wikipedia/http.ds.txt index 49e431085c..a0c6cbbff3 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.wikipedia/http.ds.txt +++ b/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.wikipedia/http.ds.txt @@ -55,18 +55,18 @@ - + - + extent offset ExtentType 40 DataSeries: XmlType -756 http -1144 DataSeries: 
ExtentIndex +768 http +1156 DataSeries: ExtentIndex # Extent, type='http' ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth method host uri referrer user_agent request_body_len response_body_len status_code status_msg info_code info_msg filename tags username password proxied mime_type md5 extraction_file 1.3e+09 j4u32Pc5bif 141.142.220.118 48649 208.80.152.118 80 0 0 0 304 Not Modified 0 @@ -86,5 +86,5 @@ ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth method host uri refer # Extent, type='DataSeries: ExtentIndex' offset extenttype 40 DataSeries: XmlType -756 http -1144 DataSeries: ExtentIndex +768 http +1156 DataSeries: ExtentIndex diff --git a/testing/btest/scripts/base/frameworks/logging/dataseries/test-logging.bro b/testing/btest/scripts/base/frameworks/logging/dataseries/test-logging.bro index c7f8a5618f..76f2451477 100644 --- a/testing/btest/scripts/base/frameworks/logging/dataseries/test-logging.bro +++ b/testing/btest/scripts/base/frameworks/logging/dataseries/test-logging.bro @@ -2,7 +2,7 @@ # @TEST-REQUIRES: has-writer DataSeries && which ds2txt # # @TEST-EXEC: bro -b %INPUT Log::default_writer=Log::WRITER_DATASERIES -# @TEST-EXEC: ds2txt ssh.ds >ssh.ds.txt +# @TEST-EXEC: ds2txt ssh.ds | ${SCRIPTS}/diff-remove-timestamps-dataseries >ssh.ds.txt # @TEST-EXEC: btest-diff ssh.ds.txt module SSH; diff --git a/testing/external/subdir-btest.cfg b/testing/external/subdir-btest.cfg index c4e74f99fa..fba89fb724 100644 --- a/testing/external/subdir-btest.cfg +++ b/testing/external/subdir-btest.cfg @@ -10,7 +10,7 @@ BROPATH=`bash -c %(testbase)s/../../../build/bro-path-dev`:%(testbase)s/../scrip BRO_SEED_FILE=%(testbase)s/../random.seed TZ=UTC LC_ALL=C -PATH=%(testbase)s/../../../build/src:%(testbase)s/../../../aux/btest:%(default_path)s +PATH=%(testbase)s/../../../build/src:%(testbase)s/../../../aux/btest:%(testbase)s/../../scripts:%(default_path)s TEST_DIFF_CANONIFIER=%(testbase)s/../../scripts/diff-canonifier-external TEST_DIFF_BRIEF=1 
TRACES=%(testbase)s/Traces diff --git a/testing/scripts/diff-remove-timestamps-dataseries b/testing/scripts/diff-remove-timestamps-dataseries new file mode 100755 index 0000000000..5b20f138af --- /dev/null +++ b/testing/scripts/diff-remove-timestamps-dataseries @@ -0,0 +1,6 @@ +#! /usr/bin/env bash +# +# Replace anything which looks like DataSeries timestamps (which is a double) with XXXs. + +sed 's/1\.[0-9]*e+09/X.XXXe+09/g' + From d1c6183620aa8ee73cd52ae8ac98b90213d093d8 Mon Sep 17 00:00:00 2001 From: Robin Sommer Date: Mon, 16 Apr 2012 16:07:38 -0700 Subject: [PATCH 07/20] Starting DataSeries HowTo. --- doc/logging-dataseries.rst | 102 +++++++++++++++++++++++++++++++++++++ 1 file changed, 102 insertions(+) create mode 100644 doc/logging-dataseries.rst diff --git a/doc/logging-dataseries.rst b/doc/logging-dataseries.rst new file mode 100644 index 0000000000..5289bbaea9 --- /dev/null +++ b/doc/logging-dataseries.rst @@ -0,0 +1,102 @@ + +============================= +Binary Output with DataSeries +============================= + +.. rst-class:: opening + + Bro's default ASCII log format is not exactly the most efficient + way for storing large volumes of data. As an alternative, Bro comes + with experimental support for `DataSeries + `_ + output, an efficient binary format for recording structured bulk + data. DataSeries is developed and maintained at HP Labs. + +.. contents:: + +Installing DataSeries +--------------------- + +To use DataSeries, its libraries must be available at compile-time, +along with the supporting *Lintel* package. Generally, both are +distributed on `HP Labs' web site +`_. Currently, however, you need +to use recent developments of both packages with Bro, which you can +download from github like this:: + + git clone http://github.com/eric-anderson/Lintel + git clone http://github.com/eric-anderson/DataSeries + +To then build and install the two into ````, do:: + + ( cd Lintel && mkdir build && cd build && cmake -DCMAKE_INSTALL_PREFIX= ..
&& make && make install ) + ( cd DataSeries && mkdir build && cd build && cmake -DCMAKE_INSTALL_PREFIX= .. && make && make install ) + +Please refer to the packages' documentation for more information about +the installation process. In particular, there's more information on +required and optional `dependencies for Lintel +`_ +and `dependencies for DataSeries +`_ + +Compiling Bro with DataSeries Support +------------------------------------- + +Once you have installed DataSeries, Bro's ``configure`` should pick it +up automatically as long as it finds it in a standard system location. +Alternatively, you can specify the DataSeries installation prefix +manually with ``--with-dataseries=``. Keep an eye on +``configure``'s summary output, if it looks like this, Bro will indeed +compile in the DataSeries support:: + + # ./configure --with-dataseries=/usr/local + [...] + ====================| Bro Build Summary |===================== + [...] + DataSeries: true + [...] + ================================================================ + +Activating DataSeries +--------------------- + +The direct way to use DataSeries is to switch *all* log files over to +the binary format. To do that, just add ``redef +Log::default_writer=Log::WRITER_DATASERIES;`` to your ``local.bro`. +For testing, you can also just pass that on the command line:: + + bro -r trace.pcap Log::default_writer=Log::WRITER_DATASERIES + +With that, Bro will now write all its output into DataSeries files +``*.ds``. You can inspect these using DataSeries's set of command line +tools, which its installation process will have installed into +``/bin``. For example, to convert a file back into an ASCII +representation:: + # ds2txt conn .log + [... We skip a bunch of meta data here ...] 
+ ts uid id.orig_h id.orig_p id.resp_h id.resp_p proto service duration orig_bytes resp_bytes conn_state local_orig missed_bytes history orig_pkts orig_ip_bytes resp_pkts res + 1.3e+09 9CqElRsB9Q 141.142.220.202 5353 224.0.0.251 5353 udp dns 0 0 0 S0 F 0 D 1 73 0 0 + 1.3e+09 3bNPfUWuIhb fe80::217:f2ff:fed7:cf65 5353 ff02::fb 5353 udp 0 0 0 S0 F 0 D 1 199 0 0 + 1.3e+09 ZoDDN7YuYx3 141.142.220.50 5353 224.0.0.251 5353 udp 0 0 0 S0 F 0 D 1 179 0 0 + [...] + +Note that this ASCII format is *not* equivalent to Bro's default format +as DataSeries uses a different internal representation. + +You can also switch only individual files over to DataSeries by adding +code like this to your ``local.bro``:: + + TODO + +Bro's DataSeries writer comes with a few tuning options, see +:doc:`scripts/base/frameworks/logging/writers/dataseries`. + +Working with DataSeries +======================= + +Here are a few examples of using DataSeries command line tools to work +with the output files. + +TODO. + + From f85e0bfe9a97daacbe1d5011834ebe6289d9abf8 Mon Sep 17 00:00:00 2001 From: Robin Sommer Date: Mon, 16 Apr 2012 18:15:05 -0700 Subject: [PATCH 08/20] DataSeries TODO list with open issues/questions. --- doc/logging-dataseries.rst | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/doc/logging-dataseries.rst b/doc/logging-dataseries.rst index 5289bbaea9..e530ba7c0b 100644 --- a/doc/logging-dataseries.rst +++ b/doc/logging-dataseries.rst @@ -99,4 +99,32 @@ with the output files. TODO.
+ +* For testing our script-level options: + + - Can we get the extentsize from a ``.ds`` file? + - Can we get the compressio level from a ``.ds`` file? + +* ds2txt can apparently not read a file that is currently being + written. That's not good for the spool directory:: + + # ds2txt http.ds + **** Assertion failure in file + /DataSeriesSink.cpp, line 301 + **** Failed expression: tail[i] == 0xFF + **** Details: bad header for the tail of http.ds! + + Can that be worked around? From 1fba55f4f3eb37ca5b46095891416ebc720b469e Mon Sep 17 00:00:00 2001 From: Robin Sommer Date: Wed, 18 Apr 2012 14:59:42 -0700 Subject: [PATCH 09/20] Removing an unnecessary const cast. --- src/logging/writers/DataSeries.cc | 4 +--- src/logging/writers/DataSeries.h | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/src/logging/writers/DataSeries.cc b/src/logging/writers/DataSeries.cc index f6b26dc494..3c88c65653 100644 --- a/src/logging/writers/DataSeries.cc +++ b/src/logging/writers/DataSeries.cc @@ -212,8 +212,6 @@ std::string DataSeries::GetDSOptionsForType(const threading::Field *field) } } -// ************************ CLASS IMPL ********************************* - DataSeries::DataSeries(WriterFrontend* frontend) : WriterBackend(frontend) { ds_compression = string((const char *)BifConst::LogDataSeries::compression->Bytes(), @@ -329,7 +327,7 @@ bool DataSeries::DoInit(string path, int num_fields, const threading::Field* con else Warning(Fmt("%s is not a valid compression type. Valid types are: 'lzf', 'lzo', 'gz', 'bz2', 'none', 'any'. 
Defaulting to 'any'", ds_compression.c_str())); - log_type = const_cast(log_types.registerType(schema)); + log_type = log_types.registerType(schema); log_series.setType(*log_type); return OpenLog(path); diff --git a/src/logging/writers/DataSeries.h b/src/logging/writers/DataSeries.h index 5faa87e1b2..bd2eb418f6 100644 --- a/src/logging/writers/DataSeries.h +++ b/src/logging/writers/DataSeries.h @@ -99,7 +99,7 @@ private: // Internal DataSeries structures we need to keep track of. vector schema_list; ExtentTypeLibrary log_types; - ExtentType *log_type; + const ExtentType *log_type; ExtentSeries log_series; ExtentMap extents; int compress_type; From 18aa41c62b943ceb949107c883e182c4ab672220 Mon Sep 17 00:00:00 2001 From: Robin Sommer Date: Thu, 19 Apr 2012 10:41:01 -0700 Subject: [PATCH 10/20] Extending log post-processor call to include the name of the writer. --- scripts/base/frameworks/logging/main.bro | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/scripts/base/frameworks/logging/main.bro b/scripts/base/frameworks/logging/main.bro index 2c36b3001e..4093a3b429 100644 --- a/scripts/base/frameworks/logging/main.bro +++ b/scripts/base/frameworks/logging/main.bro @@ -376,13 +376,16 @@ function run_rotation_postprocessor_cmd(info: RotationInfo, npath: string) : boo if ( pp_cmd == "" ) return T; + # Turn, e.g., Log::WRITER_ASCII into "ascii". + local writer = subst_string(to_lower(fmt("%s", info$writer)), "log::writer_", ""); + # The date format is hard-coded here to provide a standardized # script interface. - system(fmt("%s %s %s %s %s %d", + system(fmt("%s %s %s %s %s %d %s", pp_cmd, npath, info$path, strftime("%y-%m-%d_%H.%M.%S", info$open), strftime("%y-%m-%d_%H.%M.%S", info$close), - info$terminating)); + info$terminating, writer)); return T; } From 4b70adcb4b08d2c9357a734ddc30a6007ffaaf93 Mon Sep 17 00:00:00 2001 From: Robin Sommer Date: Thu, 19 Apr 2012 10:41:24 -0700 Subject: [PATCH 11/20] Tweaking DataSeries support. 
--- doc/logging-dataseries.rst | 14 ------------- src/logging/writers/DataSeries.cc | 10 +++++++++- src/logging/writers/DataSeries.h | 1 + .../out | 20 +++++++++---------- .../conn.ds.txt | 12 +++++------ .../frameworks/logging/dataseries/options.bro | 1 + .../frameworks/logging/dataseries/rotate.bro | 1 + .../logging/dataseries/test-logging.bro | 1 + .../logging/dataseries/time-as-int.bro | 1 + .../logging/dataseries/wikipedia.bro | 1 + 10 files changed, 31 insertions(+), 31 deletions(-) diff --git a/doc/logging-dataseries.rst b/doc/logging-dataseries.rst index e530ba7c0b..6eef223a90 100644 --- a/doc/logging-dataseries.rst +++ b/doc/logging-dataseries.rst @@ -109,22 +109,8 @@ TODO Warning, while packing field not_valid_after of record 11, error was > 10%: (1346460000 / 1000000 = 1346.46, round() = 1346) -* The compiler warn about a depracated method and I'm not immediately - seeing how to avoid using that. - * For testing our script-level options: - Can we get the extentsize from a ``.ds`` file? - Can we get the compressio level from a ``.ds`` file? -* ds2txt can apparently not read a file that is currently being - written. That's not good for the spool directory:: - - # ds2txt http.ds - **** Assertion failure in file - /DataSeriesSink.cpp, line 301 - **** Failed expression: tail[i] == 0xFF - **** Details: bad header for the tail of http.ds! - - Can that be worked around? - diff --git a/src/logging/writers/DataSeries.cc b/src/logging/writers/DataSeries.cc index 3c88c65653..aacef01f80 100644 --- a/src/logging/writers/DataSeries.cc +++ b/src/logging/writers/DataSeries.cc @@ -194,6 +194,8 @@ std::string DataSeries::GetDSOptionsForType(const threading::Field *field) if ( ! ds_use_integer_for_time ) s += " pack_scale=\"1000000\""; + else + s += string(" units=\"") + TIME_UNIT() + "\" epoch=\"unix\""; return s; } @@ -327,7 +329,13 @@ bool DataSeries::DoInit(string path, int num_fields, const threading::Field* con else Warning(Fmt("%s is not a valid compression type. 
Valid types are: 'lzf', 'lzo', 'gz', 'bz2', 'none', 'any'. Defaulting to 'any'", ds_compression.c_str())); - log_type = log_types.registerType(schema); + const ExtentType& type = log_types.registerTypeR(schema); + + // Note: This is a bit dicey as it depends on the implementation of + // registerTypeR(), but its what the DataSeries guys recommended + // given that we function we originally used has been deprecated. + log_type = &type; + log_series.setType(*log_type); return OpenLog(path); diff --git a/src/logging/writers/DataSeries.h b/src/logging/writers/DataSeries.h index bd2eb418f6..ab2bcec88c 100644 --- a/src/logging/writers/DataSeries.h +++ b/src/logging/writers/DataSeries.h @@ -43,6 +43,7 @@ private: static const size_t THREAD_MIN = 1; // Minimum number of compression threads that DataSeries may spawn. static const size_t THREAD_MAX = 128; // Maximum number of compression threads that DataSeries may spawn. static const size_t TIME_SCALE = 1000000; // Fixed-point multiplier for time values when converted to integers. + const char* TIME_UNIT() { return "microseconds"; } // DS name for time resolution when converted to integers. Must match TIME_SCALE. 
struct SchemaValue { diff --git a/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.rotate/out b/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.rotate/out index b6f05003f3..a12fed36e1 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.rotate/out +++ b/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.rotate/out @@ -1,13 +1,13 @@ -test.2011-03-07-03-00-05.ds test 11-03-07_03.00.05 11-03-07_04.00.05 0 -test.2011-03-07-04-00-05.ds test 11-03-07_04.00.05 11-03-07_05.00.05 0 -test.2011-03-07-05-00-05.ds test 11-03-07_05.00.05 11-03-07_06.00.05 0 -test.2011-03-07-06-00-05.ds test 11-03-07_06.00.05 11-03-07_07.00.05 0 -test.2011-03-07-07-00-05.ds test 11-03-07_07.00.05 11-03-07_08.00.05 0 -test.2011-03-07-08-00-05.ds test 11-03-07_08.00.05 11-03-07_09.00.05 0 -test.2011-03-07-09-00-05.ds test 11-03-07_09.00.05 11-03-07_10.00.05 0 -test.2011-03-07-10-00-05.ds test 11-03-07_10.00.05 11-03-07_11.00.05 0 -test.2011-03-07-11-00-05.ds test 11-03-07_11.00.05 11-03-07_12.00.05 0 -test.2011-03-07-12-00-05.ds test 11-03-07_12.00.05 11-03-07_12.59.55 1 +test.2011-03-07-03-00-05.ds test 11-03-07_03.00.05 11-03-07_04.00.05 0 dataseries +test.2011-03-07-04-00-05.ds test 11-03-07_04.00.05 11-03-07_05.00.05 0 dataseries +test.2011-03-07-05-00-05.ds test 11-03-07_05.00.05 11-03-07_06.00.05 0 dataseries +test.2011-03-07-06-00-05.ds test 11-03-07_06.00.05 11-03-07_07.00.05 0 dataseries +test.2011-03-07-07-00-05.ds test 11-03-07_07.00.05 11-03-07_08.00.05 0 dataseries +test.2011-03-07-08-00-05.ds test 11-03-07_08.00.05 11-03-07_09.00.05 0 dataseries +test.2011-03-07-09-00-05.ds test 11-03-07_09.00.05 11-03-07_10.00.05 0 dataseries +test.2011-03-07-10-00-05.ds test 11-03-07_10.00.05 11-03-07_11.00.05 0 dataseries +test.2011-03-07-11-00-05.ds test 11-03-07_11.00.05 11-03-07_12.00.05 0 dataseries +test.2011-03-07-12-00-05.ds test 11-03-07_12.00.05 11-03-07_12.59.55 1 dataseries > test.2011-03-07-03-00-05.ds # Extent 
Types ... diff --git a/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.time-as-int/conn.ds.txt b/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.time-as-int/conn.ds.txt index e6294b1d71..65d4ba0a67 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.time-as-int/conn.ds.txt +++ b/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.time-as-int/conn.ds.txt @@ -9,7 +9,7 @@ - + @@ -17,7 +17,7 @@ - + @@ -51,8 +51,8 @@ extent offset ExtentType 40 DataSeries: XmlType -636 conn -2912 DataSeries: ExtentIndex +672 conn +2948 DataSeries: ExtentIndex # Extent, type='conn' ts uid id.orig_h id.orig_p id.resp_h id.resp_p proto service duration orig_bytes resp_bytes conn_state local_orig missed_bytes history orig_pkts orig_ip_bytes resp_pkts resp_ip_bytes 1300475167096535 UWkUyAuUGXf 141.142.220.202 5353 224.0.0.251 5353 udp dns 0 0 0 S0 F 0 D 1 73 0 0 @@ -92,5 +92,5 @@ ts uid id.orig_h id.orig_p id.resp_h id.resp_p proto service duration orig_bytes # Extent, type='DataSeries: ExtentIndex' offset extenttype 40 DataSeries: XmlType -636 conn -2912 DataSeries: ExtentIndex +672 conn +2948 DataSeries: ExtentIndex diff --git a/testing/btest/scripts/base/frameworks/logging/dataseries/options.bro b/testing/btest/scripts/base/frameworks/logging/dataseries/options.bro index 77ea32908a..fc3752a168 100644 --- a/testing/btest/scripts/base/frameworks/logging/dataseries/options.bro +++ b/testing/btest/scripts/base/frameworks/logging/dataseries/options.bro @@ -1,5 +1,6 @@ # # @TEST-REQUIRES: has-writer DataSeries && which ds2txt +# @TEST-GROUP: dataseries # # @TEST-EXEC: bro -b %INPUT Log::default_writer=Log::WRITER_DATASERIES # @TEST-EXEC: test -e ssh.ds.xml diff --git a/testing/btest/scripts/base/frameworks/logging/dataseries/rotate.bro b/testing/btest/scripts/base/frameworks/logging/dataseries/rotate.bro index 639c7f3562..6a0cee5888 100644 --- a/testing/btest/scripts/base/frameworks/logging/dataseries/rotate.bro +++ 
b/testing/btest/scripts/base/frameworks/logging/dataseries/rotate.bro @@ -1,5 +1,6 @@ # # @TEST-REQUIRES: has-writer DataSeries && which ds2txt +# @TEST-GROUP: dataseries # # @TEST-EXEC: bro -b -r %DIR/../rotation.trace %INPUT 2>&1 Log::default_writer=Log::WRITER_DATASERIES | grep "test" >out # @TEST-EXEC: for i in test.*.ds; do printf '> %s\n' $i; ds2txt $i; done >>out diff --git a/testing/btest/scripts/base/frameworks/logging/dataseries/test-logging.bro b/testing/btest/scripts/base/frameworks/logging/dataseries/test-logging.bro index 76f2451477..d04b0acf44 100644 --- a/testing/btest/scripts/base/frameworks/logging/dataseries/test-logging.bro +++ b/testing/btest/scripts/base/frameworks/logging/dataseries/test-logging.bro @@ -1,5 +1,6 @@ # # @TEST-REQUIRES: has-writer DataSeries && which ds2txt +# @TEST-GROUP: dataseries # # @TEST-EXEC: bro -b %INPUT Log::default_writer=Log::WRITER_DATASERIES # @TEST-EXEC: ds2txt ssh.ds | ${SCRIPTS}/diff-remove-timestamps-dataseries >ssh.ds.txt diff --git a/testing/btest/scripts/base/frameworks/logging/dataseries/time-as-int.bro b/testing/btest/scripts/base/frameworks/logging/dataseries/time-as-int.bro index 3a072998c0..e4dd6a5431 100644 --- a/testing/btest/scripts/base/frameworks/logging/dataseries/time-as-int.bro +++ b/testing/btest/scripts/base/frameworks/logging/dataseries/time-as-int.bro @@ -1,5 +1,6 @@ # # @TEST-REQUIRES: has-writer DataSeries && which ds2txt +# @TEST-GROUP: dataseries # # @TEST-EXEC: bro -r $TRACES/wikipedia.trace %INPUT Log::default_writer=Log::WRITER_DATASERIES # @TEST-EXEC: ds2txt conn.ds >conn.ds.txt diff --git a/testing/btest/scripts/base/frameworks/logging/dataseries/wikipedia.bro b/testing/btest/scripts/base/frameworks/logging/dataseries/wikipedia.bro index 4a4b70afc2..38726a8b10 100644 --- a/testing/btest/scripts/base/frameworks/logging/dataseries/wikipedia.bro +++ b/testing/btest/scripts/base/frameworks/logging/dataseries/wikipedia.bro @@ -1,5 +1,6 @@ # # @TEST-REQUIRES: has-writer DataSeries && 
which ds2txt +# @TEST-GROUP: dataseries # # @TEST-EXEC: bro -r $TRACES/wikipedia.trace Log::default_writer=Log::WRITER_DATASERIES # @TEST-EXEC: ds2txt conn.ds >conn.ds.txt From c91563fe7590d88e1609609668b71a070ed00768 Mon Sep 17 00:00:00 2001 From: Robin Sommer Date: Tue, 24 Apr 2012 17:57:05 -0700 Subject: [PATCH 12/20] DataSeries tuning. - Now using the new DS interface from git to remove warning. - New leak tests, not yet tried, --- doc/logging-dataseries.rst | 9 +++-- src/logging/writers/DataSeries.cc | 8 +---- src/logging/writers/DataSeries.h | 2 +- .../btest/core/leaks/dataseries-rotate.bro | 34 +++++++++++++++++++ testing/btest/core/leaks/dataseries.bro | 9 +++++ 5 files changed, 52 insertions(+), 10 deletions(-) create mode 100644 testing/btest/core/leaks/dataseries-rotate.bro create mode 100644 testing/btest/core/leaks/dataseries.bro diff --git a/doc/logging-dataseries.rst b/doc/logging-dataseries.rst index 6eef223a90..67f95ecf3b 100644 --- a/doc/logging-dataseries.rst +++ b/doc/logging-dataseries.rst @@ -24,8 +24,8 @@ distributed on `HP Labs' web site to use recent developments of both packages with Bro, which you can download from github like this:: - git clone http://github.com/eric-anderson/Lintel - git clone http://github.com/eric-anderson/DataSeries + git clone http://github.com/dataseries/Lintel + git clone http://github.com/dataseries/DataSeries To then build and install the two into ````, do:: @@ -109,8 +109,13 @@ TODO Warning, while packing field not_valid_after of record 11, error was > 10%: (1346460000 / 1000000 = 1346.46, round() = 1346) + See Eric's mail. + * For testing our script-level options: - Can we get the extentsize from a ``.ds`` file? - Can we get the compressio level from a ``.ds`` file? + See Eric's mail. + +* Do we have a leak? 
diff --git a/src/logging/writers/DataSeries.cc b/src/logging/writers/DataSeries.cc index aacef01f80..a3d193be97 100644 --- a/src/logging/writers/DataSeries.cc +++ b/src/logging/writers/DataSeries.cc @@ -329,13 +329,7 @@ bool DataSeries::DoInit(string path, int num_fields, const threading::Field* con else Warning(Fmt("%s is not a valid compression type. Valid types are: 'lzf', 'lzo', 'gz', 'bz2', 'none', 'any'. Defaulting to 'any'", ds_compression.c_str())); - const ExtentType& type = log_types.registerTypeR(schema); - - // Note: This is a bit dicey as it depends on the implementation of - // registerTypeR(), but its what the DataSeries guys recommended - // given that we function we originally used has been deprecated. - log_type = &type; - + log_type = log_types.registerTypePtr(schema); log_series.setType(*log_type); return OpenLog(path); diff --git a/src/logging/writers/DataSeries.h b/src/logging/writers/DataSeries.h index ab2bcec88c..0d9ab67e95 100644 --- a/src/logging/writers/DataSeries.h +++ b/src/logging/writers/DataSeries.h @@ -100,7 +100,7 @@ private: // Internal DataSeries structures we need to keep track of. vector schema_list; ExtentTypeLibrary log_types; - const ExtentType *log_type; + ExtentType::Ptr log_type; ExtentSeries log_series; ExtentMap extents; int compress_type; diff --git a/testing/btest/core/leaks/dataseries-rotate.bro b/testing/btest/core/leaks/dataseries-rotate.bro new file mode 100644 index 0000000000..188de9717b --- /dev/null +++ b/testing/btest/core/leaks/dataseries-rotate.bro @@ -0,0 +1,34 @@ +# +# @TEST-REQUIRES: has-writer DataSeries && which ds2txt +# @TEST-REQUIRES: bro --help 2>&1 | grep -q mem-leaks +# +# @TEST-GROUP: leaks +# +# @TEST-EXEC: HEAP_CHECK_DUMP_DIRECTORY=. 
HEAPCHECK=local bro -m -b -r %DIR/../rotation.trace %INPUT Log::default_writer=Log::WRITER_DATASERIES + +module Test; + +export { + # Create a new ID for our log stream + redef enum Log::ID += { LOG }; + + # Define a record with all the columns the log file can have. + # (I'm using a subset of fields from ssh-ext for demonstration.) + type Log: record { + t: time; + id: conn_id; # Will be rolled out into individual columns. + } &log; +} + +redef Log::default_rotation_interval = 1hr; +redef Log::default_rotation_postprocessor_cmd = "echo"; + +event bro_init() +{ + Log::create_stream(Test::LOG, [$columns=Log]); +} + +event new_connection(c: connection) + { + Log::write(Test::LOG, [$t=network_time(), $id=c$id]); + } diff --git a/testing/btest/core/leaks/dataseries.bro b/testing/btest/core/leaks/dataseries.bro new file mode 100644 index 0000000000..886ee54dd9 --- /dev/null +++ b/testing/btest/core/leaks/dataseries.bro @@ -0,0 +1,9 @@ +# Needs perftools support. +# +# @TEST-REQUIRES: has-writer DataSeries && which ds2txt +# @TEST-REQUIRES: bro --help 2>&1 | grep -q mem-leaks +# +# @TEST-GROUP: leaks +# +# @TEST-REQUIRES: bro --help 2>&1 | grep -q mem-leaks +# @TEST-EXEC: HEAP_CHECK_DUMP_DIRECTORY=. HEAPCHECK=local bro -m -r $TRACES/wikipedia.trace Log::default_writer=Log::WRITER_DATASERIES From a0575158efffba2ebb6ae0308fb7af6fdee25e4c Mon Sep 17 00:00:00 2001 From: Robin Sommer Date: Fri, 4 May 2012 21:50:20 -0700 Subject: [PATCH 13/20] DataSeries updates and fixes. 
--- doc/logging-dataseries.rst | 16 -- src/logging/WriterBackend.cc | 7 +- src/logging/WriterBackend.h | 8 + src/logging/writers/Ascii.cc | 7 +- src/logging/writers/DataSeries.cc | 34 +++-- .../ssh.ds.xml | 2 +- .../out | 140 +++++++++--------- .../ssh.ds.txt | 10 +- .../conn.ds.txt | 80 +++++----- .../http.ds.txt | 38 ++--- 10 files changed, 169 insertions(+), 173 deletions(-) diff --git a/doc/logging-dataseries.rst b/doc/logging-dataseries.rst index 67f95ecf3b..1a5f4ae520 100644 --- a/doc/logging-dataseries.rst +++ b/doc/logging-dataseries.rst @@ -102,20 +102,4 @@ TODO. TODO ==== -* I'm seeing lots of warning on stderr:: - - Warning, while packing field ts of record 1, error was > 10%: - (1334620000 / 1000000 = 1334.62, round() = 1335) - Warning, while packing field not_valid_after of record 11, error was > 10%: - (1346460000 / 1000000 = 1346.46, round() = 1346) - - See Eric's mail. - -* For testing our script-level options: - - - Can we get the extentsize from a ``.ds`` file? - - Can we get the compressio level from a ``.ds`` file? - - See Eric's mail. - * Do we have a leak? diff --git a/src/logging/WriterBackend.cc b/src/logging/WriterBackend.cc index 28b623988c..09970f02c6 100644 --- a/src/logging/WriterBackend.cc +++ b/src/logging/WriterBackend.cc @@ -267,4 +267,9 @@ string WriterBackend::Render(const threading::Value::subnet_t& subnet) const return s; } - +string WriterBackend::Render(double d) const + { + char buf[256]; + modp_dtoa(d, buf, 6); + return buf; + } diff --git a/src/logging/WriterBackend.h b/src/logging/WriterBackend.h index 8fbf0c9e71..fa12613e6d 100644 --- a/src/logging/WriterBackend.h +++ b/src/logging/WriterBackend.h @@ -165,6 +165,14 @@ public: */ string Render(const threading::Value::subnet_t& subnet) const; + /** Helper method to render a double in Bro's standard precision. + * + * @param d The double. + * + * @return An ASCII representation of the double. 
+ */ + string Render(double d) const; + protected: friend class FinishMessage; diff --git a/src/logging/writers/Ascii.cc b/src/logging/writers/Ascii.cc index 3a35eea380..efc001aa97 100644 --- a/src/logging/writers/Ascii.cc +++ b/src/logging/writers/Ascii.cc @@ -176,14 +176,9 @@ bool Ascii::DoWriteOne(ODesc* desc, Value* val, const Field* field) desc->Add(Render(val->val.addr_val)); break; + case TYPE_DOUBLE: case TYPE_TIME: case TYPE_INTERVAL: - char buf[256]; - modp_dtoa(val->val.double_val, buf, 6); - desc->Add(buf); - break; - - case TYPE_DOUBLE: desc->Add(val->val.double_val); break; diff --git a/src/logging/writers/DataSeries.cc b/src/logging/writers/DataSeries.cc index a3d193be97..bd1da57403 100644 --- a/src/logging/writers/DataSeries.cc +++ b/src/logging/writers/DataSeries.cc @@ -21,29 +21,31 @@ std::string DataSeries::LogValueToString(threading::Value *val) if( ! val->present ) return ""; - std::ostringstream ostr; - switch(val->type) { case TYPE_BOOL: return (val->val.int_val ? "true" : "false"); case TYPE_INT: + { + std::ostringstream ostr; ostr << val->val.int_val; return ostr.str(); + } case TYPE_COUNT: case TYPE_COUNTER: case TYPE_PORT: + { + std::ostringstream ostr; ostr << val->val.uint_val; return ostr.str(); + } case TYPE_SUBNET: - ostr << Render(val->val.subnet_val); - return ostr.str(); + return Render(val->val.subnet_val); case TYPE_ADDR: - ostr << Render(val->val.addr_val); - return ostr.str(); + return Render(val->val.addr_val); // Note: These two cases are relatively special. We need to convert // these values into their integer equivalents to maximize precision. 
@@ -57,15 +59,16 @@ std::string DataSeries::LogValueToString(threading::Value *val) case TYPE_TIME: case TYPE_INTERVAL: if ( ds_use_integer_for_time ) + { + std::ostringstream ostr; ostr << (unsigned long)(DataSeries::TIME_SCALE * val->val.double_val); + return ostr.str(); + } else - ostr << val->val.double_val; - - return ostr.str(); + return Render(val->val.double_val); case TYPE_DOUBLE: - ostr << val->val.double_val; - return ostr.str(); + return Render(val->val.double_val); case TYPE_ENUM: case TYPE_STRING: @@ -190,10 +193,11 @@ std::string DataSeries::GetDSOptionsForType(const threading::Field *field) case TYPE_TIME: case TYPE_INTERVAL: { - std::string s = "pack_relative=\"" + std::string(field->name) + "\""; + std::string s; + s += "pack_relative=\"" + std::string(field->name) + "\""; if ( ! ds_use_integer_for_time ) - s += " pack_scale=\"1000000\""; + s += " pack_scale=\"1000\" pack_scale_warn=\"no\""; else s += string(" units=\"") + TIME_UNIT() + "\" epoch=\"unix\""; @@ -250,7 +254,7 @@ bool DataSeries::OpenLog(string path) ds_extent_size = ROW_MAX; } - log_output = new OutputModule(*log_file, log_series, *log_type, ds_extent_size); + log_output = new OutputModule(*log_file, log_series, log_type, ds_extent_size); return true; } @@ -330,7 +334,7 @@ bool DataSeries::DoInit(string path, int num_fields, const threading::Field* con Warning(Fmt("%s is not a valid compression type. Valid types are: 'lzf', 'lzo', 'gz', 'bz2', 'none', 'any'. 
Defaulting to 'any'", ds_compression.c_str())); log_type = log_types.registerTypePtr(schema); - log_series.setType(*log_type); + log_series.setType(log_type); return OpenLog(path); } diff --git a/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.options/ssh.ds.xml b/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.options/ssh.ds.xml index 71ad5d70a0..9862ae606f 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.options/ssh.ds.xml +++ b/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.options/ssh.ds.xml @@ -1,5 +1,5 @@ - + diff --git a/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.rotate/out b/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.rotate/out index a12fed36e1..76e7e77c77 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.rotate/out +++ b/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.rotate/out @@ -20,7 +20,7 @@ test.2011-03-07-12-00-05.ds test 11-03-07_12.00.05 11-03-07_12.59.55 1 dataserie - + @@ -34,17 +34,17 @@ test.2011-03-07-12-00-05.ds test 11-03-07_12.00.05 11-03-07_12.59.55 1 dataserie extent offset ExtentType 40 DataSeries: XmlType -360 test -468 DataSeries: ExtentIndex +372 test +484 DataSeries: ExtentIndex # Extent, type='test' t id.orig_h id.orig_p id.resp_h id.resp_p -1.299e+09 10.0.0.1 20 10.0.0.2 1024 -1.299e+09 10.0.0.2 20 10.0.0.3 0 +1.299467e+09 10.0.0.1 20 10.0.0.2 1024 +1.299471e+09 10.0.0.2 20 10.0.0.3 0 # Extent, type='DataSeries: ExtentIndex' offset extenttype 40 DataSeries: XmlType -360 test -468 DataSeries: ExtentIndex +372 test +484 DataSeries: ExtentIndex > test.2011-03-07-04-00-05.ds # Extent Types ... 
@@ -57,7 +57,7 @@ offset extenttype - + @@ -71,17 +71,17 @@ offset extenttype extent offset ExtentType 40 DataSeries: XmlType -360 test -468 DataSeries: ExtentIndex +372 test +484 DataSeries: ExtentIndex # Extent, type='test' t id.orig_h id.orig_p id.resp_h id.resp_p -1.299e+09 10.0.0.1 20 10.0.0.2 1025 -1.299e+09 10.0.0.2 20 10.0.0.3 1 +1.29947e+09 10.0.0.1 20 10.0.0.2 1025 +1.299474e+09 10.0.0.2 20 10.0.0.3 1 # Extent, type='DataSeries: ExtentIndex' offset extenttype 40 DataSeries: XmlType -360 test -468 DataSeries: ExtentIndex +372 test +484 DataSeries: ExtentIndex > test.2011-03-07-05-00-05.ds # Extent Types ... @@ -94,7 +94,7 @@ offset extenttype - + @@ -108,17 +108,17 @@ offset extenttype extent offset ExtentType 40 DataSeries: XmlType -360 test -468 DataSeries: ExtentIndex +372 test +484 DataSeries: ExtentIndex # Extent, type='test' t id.orig_h id.orig_p id.resp_h id.resp_p -1.299e+09 10.0.0.1 20 10.0.0.2 1026 -1.299e+09 10.0.0.2 20 10.0.0.3 2 +1.299474e+09 10.0.0.1 20 10.0.0.2 1026 +1.299478e+09 10.0.0.2 20 10.0.0.3 2 # Extent, type='DataSeries: ExtentIndex' offset extenttype 40 DataSeries: XmlType -360 test -468 DataSeries: ExtentIndex +372 test +484 DataSeries: ExtentIndex > test.2011-03-07-06-00-05.ds # Extent Types ... @@ -131,7 +131,7 @@ offset extenttype - + @@ -145,17 +145,17 @@ offset extenttype extent offset ExtentType 40 DataSeries: XmlType -360 test -468 DataSeries: ExtentIndex +372 test +484 DataSeries: ExtentIndex # Extent, type='test' t id.orig_h id.orig_p id.resp_h id.resp_p -1.299e+09 10.0.0.1 20 10.0.0.2 1027 -1.299e+09 10.0.0.2 20 10.0.0.3 3 +1.299478e+09 10.0.0.1 20 10.0.0.2 1027 +1.299482e+09 10.0.0.2 20 10.0.0.3 3 # Extent, type='DataSeries: ExtentIndex' offset extenttype 40 DataSeries: XmlType -360 test -468 DataSeries: ExtentIndex +372 test +484 DataSeries: ExtentIndex > test.2011-03-07-07-00-05.ds # Extent Types ... 
@@ -168,7 +168,7 @@ offset extenttype - + @@ -182,17 +182,17 @@ offset extenttype extent offset ExtentType 40 DataSeries: XmlType -360 test -468 DataSeries: ExtentIndex +372 test +484 DataSeries: ExtentIndex # Extent, type='test' t id.orig_h id.orig_p id.resp_h id.resp_p -1.299e+09 10.0.0.1 20 10.0.0.2 1028 -1.299e+09 10.0.0.2 20 10.0.0.3 4 +1.299481e+09 10.0.0.1 20 10.0.0.2 1028 +1.299485e+09 10.0.0.2 20 10.0.0.3 4 # Extent, type='DataSeries: ExtentIndex' offset extenttype 40 DataSeries: XmlType -360 test -468 DataSeries: ExtentIndex +372 test +484 DataSeries: ExtentIndex > test.2011-03-07-08-00-05.ds # Extent Types ... @@ -205,7 +205,7 @@ offset extenttype - + @@ -219,17 +219,17 @@ offset extenttype extent offset ExtentType 40 DataSeries: XmlType -360 test -468 DataSeries: ExtentIndex +372 test +484 DataSeries: ExtentIndex # Extent, type='test' t id.orig_h id.orig_p id.resp_h id.resp_p -1.299e+09 10.0.0.1 20 10.0.0.2 1029 -1.299e+09 10.0.0.2 20 10.0.0.3 5 +1.299485e+09 10.0.0.1 20 10.0.0.2 1029 +1.299489e+09 10.0.0.2 20 10.0.0.3 5 # Extent, type='DataSeries: ExtentIndex' offset extenttype 40 DataSeries: XmlType -360 test -468 DataSeries: ExtentIndex +372 test +484 DataSeries: ExtentIndex > test.2011-03-07-09-00-05.ds # Extent Types ... @@ -242,7 +242,7 @@ offset extenttype - + @@ -256,17 +256,17 @@ offset extenttype extent offset ExtentType 40 DataSeries: XmlType -360 test -468 DataSeries: ExtentIndex +372 test +484 DataSeries: ExtentIndex # Extent, type='test' t id.orig_h id.orig_p id.resp_h id.resp_p -1.299e+09 10.0.0.1 20 10.0.0.2 1030 -1.299e+09 10.0.0.2 20 10.0.0.3 6 +1.299488e+09 10.0.0.1 20 10.0.0.2 1030 +1.299492e+09 10.0.0.2 20 10.0.0.3 6 # Extent, type='DataSeries: ExtentIndex' offset extenttype 40 DataSeries: XmlType -360 test -468 DataSeries: ExtentIndex +372 test +484 DataSeries: ExtentIndex > test.2011-03-07-10-00-05.ds # Extent Types ... 
@@ -279,7 +279,7 @@ offset extenttype - + @@ -293,17 +293,17 @@ offset extenttype extent offset ExtentType 40 DataSeries: XmlType -360 test -468 DataSeries: ExtentIndex +372 test +484 DataSeries: ExtentIndex # Extent, type='test' t id.orig_h id.orig_p id.resp_h id.resp_p -1.299e+09 10.0.0.1 20 10.0.0.2 1031 -1.299e+09 10.0.0.2 20 10.0.0.3 7 +1.299492e+09 10.0.0.1 20 10.0.0.2 1031 +1.299496e+09 10.0.0.2 20 10.0.0.3 7 # Extent, type='DataSeries: ExtentIndex' offset extenttype 40 DataSeries: XmlType -360 test -468 DataSeries: ExtentIndex +372 test +484 DataSeries: ExtentIndex > test.2011-03-07-11-00-05.ds # Extent Types ... @@ -316,7 +316,7 @@ offset extenttype - + @@ -330,17 +330,17 @@ offset extenttype extent offset ExtentType 40 DataSeries: XmlType -360 test -468 DataSeries: ExtentIndex +372 test +484 DataSeries: ExtentIndex # Extent, type='test' t id.orig_h id.orig_p id.resp_h id.resp_p -1.3e+09 10.0.0.1 20 10.0.0.2 1032 -1.3e+09 10.0.0.2 20 10.0.0.3 8 +1.299496e+09 10.0.0.1 20 10.0.0.2 1032 +1.2995e+09 10.0.0.2 20 10.0.0.3 8 # Extent, type='DataSeries: ExtentIndex' offset extenttype 40 DataSeries: XmlType -360 test -468 DataSeries: ExtentIndex +372 test +484 DataSeries: ExtentIndex > test.2011-03-07-12-00-05.ds # Extent Types ... 
@@ -353,7 +353,7 @@ offset extenttype - + @@ -367,14 +367,14 @@ offset extenttype extent offset ExtentType 40 DataSeries: XmlType -360 test -468 DataSeries: ExtentIndex +372 test +484 DataSeries: ExtentIndex # Extent, type='test' t id.orig_h id.orig_p id.resp_h id.resp_p -1.3e+09 10.0.0.1 20 10.0.0.2 1033 -1.3e+09 10.0.0.2 20 10.0.0.3 9 +1.299499e+09 10.0.0.1 20 10.0.0.2 1033 +1.299503e+09 10.0.0.2 20 10.0.0.3 9 # Extent, type='DataSeries: ExtentIndex' offset extenttype 40 DataSeries: XmlType -360 test -468 DataSeries: ExtentIndex +372 test +484 DataSeries: ExtentIndex diff --git a/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.test-logging/ssh.ds.txt b/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.test-logging/ssh.ds.txt index 05026a24ef..8cb1293772 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.test-logging/ssh.ds.txt +++ b/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.test-logging/ssh.ds.txt @@ -9,7 +9,7 @@ - + @@ -27,8 +27,8 @@ extent offset ExtentType 40 DataSeries: XmlType -400 ssh -604 DataSeries: ExtentIndex +416 ssh +624 DataSeries: ExtentIndex # Extent, type='ssh' t id.orig_h id.orig_p id.resp_h id.resp_p status country X.XXXe+09 1.2.3.4 1234 2.3.4.5 80 success unknown @@ -39,5 +39,5 @@ X.XXXe+09 1.2.3.4 1234 2.3.4.5 80 failure MX # Extent, type='DataSeries: ExtentIndex' offset extenttype 40 DataSeries: XmlType -400 ssh -604 DataSeries: ExtentIndex +416 ssh +624 DataSeries: ExtentIndex diff --git a/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.wikipedia/conn.ds.txt b/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.wikipedia/conn.ds.txt index e85cf9337e..7a4af6776b 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.wikipedia/conn.ds.txt +++ b/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.wikipedia/conn.ds.txt @@ -9,7 +9,7 @@ - + @@ -17,7 +17,7 @@ - + @@ -51,46 +51,46 @@ extent 
offset ExtentType 40 DataSeries: XmlType -660 conn -2564 DataSeries: ExtentIndex +680 conn +2592 DataSeries: ExtentIndex # Extent, type='conn' ts uid id.orig_h id.orig_p id.resp_h id.resp_p proto service duration orig_bytes resp_bytes conn_state local_orig missed_bytes history orig_pkts orig_ip_bytes resp_pkts resp_ip_bytes -1.3e+09 UWkUyAuUGXf 141.142.220.202 5353 224.0.0.251 5353 udp dns 0 0 0 S0 F 0 D 1 73 0 0 -1.3e+09 arKYeMETxOg fe80::217:f2ff:fed7:cf65 5353 ff02::fb 5353 udp 0 0 0 S0 F 0 D 1 199 0 0 -1.3e+09 k6kgXLOoSKl 141.142.220.50 5353 224.0.0.251 5353 udp 0 0 0 S0 F 0 D 1 179 0 0 -1.3e+09 TEfuqmmG4bh 141.142.220.118 43927 141.142.2.2 53 udp dns 0 0 89 SHR F 0 Cd 0 0 1 117 -1.3e+09 FrJExwHcSal 141.142.220.118 37676 141.142.2.2 53 udp dns 0 0 99 SHR F 0 Cd 0 0 1 127 -1.3e+09 5OKnoww6xl4 141.142.220.118 40526 141.142.2.2 53 udp dns 0 0 183 SHR F 0 Cd 0 0 1 211 -1.3e+09 3PKsZ2Uye21 141.142.220.118 32902 141.142.2.2 53 udp dns 0 0 89 SHR F 0 Cd 0 0 1 117 -1.3e+09 VW0XPVINV8a 141.142.220.118 59816 141.142.2.2 53 udp dns 0 0 99 SHR F 0 Cd 0 0 1 127 -1.3e+09 fRFu0wcOle6 141.142.220.118 59714 141.142.2.2 53 udp dns 0 0 183 SHR F 0 Cd 0 0 1 211 -1.3e+09 qSsw6ESzHV4 141.142.220.118 58206 141.142.2.2 53 udp dns 0 0 89 SHR F 0 Cd 0 0 1 117 -1.3e+09 iE6yhOq3SF 141.142.220.118 38911 141.142.2.2 53 udp dns 0 0 99 SHR F 0 Cd 0 0 1 127 -1.3e+09 GSxOnSLghOa 141.142.220.118 59746 141.142.2.2 53 udp dns 0 0 183 SHR F 0 Cd 0 0 1 211 -1.3e+09 qCaWGmzFtM5 141.142.220.118 45000 141.142.2.2 53 udp dns 0 0 89 SHR F 0 Cd 0 0 1 117 -1.3e+09 70MGiRM1Qf4 141.142.220.118 48479 141.142.2.2 53 udp dns 0 0 99 SHR F 0 Cd 0 0 1 127 -1.3e+09 h5DsfNtYzi1 141.142.220.118 48128 141.142.2.2 53 udp dns 0 0 183 SHR F 0 Cd 0 0 1 211 -1.3e+09 P654jzLoe3a 141.142.220.118 56056 141.142.2.2 53 udp dns 0 0 131 SHR F 0 Cd 0 0 1 159 -1.3e+09 Tw8jXtpTGu6 141.142.220.118 55092 141.142.2.2 53 udp dns 0 0 198 SHR F 0 Cd 0 0 1 226 -1.3e+09 BWaU4aSuwkc 141.142.220.44 5353 224.0.0.251 5353 udp dns 0 0 0 S0 F 0 D 
1 85 0 0 -1.3e+09 10XodEwRycf 141.142.220.226 137 141.142.220.255 137 udp dns 0 350 0 S0 F 0 D 7 546 0 0 -1.3e+09 zno26fFZkrh fe80::3074:17d5:2052:c324 65373 ff02::1:3 5355 udp dns 0 66 0 S0 F 0 D 2 162 0 0 -1.3e+09 v5rgkJBig5l 141.142.220.226 55131 224.0.0.252 5355 udp dns 0 66 0 S0 F 0 D 2 122 0 0 -1.3e+09 eWZCH7OONC1 fe80::3074:17d5:2052:c324 54213 ff02::1:3 5355 udp dns 0 66 0 S0 F 0 D 2 162 0 0 -1.3e+09 0Pwk3ntf8O3 141.142.220.226 55671 224.0.0.252 5355 udp dns 0 66 0 S0 F 0 D 2 122 0 0 -1.3e+09 0HKorjr8Zp7 141.142.220.238 56641 141.142.220.255 137 udp dns 0 0 0 S0 F 0 D 1 78 0 0 -1.3e+09 GvmoxJFXdTa 141.142.220.118 49998 208.80.152.3 80 tcp 0 1130 734 S1 F 1130 ShACad 4 216 4 950 -1.3e+09 nQcgTWjvg4c 141.142.220.118 35634 208.80.152.2 80 tcp 0 0 350 OTH F 0 CdA 1 52 1 402 -1.3e+09 UfGkYA2HI2g 141.142.220.118 50001 208.80.152.3 80 tcp 0 1178 734 S1 F 1178 ShACad 4 216 4 950 -1.3e+09 i2rO3KD1Syg 141.142.220.118 35642 208.80.152.2 80 tcp 0 534 412 S1 F 534 ShACad 3 164 3 576 -1.3e+09 0Q4FH8sESw5 141.142.220.118 50000 208.80.152.3 80 tcp 0 1148 734 S1 F 1148 ShACad 4 216 4 950 -1.3e+09 EAr0uf4mhq 141.142.220.118 49996 208.80.152.3 80 tcp 0 1171 733 S1 F 1171 ShACad 4 216 4 949 -1.3e+09 slFea8xwSmb 141.142.220.118 49999 208.80.152.3 80 tcp 0 1137 733 S1 F 1137 ShACad 4 216 4 949 -1.3e+09 2cx26uAvUPl 141.142.220.235 6705 173.192.163.128 80 tcp 0 0 0 OTH F 0 h 0 0 1 48 -1.3e+09 j4u32Pc5bif 141.142.220.118 48649 208.80.152.118 80 tcp 0 525 232 S1 F 525 ShACad 3 164 3 396 -1.3e+09 c4Zw9TmAE05 141.142.220.118 49997 208.80.152.3 80 tcp 0 1125 734 S1 F 1125 ShACad 4 216 4 950 +1.300475e+09 UWkUyAuUGXf 141.142.220.202 5353 224.0.0.251 5353 udp dns 0 0 0 S0 F 0 D 1 73 0 0 +1.300475e+09 arKYeMETxOg fe80::217:f2ff:fed7:cf65 5353 ff02::fb 5353 udp 0 0 0 S0 F 0 D 1 199 0 0 +1.300475e+09 k6kgXLOoSKl 141.142.220.50 5353 224.0.0.251 5353 udp 0 0 0 S0 F 0 D 1 179 0 0 +1.300475e+09 TEfuqmmG4bh 141.142.220.118 43927 141.142.2.2 53 udp dns 0 0 89 SHR F 0 Cd 0 0 1 117 +1.300475e+09 
FrJExwHcSal 141.142.220.118 37676 141.142.2.2 53 udp dns 0 0 99 SHR F 0 Cd 0 0 1 127 +1.300475e+09 5OKnoww6xl4 141.142.220.118 40526 141.142.2.2 53 udp dns 0 0 183 SHR F 0 Cd 0 0 1 211 +1.300475e+09 3PKsZ2Uye21 141.142.220.118 32902 141.142.2.2 53 udp dns 0 0 89 SHR F 0 Cd 0 0 1 117 +1.300475e+09 VW0XPVINV8a 141.142.220.118 59816 141.142.2.2 53 udp dns 0 0 99 SHR F 0 Cd 0 0 1 127 +1.300475e+09 fRFu0wcOle6 141.142.220.118 59714 141.142.2.2 53 udp dns 0 0 183 SHR F 0 Cd 0 0 1 211 +1.300475e+09 qSsw6ESzHV4 141.142.220.118 58206 141.142.2.2 53 udp dns 0 0 89 SHR F 0 Cd 0 0 1 117 +1.300475e+09 iE6yhOq3SF 141.142.220.118 38911 141.142.2.2 53 udp dns 0 0 99 SHR F 0 Cd 0 0 1 127 +1.300475e+09 GSxOnSLghOa 141.142.220.118 59746 141.142.2.2 53 udp dns 0 0 183 SHR F 0 Cd 0 0 1 211 +1.300475e+09 qCaWGmzFtM5 141.142.220.118 45000 141.142.2.2 53 udp dns 0 0 89 SHR F 0 Cd 0 0 1 117 +1.300475e+09 70MGiRM1Qf4 141.142.220.118 48479 141.142.2.2 53 udp dns 0 0 99 SHR F 0 Cd 0 0 1 127 +1.300475e+09 h5DsfNtYzi1 141.142.220.118 48128 141.142.2.2 53 udp dns 0 0 183 SHR F 0 Cd 0 0 1 211 +1.300475e+09 P654jzLoe3a 141.142.220.118 56056 141.142.2.2 53 udp dns 0 0 131 SHR F 0 Cd 0 0 1 159 +1.300475e+09 Tw8jXtpTGu6 141.142.220.118 55092 141.142.2.2 53 udp dns 0 0 198 SHR F 0 Cd 0 0 1 226 +1.300475e+09 BWaU4aSuwkc 141.142.220.44 5353 224.0.0.251 5353 udp dns 0 0 0 S0 F 0 D 1 85 0 0 +1.300475e+09 10XodEwRycf 141.142.220.226 137 141.142.220.255 137 udp dns 0 350 0 S0 F 0 D 7 546 0 0 +1.300475e+09 zno26fFZkrh fe80::3074:17d5:2052:c324 65373 ff02::1:3 5355 udp dns 0 66 0 S0 F 0 D 2 162 0 0 +1.300475e+09 v5rgkJBig5l 141.142.220.226 55131 224.0.0.252 5355 udp dns 0 66 0 S0 F 0 D 2 122 0 0 +1.300475e+09 eWZCH7OONC1 fe80::3074:17d5:2052:c324 54213 ff02::1:3 5355 udp dns 0 66 0 S0 F 0 D 2 162 0 0 +1.300475e+09 0Pwk3ntf8O3 141.142.220.226 55671 224.0.0.252 5355 udp dns 0 66 0 S0 F 0 D 2 122 0 0 +1.300475e+09 0HKorjr8Zp7 141.142.220.238 56641 141.142.220.255 137 udp dns 0 0 0 S0 F 0 D 1 78 0 0 +1.300475e+09 
GvmoxJFXdTa 141.142.220.118 49998 208.80.152.3 80 tcp 0 1130 734 S1 F 1130 ShACad 4 216 4 950 +1.300475e+09 nQcgTWjvg4c 141.142.220.118 35634 208.80.152.2 80 tcp 0 0 350 OTH F 0 CdA 1 52 1 402 +1.300475e+09 UfGkYA2HI2g 141.142.220.118 50001 208.80.152.3 80 tcp 0 1178 734 S1 F 1178 ShACad 4 216 4 950 +1.300475e+09 i2rO3KD1Syg 141.142.220.118 35642 208.80.152.2 80 tcp 0 534 412 S1 F 534 ShACad 3 164 3 576 +1.300475e+09 0Q4FH8sESw5 141.142.220.118 50000 208.80.152.3 80 tcp 0 1148 734 S1 F 1148 ShACad 4 216 4 950 +1.300475e+09 EAr0uf4mhq 141.142.220.118 49996 208.80.152.3 80 tcp 0 1171 733 S1 F 1171 ShACad 4 216 4 949 +1.300475e+09 slFea8xwSmb 141.142.220.118 49999 208.80.152.3 80 tcp 0 1137 733 S1 F 1137 ShACad 4 216 4 949 +1.300475e+09 2cx26uAvUPl 141.142.220.235 6705 173.192.163.128 80 tcp 0 0 0 OTH F 0 h 0 0 1 48 +1.300475e+09 j4u32Pc5bif 141.142.220.118 48649 208.80.152.118 80 tcp 0 525 232 S1 F 525 ShACad 3 164 3 396 +1.300475e+09 c4Zw9TmAE05 141.142.220.118 49997 208.80.152.3 80 tcp 0 1125 734 S1 F 1125 ShACad 4 216 4 950 # Extent, type='DataSeries: ExtentIndex' offset extenttype 40 DataSeries: XmlType -660 conn -2564 DataSeries: ExtentIndex +680 conn +2592 DataSeries: ExtentIndex diff --git a/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.wikipedia/http.ds.txt b/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.wikipedia/http.ds.txt index a0c6cbbff3..0b16a69a6f 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.wikipedia/http.ds.txt +++ b/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.wikipedia/http.ds.txt @@ -9,7 +9,7 @@ - + @@ -65,26 +65,26 @@ extent offset ExtentType 40 DataSeries: XmlType -768 http -1156 DataSeries: ExtentIndex +784 http +1172 DataSeries: ExtentIndex # Extent, type='http' ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth method host uri referrer user_agent request_body_len response_body_len status_code status_msg info_code info_msg filename tags 
username password proxied mime_type md5 extraction_file -1.3e+09 j4u32Pc5bif 141.142.220.118 48649 208.80.152.118 80 0 0 0 304 Not Modified 0 -1.3e+09 c4Zw9TmAE05 141.142.220.118 49997 208.80.152.3 80 0 0 0 304 Not Modified 0 -1.3e+09 EAr0uf4mhq 141.142.220.118 49996 208.80.152.3 80 0 0 0 304 Not Modified 0 -1.3e+09 GvmoxJFXdTa 141.142.220.118 49998 208.80.152.3 80 0 0 0 304 Not Modified 0 -1.3e+09 0Q4FH8sESw5 141.142.220.118 50000 208.80.152.3 80 0 0 0 304 Not Modified 0 -1.3e+09 slFea8xwSmb 141.142.220.118 49999 208.80.152.3 80 0 0 0 304 Not Modified 0 -1.3e+09 UfGkYA2HI2g 141.142.220.118 50001 208.80.152.3 80 0 0 0 304 Not Modified 0 -1.3e+09 i2rO3KD1Syg 141.142.220.118 35642 208.80.152.2 80 0 0 0 304 Not Modified 0 -1.3e+09 c4Zw9TmAE05 141.142.220.118 49997 208.80.152.3 80 0 0 0 304 Not Modified 0 -1.3e+09 EAr0uf4mhq 141.142.220.118 49996 208.80.152.3 80 0 0 0 304 Not Modified 0 -1.3e+09 GvmoxJFXdTa 141.142.220.118 49998 208.80.152.3 80 0 0 0 304 Not Modified 0 -1.3e+09 0Q4FH8sESw5 141.142.220.118 50000 208.80.152.3 80 0 0 0 304 Not Modified 0 -1.3e+09 slFea8xwSmb 141.142.220.118 49999 208.80.152.3 80 0 0 0 304 Not Modified 0 -1.3e+09 UfGkYA2HI2g 141.142.220.118 50001 208.80.152.3 80 0 0 0 304 Not Modified 0 +1.300475e+09 j4u32Pc5bif 141.142.220.118 48649 208.80.152.118 80 0 0 0 304 Not Modified 0 +1.300475e+09 c4Zw9TmAE05 141.142.220.118 49997 208.80.152.3 80 0 0 0 304 Not Modified 0 +1.300475e+09 EAr0uf4mhq 141.142.220.118 49996 208.80.152.3 80 0 0 0 304 Not Modified 0 +1.300475e+09 GvmoxJFXdTa 141.142.220.118 49998 208.80.152.3 80 0 0 0 304 Not Modified 0 +1.300475e+09 0Q4FH8sESw5 141.142.220.118 50000 208.80.152.3 80 0 0 0 304 Not Modified 0 +1.300475e+09 slFea8xwSmb 141.142.220.118 49999 208.80.152.3 80 0 0 0 304 Not Modified 0 +1.300475e+09 UfGkYA2HI2g 141.142.220.118 50001 208.80.152.3 80 0 0 0 304 Not Modified 0 +1.300475e+09 i2rO3KD1Syg 141.142.220.118 35642 208.80.152.2 80 0 0 0 304 Not Modified 0 +1.300475e+09 c4Zw9TmAE05 141.142.220.118 49997 
208.80.152.3 80 0 0 0 304 Not Modified 0 +1.300475e+09 EAr0uf4mhq 141.142.220.118 49996 208.80.152.3 80 0 0 0 304 Not Modified 0 +1.300475e+09 GvmoxJFXdTa 141.142.220.118 49998 208.80.152.3 80 0 0 0 304 Not Modified 0 +1.300475e+09 0Q4FH8sESw5 141.142.220.118 50000 208.80.152.3 80 0 0 0 304 Not Modified 0 +1.300475e+09 slFea8xwSmb 141.142.220.118 49999 208.80.152.3 80 0 0 0 304 Not Modified 0 +1.300475e+09 UfGkYA2HI2g 141.142.220.118 50001 208.80.152.3 80 0 0 0 304 Not Modified 0 # Extent, type='DataSeries: ExtentIndex' offset extenttype 40 DataSeries: XmlType -768 http -1156 DataSeries: ExtentIndex +784 http +1172 DataSeries: ExtentIndex From 00b592f933b69079afcab527c8be5387b625fddd Mon Sep 17 00:00:00 2001 From: Robin Sommer Date: Mon, 14 May 2012 21:38:27 -0700 Subject: [PATCH 14/20] Adding format specifier to DS spec to print out double as %.6f. --- aux/binpac | 2 +- aux/bro-aux | 2 +- aux/broccoli | 2 +- aux/broctl | 2 +- aux/btest | 2 +- cmake | 2 +- src/logging/writers/DataSeries.cc | 7 ++++--- 7 files changed, 10 insertions(+), 9 deletions(-) diff --git a/aux/binpac b/aux/binpac index 56ae73ab99..dd1a3a95f0 160000 --- a/aux/binpac +++ b/aux/binpac @@ -1 +1 @@ -Subproject commit 56ae73ab995dda665d8918d1a6b3af39b15991e3 +Subproject commit dd1a3a95f07082efcd5274b21104a038d523d132 diff --git a/aux/bro-aux b/aux/bro-aux index 12d32194c1..a59b35bdad 160000 --- a/aux/bro-aux +++ b/aux/bro-aux @@ -1 +1 @@ -Subproject commit 12d32194c19d2dce06818588a2aeccf234de1889 +Subproject commit a59b35bdada8f70fb1a59bf7bb2976534c86d378 diff --git a/aux/broccoli b/aux/broccoli index 60898666ba..a4046c2f79 160000 --- a/aux/broccoli +++ b/aux/broccoli @@ -1 +1 @@ -Subproject commit 60898666ba1df1913c08ad5045b1e56f974060cc +Subproject commit a4046c2f79b6ab0ac19ae8be94b79c6ce578bea7 diff --git a/aux/broctl b/aux/broctl index d50e0efe13..c86b7e990b 160000 --- a/aux/broctl +++ b/aux/broctl @@ -1 +1 @@ -Subproject commit d50e0efe133c50d824753c86d068467e54a3c47d +Subproject commit 
c86b7e990b4d39cd48c0cb692077aa081b418149 diff --git a/aux/btest b/aux/btest index 1897d224ce..c8e8fe477b 160000 --- a/aux/btest +++ b/aux/btest @@ -1 +1 @@ -Subproject commit 1897d224ce295e91d20e458851759c99734a0a74 +Subproject commit c8e8fe477b5dec635e5ce00f3f764fad069c549c diff --git a/cmake b/cmake index d394eadf12..60b2873937 160000 --- a/cmake +++ b/cmake @@ -1 +1 @@ -Subproject commit d394eadf123f9ff972be4508d34b9614ebcc32a4 +Subproject commit 60b28739379da75f26c5c2a312b7886f5209a1cc diff --git a/src/logging/writers/DataSeries.cc b/src/logging/writers/DataSeries.cc index bd1da57403..32a93a5dd4 100644 --- a/src/logging/writers/DataSeries.cc +++ b/src/logging/writers/DataSeries.cc @@ -194,12 +194,13 @@ std::string DataSeries::GetDSOptionsForType(const threading::Field *field) case TYPE_INTERVAL: { std::string s; - s += "pack_relative=\"" + std::string(field->name) + "\""; + s += "pack_relative=\"" + std::string(field->name) + "\" "; + s += "print_format=\"%.6f\" "; if ( ! ds_use_integer_for_time ) - s += " pack_scale=\"1000\" pack_scale_warn=\"no\""; + s += "pack_scale=\"1000\" pack_scale_warn=\"no\""; else - s += string(" units=\"") + TIME_UNIT() + "\" epoch=\"unix\""; + s += string("units=\"") + TIME_UNIT() + "\" epoch=\"unix\""; return s; } From fabe891d4fbff62831de1dba677d252e984e2b30 Mon Sep 17 00:00:00 2001 From: Robin Sommer Date: Mon, 14 May 2012 21:58:58 -0700 Subject: [PATCH 15/20] Fixing pack_scale and time-as-int. Also removing now unneccessary canonifier script, and updating test baselines. 
--- src/logging/writers/DataSeries.cc | 7 +- .../ssh.ds.xml | 2 +- .../out | 140 +++++++++--------- .../ssh.ds.txt | 20 +-- .../conn.ds.txt | 80 +++++----- .../http.ds.txt | 38 ++--- .../logging/dataseries/test-logging.bro | 2 +- .../scripts/diff-remove-timestamps-dataseries | 6 - 8 files changed, 144 insertions(+), 151 deletions(-) delete mode 100755 testing/scripts/diff-remove-timestamps-dataseries diff --git a/src/logging/writers/DataSeries.cc b/src/logging/writers/DataSeries.cc index 32a93a5dd4..a7908a8e04 100644 --- a/src/logging/writers/DataSeries.cc +++ b/src/logging/writers/DataSeries.cc @@ -194,13 +194,12 @@ std::string DataSeries::GetDSOptionsForType(const threading::Field *field) case TYPE_INTERVAL: { std::string s; - s += "pack_relative=\"" + std::string(field->name) + "\" "; - s += "print_format=\"%.6f\" "; + s += "pack_relative=\"" + std::string(field->name) + "\""; if ( ! ds_use_integer_for_time ) - s += "pack_scale=\"1000\" pack_scale_warn=\"no\""; + s += " pack_scale=\"1e-6\" print_format=\"%.6f\" pack_scale_warn=\"no\""; else - s += string("units=\"") + TIME_UNIT() + "\" epoch=\"unix\""; + s += string(" units=\"") + TIME_UNIT() + "\" epoch=\"unix\""; return s; } diff --git a/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.options/ssh.ds.xml b/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.options/ssh.ds.xml index 9862ae606f..cacc3b0ea4 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.options/ssh.ds.xml +++ b/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.options/ssh.ds.xml @@ -1,5 +1,5 @@ - + diff --git a/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.rotate/out b/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.rotate/out index 76e7e77c77..ed2aff0164 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.rotate/out +++ b/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.rotate/out @@ 
-20,7 +20,7 @@ test.2011-03-07-12-00-05.ds test 11-03-07_12.00.05 11-03-07_12.59.55 1 dataserie - + @@ -34,17 +34,17 @@ test.2011-03-07-12-00-05.ds test 11-03-07_12.00.05 11-03-07_12.59.55 1 dataserie extent offset ExtentType 40 DataSeries: XmlType -372 test -484 DataSeries: ExtentIndex +392 test +508 DataSeries: ExtentIndex # Extent, type='test' t id.orig_h id.orig_p id.resp_h id.resp_p -1.299467e+09 10.0.0.1 20 10.0.0.2 1024 -1.299471e+09 10.0.0.2 20 10.0.0.3 0 +1299466805.000000 10.0.0.1 20 10.0.0.2 1024 +1299470395.000000 10.0.0.2 20 10.0.0.3 0 # Extent, type='DataSeries: ExtentIndex' offset extenttype 40 DataSeries: XmlType -372 test -484 DataSeries: ExtentIndex +392 test +508 DataSeries: ExtentIndex > test.2011-03-07-04-00-05.ds # Extent Types ... @@ -57,7 +57,7 @@ offset extenttype - + @@ -71,17 +71,17 @@ offset extenttype extent offset ExtentType 40 DataSeries: XmlType -372 test -484 DataSeries: ExtentIndex +392 test +516 DataSeries: ExtentIndex # Extent, type='test' t id.orig_h id.orig_p id.resp_h id.resp_p -1.29947e+09 10.0.0.1 20 10.0.0.2 1025 -1.299474e+09 10.0.0.2 20 10.0.0.3 1 +1299470405.000000 10.0.0.1 20 10.0.0.2 1025 +1299473995.000000 10.0.0.2 20 10.0.0.3 1 # Extent, type='DataSeries: ExtentIndex' offset extenttype 40 DataSeries: XmlType -372 test -484 DataSeries: ExtentIndex +392 test +516 DataSeries: ExtentIndex > test.2011-03-07-05-00-05.ds # Extent Types ... 
@@ -94,7 +94,7 @@ offset extenttype - + @@ -108,17 +108,17 @@ offset extenttype extent offset ExtentType 40 DataSeries: XmlType -372 test -484 DataSeries: ExtentIndex +392 test +516 DataSeries: ExtentIndex # Extent, type='test' t id.orig_h id.orig_p id.resp_h id.resp_p -1.299474e+09 10.0.0.1 20 10.0.0.2 1026 -1.299478e+09 10.0.0.2 20 10.0.0.3 2 +1299474005.000000 10.0.0.1 20 10.0.0.2 1026 +1299477595.000000 10.0.0.2 20 10.0.0.3 2 # Extent, type='DataSeries: ExtentIndex' offset extenttype 40 DataSeries: XmlType -372 test -484 DataSeries: ExtentIndex +392 test +516 DataSeries: ExtentIndex > test.2011-03-07-06-00-05.ds # Extent Types ... @@ -131,7 +131,7 @@ offset extenttype - + @@ -145,17 +145,17 @@ offset extenttype extent offset ExtentType 40 DataSeries: XmlType -372 test -484 DataSeries: ExtentIndex +392 test +516 DataSeries: ExtentIndex # Extent, type='test' t id.orig_h id.orig_p id.resp_h id.resp_p -1.299478e+09 10.0.0.1 20 10.0.0.2 1027 -1.299482e+09 10.0.0.2 20 10.0.0.3 3 +1299477605.000000 10.0.0.1 20 10.0.0.2 1027 +1299481195.000000 10.0.0.2 20 10.0.0.3 3 # Extent, type='DataSeries: ExtentIndex' offset extenttype 40 DataSeries: XmlType -372 test -484 DataSeries: ExtentIndex +392 test +516 DataSeries: ExtentIndex > test.2011-03-07-07-00-05.ds # Extent Types ... @@ -168,7 +168,7 @@ offset extenttype - + @@ -182,17 +182,17 @@ offset extenttype extent offset ExtentType 40 DataSeries: XmlType -372 test -484 DataSeries: ExtentIndex +392 test +512 DataSeries: ExtentIndex # Extent, type='test' t id.orig_h id.orig_p id.resp_h id.resp_p -1.299481e+09 10.0.0.1 20 10.0.0.2 1028 -1.299485e+09 10.0.0.2 20 10.0.0.3 4 +1299481205.000000 10.0.0.1 20 10.0.0.2 1028 +1299484795.000000 10.0.0.2 20 10.0.0.3 4 # Extent, type='DataSeries: ExtentIndex' offset extenttype 40 DataSeries: XmlType -372 test -484 DataSeries: ExtentIndex +392 test +512 DataSeries: ExtentIndex > test.2011-03-07-08-00-05.ds # Extent Types ... 
@@ -205,7 +205,7 @@ offset extenttype - + @@ -219,17 +219,17 @@ offset extenttype extent offset ExtentType 40 DataSeries: XmlType -372 test -484 DataSeries: ExtentIndex +392 test +516 DataSeries: ExtentIndex # Extent, type='test' t id.orig_h id.orig_p id.resp_h id.resp_p -1.299485e+09 10.0.0.1 20 10.0.0.2 1029 -1.299489e+09 10.0.0.2 20 10.0.0.3 5 +1299484805.000000 10.0.0.1 20 10.0.0.2 1029 +1299488395.000000 10.0.0.2 20 10.0.0.3 5 # Extent, type='DataSeries: ExtentIndex' offset extenttype 40 DataSeries: XmlType -372 test -484 DataSeries: ExtentIndex +392 test +516 DataSeries: ExtentIndex > test.2011-03-07-09-00-05.ds # Extent Types ... @@ -242,7 +242,7 @@ offset extenttype - + @@ -256,17 +256,17 @@ offset extenttype extent offset ExtentType 40 DataSeries: XmlType -372 test -484 DataSeries: ExtentIndex +392 test +516 DataSeries: ExtentIndex # Extent, type='test' t id.orig_h id.orig_p id.resp_h id.resp_p -1.299488e+09 10.0.0.1 20 10.0.0.2 1030 -1.299492e+09 10.0.0.2 20 10.0.0.3 6 +1299488405.000000 10.0.0.1 20 10.0.0.2 1030 +1299491995.000000 10.0.0.2 20 10.0.0.3 6 # Extent, type='DataSeries: ExtentIndex' offset extenttype 40 DataSeries: XmlType -372 test -484 DataSeries: ExtentIndex +392 test +516 DataSeries: ExtentIndex > test.2011-03-07-10-00-05.ds # Extent Types ... @@ -279,7 +279,7 @@ offset extenttype - + @@ -293,17 +293,17 @@ offset extenttype extent offset ExtentType 40 DataSeries: XmlType -372 test -484 DataSeries: ExtentIndex +392 test +516 DataSeries: ExtentIndex # Extent, type='test' t id.orig_h id.orig_p id.resp_h id.resp_p -1.299492e+09 10.0.0.1 20 10.0.0.2 1031 -1.299496e+09 10.0.0.2 20 10.0.0.3 7 +1299492005.000000 10.0.0.1 20 10.0.0.2 1031 +1299495595.000000 10.0.0.2 20 10.0.0.3 7 # Extent, type='DataSeries: ExtentIndex' offset extenttype 40 DataSeries: XmlType -372 test -484 DataSeries: ExtentIndex +392 test +516 DataSeries: ExtentIndex > test.2011-03-07-11-00-05.ds # Extent Types ... 
@@ -316,7 +316,7 @@ offset extenttype - + @@ -330,17 +330,17 @@ offset extenttype extent offset ExtentType 40 DataSeries: XmlType -372 test -484 DataSeries: ExtentIndex +392 test +516 DataSeries: ExtentIndex # Extent, type='test' t id.orig_h id.orig_p id.resp_h id.resp_p -1.299496e+09 10.0.0.1 20 10.0.0.2 1032 -1.2995e+09 10.0.0.2 20 10.0.0.3 8 +1299495605.000000 10.0.0.1 20 10.0.0.2 1032 +1299499195.000000 10.0.0.2 20 10.0.0.3 8 # Extent, type='DataSeries: ExtentIndex' offset extenttype 40 DataSeries: XmlType -372 test -484 DataSeries: ExtentIndex +392 test +516 DataSeries: ExtentIndex > test.2011-03-07-12-00-05.ds # Extent Types ... @@ -353,7 +353,7 @@ offset extenttype - + @@ -367,14 +367,14 @@ offset extenttype extent offset ExtentType 40 DataSeries: XmlType -372 test -484 DataSeries: ExtentIndex +392 test +516 DataSeries: ExtentIndex # Extent, type='test' t id.orig_h id.orig_p id.resp_h id.resp_p -1.299499e+09 10.0.0.1 20 10.0.0.2 1033 -1.299503e+09 10.0.0.2 20 10.0.0.3 9 +1299499205.000000 10.0.0.1 20 10.0.0.2 1033 +1299502795.000000 10.0.0.2 20 10.0.0.3 9 # Extent, type='DataSeries: ExtentIndex' offset extenttype 40 DataSeries: XmlType -372 test -484 DataSeries: ExtentIndex +392 test +516 DataSeries: ExtentIndex diff --git a/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.test-logging/ssh.ds.txt b/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.test-logging/ssh.ds.txt index 8cb1293772..245bdcd9be 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.test-logging/ssh.ds.txt +++ b/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.test-logging/ssh.ds.txt @@ -9,7 +9,7 @@ - + @@ -27,17 +27,17 @@ extent offset ExtentType 40 DataSeries: XmlType -416 ssh -624 DataSeries: ExtentIndex +436 ssh +644 DataSeries: ExtentIndex # Extent, type='ssh' t id.orig_h id.orig_p id.resp_h id.resp_p status country -X.XXXe+09 1.2.3.4 1234 2.3.4.5 80 success unknown -X.XXXe+09 1.2.3.4 1234 2.3.4.5 80 failure 
US -X.XXXe+09 1.2.3.4 1234 2.3.4.5 80 failure UK -X.XXXe+09 1.2.3.4 1234 2.3.4.5 80 success BR -X.XXXe+09 1.2.3.4 1234 2.3.4.5 80 failure MX +1337058239.030366 1.2.3.4 1234 2.3.4.5 80 success unknown +1337058239.030366 1.2.3.4 1234 2.3.4.5 80 failure US +1337058239.030366 1.2.3.4 1234 2.3.4.5 80 failure UK +1337058239.030366 1.2.3.4 1234 2.3.4.5 80 success BR +1337058239.030366 1.2.3.4 1234 2.3.4.5 80 failure MX # Extent, type='DataSeries: ExtentIndex' offset extenttype 40 DataSeries: XmlType -416 ssh -624 DataSeries: ExtentIndex +436 ssh +644 DataSeries: ExtentIndex diff --git a/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.wikipedia/conn.ds.txt b/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.wikipedia/conn.ds.txt index 7a4af6776b..104831f027 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.wikipedia/conn.ds.txt +++ b/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.wikipedia/conn.ds.txt @@ -9,7 +9,7 @@ - + @@ -17,7 +17,7 @@ - + @@ -51,46 +51,46 @@ extent offset ExtentType 40 DataSeries: XmlType -680 conn -2592 DataSeries: ExtentIndex +700 conn +2860 DataSeries: ExtentIndex # Extent, type='conn' ts uid id.orig_h id.orig_p id.resp_h id.resp_p proto service duration orig_bytes resp_bytes conn_state local_orig missed_bytes history orig_pkts orig_ip_bytes resp_pkts resp_ip_bytes -1.300475e+09 UWkUyAuUGXf 141.142.220.202 5353 224.0.0.251 5353 udp dns 0 0 0 S0 F 0 D 1 73 0 0 -1.300475e+09 arKYeMETxOg fe80::217:f2ff:fed7:cf65 5353 ff02::fb 5353 udp 0 0 0 S0 F 0 D 1 199 0 0 -1.300475e+09 k6kgXLOoSKl 141.142.220.50 5353 224.0.0.251 5353 udp 0 0 0 S0 F 0 D 1 179 0 0 -1.300475e+09 TEfuqmmG4bh 141.142.220.118 43927 141.142.2.2 53 udp dns 0 0 89 SHR F 0 Cd 0 0 1 117 -1.300475e+09 FrJExwHcSal 141.142.220.118 37676 141.142.2.2 53 udp dns 0 0 99 SHR F 0 Cd 0 0 1 127 -1.300475e+09 5OKnoww6xl4 141.142.220.118 40526 141.142.2.2 53 udp dns 0 0 183 SHR F 0 Cd 0 0 1 211 -1.300475e+09 3PKsZ2Uye21 
141.142.220.118 32902 141.142.2.2 53 udp dns 0 0 89 SHR F 0 Cd 0 0 1 117 -1.300475e+09 VW0XPVINV8a 141.142.220.118 59816 141.142.2.2 53 udp dns 0 0 99 SHR F 0 Cd 0 0 1 127 -1.300475e+09 fRFu0wcOle6 141.142.220.118 59714 141.142.2.2 53 udp dns 0 0 183 SHR F 0 Cd 0 0 1 211 -1.300475e+09 qSsw6ESzHV4 141.142.220.118 58206 141.142.2.2 53 udp dns 0 0 89 SHR F 0 Cd 0 0 1 117 -1.300475e+09 iE6yhOq3SF 141.142.220.118 38911 141.142.2.2 53 udp dns 0 0 99 SHR F 0 Cd 0 0 1 127 -1.300475e+09 GSxOnSLghOa 141.142.220.118 59746 141.142.2.2 53 udp dns 0 0 183 SHR F 0 Cd 0 0 1 211 -1.300475e+09 qCaWGmzFtM5 141.142.220.118 45000 141.142.2.2 53 udp dns 0 0 89 SHR F 0 Cd 0 0 1 117 -1.300475e+09 70MGiRM1Qf4 141.142.220.118 48479 141.142.2.2 53 udp dns 0 0 99 SHR F 0 Cd 0 0 1 127 -1.300475e+09 h5DsfNtYzi1 141.142.220.118 48128 141.142.2.2 53 udp dns 0 0 183 SHR F 0 Cd 0 0 1 211 -1.300475e+09 P654jzLoe3a 141.142.220.118 56056 141.142.2.2 53 udp dns 0 0 131 SHR F 0 Cd 0 0 1 159 -1.300475e+09 Tw8jXtpTGu6 141.142.220.118 55092 141.142.2.2 53 udp dns 0 0 198 SHR F 0 Cd 0 0 1 226 -1.300475e+09 BWaU4aSuwkc 141.142.220.44 5353 224.0.0.251 5353 udp dns 0 0 0 S0 F 0 D 1 85 0 0 -1.300475e+09 10XodEwRycf 141.142.220.226 137 141.142.220.255 137 udp dns 0 350 0 S0 F 0 D 7 546 0 0 -1.300475e+09 zno26fFZkrh fe80::3074:17d5:2052:c324 65373 ff02::1:3 5355 udp dns 0 66 0 S0 F 0 D 2 162 0 0 -1.300475e+09 v5rgkJBig5l 141.142.220.226 55131 224.0.0.252 5355 udp dns 0 66 0 S0 F 0 D 2 122 0 0 -1.300475e+09 eWZCH7OONC1 fe80::3074:17d5:2052:c324 54213 ff02::1:3 5355 udp dns 0 66 0 S0 F 0 D 2 162 0 0 -1.300475e+09 0Pwk3ntf8O3 141.142.220.226 55671 224.0.0.252 5355 udp dns 0 66 0 S0 F 0 D 2 122 0 0 -1.300475e+09 0HKorjr8Zp7 141.142.220.238 56641 141.142.220.255 137 udp dns 0 0 0 S0 F 0 D 1 78 0 0 -1.300475e+09 GvmoxJFXdTa 141.142.220.118 49998 208.80.152.3 80 tcp 0 1130 734 S1 F 1130 ShACad 4 216 4 950 -1.300475e+09 nQcgTWjvg4c 141.142.220.118 35634 208.80.152.2 80 tcp 0 0 350 OTH F 0 CdA 1 52 1 402 -1.300475e+09 
UfGkYA2HI2g 141.142.220.118 50001 208.80.152.3 80 tcp 0 1178 734 S1 F 1178 ShACad 4 216 4 950 -1.300475e+09 i2rO3KD1Syg 141.142.220.118 35642 208.80.152.2 80 tcp 0 534 412 S1 F 534 ShACad 3 164 3 576 -1.300475e+09 0Q4FH8sESw5 141.142.220.118 50000 208.80.152.3 80 tcp 0 1148 734 S1 F 1148 ShACad 4 216 4 950 -1.300475e+09 EAr0uf4mhq 141.142.220.118 49996 208.80.152.3 80 tcp 0 1171 733 S1 F 1171 ShACad 4 216 4 949 -1.300475e+09 slFea8xwSmb 141.142.220.118 49999 208.80.152.3 80 tcp 0 1137 733 S1 F 1137 ShACad 4 216 4 949 -1.300475e+09 2cx26uAvUPl 141.142.220.235 6705 173.192.163.128 80 tcp 0 0 0 OTH F 0 h 0 0 1 48 -1.300475e+09 j4u32Pc5bif 141.142.220.118 48649 208.80.152.118 80 tcp 0 525 232 S1 F 525 ShACad 3 164 3 396 -1.300475e+09 c4Zw9TmAE05 141.142.220.118 49997 208.80.152.3 80 tcp 0 1125 734 S1 F 1125 ShACad 4 216 4 950 +1300475167.096535 UWkUyAuUGXf 141.142.220.202 5353 224.0.0.251 5353 udp dns 0.000000 0 0 S0 F 0 D 1 73 0 0 +1300475167.097012 arKYeMETxOg fe80::217:f2ff:fed7:cf65 5353 ff02::fb 5353 udp 0.000000 0 0 S0 F 0 D 1 199 0 0 +1300475167.099816 k6kgXLOoSKl 141.142.220.50 5353 224.0.0.251 5353 udp 0.000000 0 0 S0 F 0 D 1 179 0 0 +1300475168.853899 TEfuqmmG4bh 141.142.220.118 43927 141.142.2.2 53 udp dns 0.000435 0 89 SHR F 0 Cd 0 0 1 117 +1300475168.854378 FrJExwHcSal 141.142.220.118 37676 141.142.2.2 53 udp dns 0.000420 0 99 SHR F 0 Cd 0 0 1 127 +1300475168.854837 5OKnoww6xl4 141.142.220.118 40526 141.142.2.2 53 udp dns 0.000392 0 183 SHR F 0 Cd 0 0 1 211 +1300475168.857956 3PKsZ2Uye21 141.142.220.118 32902 141.142.2.2 53 udp dns 0.000317 0 89 SHR F 0 Cd 0 0 1 117 +1300475168.858306 VW0XPVINV8a 141.142.220.118 59816 141.142.2.2 53 udp dns 0.000343 0 99 SHR F 0 Cd 0 0 1 127 +1300475168.858713 fRFu0wcOle6 141.142.220.118 59714 141.142.2.2 53 udp dns 0.000375 0 183 SHR F 0 Cd 0 0 1 211 +1300475168.891644 qSsw6ESzHV4 141.142.220.118 58206 141.142.2.2 53 udp dns 0.000339 0 89 SHR F 0 Cd 0 0 1 117 +1300475168.892037 iE6yhOq3SF 141.142.220.118 38911 141.142.2.2 
53 udp dns 0.000335 0 99 SHR F 0 Cd 0 0 1 127 +1300475168.892414 GSxOnSLghOa 141.142.220.118 59746 141.142.2.2 53 udp dns 0.000421 0 183 SHR F 0 Cd 0 0 1 211 +1300475168.893988 qCaWGmzFtM5 141.142.220.118 45000 141.142.2.2 53 udp dns 0.000384 0 89 SHR F 0 Cd 0 0 1 117 +1300475168.894422 70MGiRM1Qf4 141.142.220.118 48479 141.142.2.2 53 udp dns 0.000317 0 99 SHR F 0 Cd 0 0 1 127 +1300475168.894787 h5DsfNtYzi1 141.142.220.118 48128 141.142.2.2 53 udp dns 0.000423 0 183 SHR F 0 Cd 0 0 1 211 +1300475168.901749 P654jzLoe3a 141.142.220.118 56056 141.142.2.2 53 udp dns 0.000402 0 131 SHR F 0 Cd 0 0 1 159 +1300475168.902195 Tw8jXtpTGu6 141.142.220.118 55092 141.142.2.2 53 udp dns 0.000374 0 198 SHR F 0 Cd 0 0 1 226 +1300475169.899438 BWaU4aSuwkc 141.142.220.44 5353 224.0.0.251 5353 udp dns 0.000000 0 0 S0 F 0 D 1 85 0 0 +1300475170.862384 10XodEwRycf 141.142.220.226 137 141.142.220.255 137 udp dns 2.613017 350 0 S0 F 0 D 7 546 0 0 +1300475171.675372 zno26fFZkrh fe80::3074:17d5:2052:c324 65373 ff02::1:3 5355 udp dns 0.100096 66 0 S0 F 0 D 2 162 0 0 +1300475171.677081 v5rgkJBig5l 141.142.220.226 55131 224.0.0.252 5355 udp dns 0.100021 66 0 S0 F 0 D 2 122 0 0 +1300475173.116749 eWZCH7OONC1 fe80::3074:17d5:2052:c324 54213 ff02::1:3 5355 udp dns 0.099801 66 0 S0 F 0 D 2 162 0 0 +1300475173.117362 0Pwk3ntf8O3 141.142.220.226 55671 224.0.0.252 5355 udp dns 0.099849 66 0 S0 F 0 D 2 122 0 0 +1300475173.153679 0HKorjr8Zp7 141.142.220.238 56641 141.142.220.255 137 udp dns 0.000000 0 0 S0 F 0 D 1 78 0 0 +1300475168.859163 GvmoxJFXdTa 141.142.220.118 49998 208.80.152.3 80 tcp 0.215893 1130 734 S1 F 1130 ShACad 4 216 4 950 +1300475168.652003 nQcgTWjvg4c 141.142.220.118 35634 208.80.152.2 80 tcp 0.061329 0 350 OTH F 0 CdA 1 52 1 402 +1300475168.895267 UfGkYA2HI2g 141.142.220.118 50001 208.80.152.3 80 tcp 0.227284 1178 734 S1 F 1178 ShACad 4 216 4 950 +1300475168.902635 i2rO3KD1Syg 141.142.220.118 35642 208.80.152.2 80 tcp 0.120041 534 412 S1 F 534 ShACad 3 164 3 576 +1300475168.892936 
0Q4FH8sESw5 141.142.220.118 50000 208.80.152.3 80 tcp 0.229603 1148 734 S1 F 1148 ShACad 4 216 4 950 +1300475168.855305 EAr0uf4mhq 141.142.220.118 49996 208.80.152.3 80 tcp 0.218501 1171 733 S1 F 1171 ShACad 4 216 4 949 +1300475168.892913 slFea8xwSmb 141.142.220.118 49999 208.80.152.3 80 tcp 0.220961 1137 733 S1 F 1137 ShACad 4 216 4 949 +1300475169.780331 2cx26uAvUPl 141.142.220.235 6705 173.192.163.128 80 tcp 0.000000 0 0 OTH F 0 h 0 0 1 48 +1300475168.724007 j4u32Pc5bif 141.142.220.118 48649 208.80.152.118 80 tcp 0.119905 525 232 S1 F 525 ShACad 3 164 3 396 +1300475168.855330 c4Zw9TmAE05 141.142.220.118 49997 208.80.152.3 80 tcp 0.219720 1125 734 S1 F 1125 ShACad 4 216 4 950 # Extent, type='DataSeries: ExtentIndex' offset extenttype 40 DataSeries: XmlType -680 conn -2592 DataSeries: ExtentIndex +700 conn +2860 DataSeries: ExtentIndex diff --git a/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.wikipedia/http.ds.txt b/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.wikipedia/http.ds.txt index 0b16a69a6f..0f1eebd251 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.wikipedia/http.ds.txt +++ b/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.wikipedia/http.ds.txt @@ -9,7 +9,7 @@ - + @@ -65,26 +65,26 @@ extent offset ExtentType 40 DataSeries: XmlType -784 http -1172 DataSeries: ExtentIndex +804 http +1252 DataSeries: ExtentIndex # Extent, type='http' ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth method host uri referrer user_agent request_body_len response_body_len status_code status_msg info_code info_msg filename tags username password proxied mime_type md5 extraction_file -1.300475e+09 j4u32Pc5bif 141.142.220.118 48649 208.80.152.118 80 0 0 0 304 Not Modified 0 -1.300475e+09 c4Zw9TmAE05 141.142.220.118 49997 208.80.152.3 80 0 0 0 304 Not Modified 0 -1.300475e+09 EAr0uf4mhq 141.142.220.118 49996 208.80.152.3 80 0 0 0 304 Not Modified 0 -1.300475e+09 GvmoxJFXdTa 
141.142.220.118 49998 208.80.152.3 80 0 0 0 304 Not Modified 0 -1.300475e+09 0Q4FH8sESw5 141.142.220.118 50000 208.80.152.3 80 0 0 0 304 Not Modified 0 -1.300475e+09 slFea8xwSmb 141.142.220.118 49999 208.80.152.3 80 0 0 0 304 Not Modified 0 -1.300475e+09 UfGkYA2HI2g 141.142.220.118 50001 208.80.152.3 80 0 0 0 304 Not Modified 0 -1.300475e+09 i2rO3KD1Syg 141.142.220.118 35642 208.80.152.2 80 0 0 0 304 Not Modified 0 -1.300475e+09 c4Zw9TmAE05 141.142.220.118 49997 208.80.152.3 80 0 0 0 304 Not Modified 0 -1.300475e+09 EAr0uf4mhq 141.142.220.118 49996 208.80.152.3 80 0 0 0 304 Not Modified 0 -1.300475e+09 GvmoxJFXdTa 141.142.220.118 49998 208.80.152.3 80 0 0 0 304 Not Modified 0 -1.300475e+09 0Q4FH8sESw5 141.142.220.118 50000 208.80.152.3 80 0 0 0 304 Not Modified 0 -1.300475e+09 slFea8xwSmb 141.142.220.118 49999 208.80.152.3 80 0 0 0 304 Not Modified 0 -1.300475e+09 UfGkYA2HI2g 141.142.220.118 50001 208.80.152.3 80 0 0 0 304 Not Modified 0 +1300475168.843894 j4u32Pc5bif 141.142.220.118 48649 208.80.152.118 80 0 0 0 304 Not Modified 0 +1300475168.975800 c4Zw9TmAE05 141.142.220.118 49997 208.80.152.3 80 0 0 0 304 Not Modified 0 +1300475168.976327 EAr0uf4mhq 141.142.220.118 49996 208.80.152.3 80 0 0 0 304 Not Modified 0 +1300475168.979160 GvmoxJFXdTa 141.142.220.118 49998 208.80.152.3 80 0 0 0 304 Not Modified 0 +1300475169.012666 0Q4FH8sESw5 141.142.220.118 50000 208.80.152.3 80 0 0 0 304 Not Modified 0 +1300475169.012730 slFea8xwSmb 141.142.220.118 49999 208.80.152.3 80 0 0 0 304 Not Modified 0 +1300475169.014860 UfGkYA2HI2g 141.142.220.118 50001 208.80.152.3 80 0 0 0 304 Not Modified 0 +1300475169.022665 i2rO3KD1Syg 141.142.220.118 35642 208.80.152.2 80 0 0 0 304 Not Modified 0 +1300475169.036294 c4Zw9TmAE05 141.142.220.118 49997 208.80.152.3 80 0 0 0 304 Not Modified 0 +1300475169.036798 EAr0uf4mhq 141.142.220.118 49996 208.80.152.3 80 0 0 0 304 Not Modified 0 +1300475169.039923 GvmoxJFXdTa 141.142.220.118 49998 208.80.152.3 80 0 0 0 304 Not Modified 0 
+1300475169.074793 0Q4FH8sESw5 141.142.220.118 50000 208.80.152.3 80 0 0 0 304 Not Modified 0 +1300475169.074938 slFea8xwSmb 141.142.220.118 49999 208.80.152.3 80 0 0 0 304 Not Modified 0 +1300475169.075065 UfGkYA2HI2g 141.142.220.118 50001 208.80.152.3 80 0 0 0 304 Not Modified 0 # Extent, type='DataSeries: ExtentIndex' offset extenttype 40 DataSeries: XmlType -784 http -1172 DataSeries: ExtentIndex +804 http +1252 DataSeries: ExtentIndex diff --git a/testing/btest/scripts/base/frameworks/logging/dataseries/test-logging.bro b/testing/btest/scripts/base/frameworks/logging/dataseries/test-logging.bro index d04b0acf44..0c5c52460b 100644 --- a/testing/btest/scripts/base/frameworks/logging/dataseries/test-logging.bro +++ b/testing/btest/scripts/base/frameworks/logging/dataseries/test-logging.bro @@ -3,7 +3,7 @@ # @TEST-GROUP: dataseries # # @TEST-EXEC: bro -b %INPUT Log::default_writer=Log::WRITER_DATASERIES -# @TEST-EXEC: ds2txt ssh.ds | ${SCRIPTS}/diff-remove-timestamps-dataseries >ssh.ds.txt +# @TEST-EXEC: ds2txt ssh.ds >ssh.ds.txt # @TEST-EXEC: btest-diff ssh.ds.txt module SSH; diff --git a/testing/scripts/diff-remove-timestamps-dataseries b/testing/scripts/diff-remove-timestamps-dataseries deleted file mode 100755 index 5b20f138af..0000000000 --- a/testing/scripts/diff-remove-timestamps-dataseries +++ /dev/null @@ -1,6 +0,0 @@ -#! /usr/bin/env bash -# -# Replace anything which looks like DataSeries timestamps (which is a double) with XXXs. - -sed 's/1\.[0-9]*e+09/X.XXXe+09/g' - From ac09bae7d59a6794a02ae40197c9ea0c7cb40f1a Mon Sep 17 00:00:00 2001 From: Robin Sommer Date: Mon, 14 May 2012 22:14:31 -0700 Subject: [PATCH 16/20] Updating doc. 
--- doc/logging-dataseries.rst | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/doc/logging-dataseries.rst b/doc/logging-dataseries.rst index 1a5f4ae520..5f1ad7f7c6 100644 --- a/doc/logging-dataseries.rst +++ b/doc/logging-dataseries.rst @@ -72,12 +72,15 @@ With that, Bro will now write all its output into DataSeries files tools, which its installation process will have installed into ``/bin``. For example, to convert a file back into an ASCII representation:: - # ds2txt conn .log + # ds2txt conn.log [... We skip a bunch of meta data here ...] - ts uid id.orig_h id.orig_p id.resp_h id.resp_p proto service duration orig_bytes resp_bytes conn_state local_orig missed_bytes history orig_pkts orig_ip_bytes resp_pkts res - 1.3e+09 9CqElRsB9Q 141.142.220.202 5353 224.0.0.251 5353 udp dns 0 0 0 S0 F 0 D 1 73 0 0 - 1.3e+09 3bNPfUWuIhb fe80::217:f2ff:fed7:cf65 5353 ff02::fb 5353 udp 0 0 0 S0 F 0 D 1 199 0 0 - 1.3e+09 ZoDDN7YuYx3 141.142.220.50 5353 224.0.0.251 5353 udp 0 0 0 S0 F 0 D 1 179 0 0 + ts uid id.orig_h id.orig_p id.resp_h id.resp_p proto service duration orig_bytes resp_bytes conn_state local_orig missed_bytes history orig_pkts orig_ip_bytes resp_pkts resp_ip_bytes + 1300475167.096535 CRCC5OdDlXe 141.142.220.202 5353 224.0.0.251 5353 udp dns 0.000000 0 0 S0 F 0 D 1 73 0 0 + 1300475167.097012 o7XBsfvo3U1 fe80::217:f2ff:fed7:cf65 5353 ff02::fb 5353 udp 0.000000 0 0 S0 F 0 D 1 199 0 0 + 1300475167.099816 pXPi1kPMgxb 141.142.220.50 5353 224.0.0.251 5353 udp 0.000000 0 0 S0 F 0 D 1 179 0 0 + 1300475168.853899 R7sOc16woCj 141.142.220.118 43927 141.142.2.2 53 udp dns 0.000435 38 89 SF F 0 Dd 1 66 1 117 + 1300475168.854378 Z6dfHVmt0X7 141.142.220.118 37676 141.142.2.2 53 udp dns 0.000420 52 99 SF F 0 Dd 1 80 1 127 + 1300475168.854837 k6T92WxgNAh 141.142.220.118 40526 141.142.2.2 53 udp dns 0.000392 38 183 SF F 0 Dd 1 66 1 211 [...] 
Note that is ASCII format is *not* equivalent to Bro's default format From 4fe11cf50361321597177e8d78ce25ffa5ea6f1b Mon Sep 17 00:00:00 2001 From: Robin Sommer Date: Wed, 16 May 2012 17:54:38 -0700 Subject: [PATCH 17/20] Extending DS docs with some examples. --- doc/logging-dataseries.rst | 75 ++++++++++++++++++++++++++++++++++---- 1 file changed, 68 insertions(+), 7 deletions(-) diff --git a/doc/logging-dataseries.rst b/doc/logging-dataseries.rst index 5f1ad7f7c6..8c797dd46c 100644 --- a/doc/logging-dataseries.rst +++ b/doc/logging-dataseries.rst @@ -62,7 +62,7 @@ Activating DataSeries The direct way to use DataSeries is to switch *all* log files over to the binary format. To do that, just add ``redef -Log::default_writer=Log::WRITER_DATASERIES;`` to your ``local.bro`. +Log::default_writer=Log::WRITER_DATASERIES;`` to your ``local.bro``. For testing, you can also just pass that on the command line:: bro -r trace.pcap Log::default_writer=Log::WRITER_DATASERIES @@ -72,7 +72,8 @@ With that, Bro will now write all its output into DataSeries files tools, which its installation process will have installed into ``/bin``. For example, to convert a file back into an ASCII representation:: - # ds2txt conn.log + + $ ds2txt conn.log [... We skip a bunch of meta data here ...] ts uid id.orig_h id.orig_p id.resp_h id.resp_p proto service duration orig_bytes resp_bytes conn_state local_orig missed_bytes history orig_pkts orig_ip_bytes resp_pkts resp_ip_bytes 1300475167.096535 CRCC5OdDlXe 141.142.220.202 5353 224.0.0.251 5353 udp dns 0.000000 0 0 S0 F 0 D 1 73 0 0 @@ -83,13 +84,22 @@ representation:: 1300475168.854837 k6T92WxgNAh 141.142.220.118 40526 141.142.2.2 53 udp dns 0.000392 38 183 SF F 0 Dd 1 66 1 211 [...] +(``--skip-all`` suppresses the meta data.) + Note that is ASCII format is *not* equivalent to Bro's default format as DataSeries uses a different internal representation. 
You can also switch only individual files over to DataSeries by adding code like this to your ``local.bro``:: - TODO +.. code:: bro + + event bro_init() + { + local f = Log::get_filter(Conn::LOG, "default"); # Get default filter for connection log. + f$writer = Log::WRITER_DATASERIES; # Change writer type. + Log::add_filter(Conn::LOG, f); # Replace filter with adapted version. + } Bro's DataSeries writer comes with a few tuning options, see :doc:`scripts/base/frameworks/logging/writers/dataseries`. @@ -100,9 +110,60 @@ Working with DataSeries Here are few examples of using DataSeries command line tools to work with the output files. -TODO. +* Printing CSV:: -TODO -==== + $ ds2txt --csv conn.log + ts,uid,id.orig_h,id.orig_p,id.resp_h,id.resp_p,proto,service,duration,orig_bytes,resp_bytes,conn_state,local_orig,missed_bytes,history,orig_pkts,orig_ip_bytes,resp_pkts,resp_ip_bytes + 1258790493.773208,ZTtgbHvf4s3,192.168.1.104,137,192.168.1.255,137,udp,dns,3.748891,350,0,S0,F,0,D,7,546,0,0 + 1258790451.402091,pOY6Rw7lhUd,192.168.1.106,138,192.168.1.255,138,udp,,0.000000,0,0,S0,F,0,D,1,229,0,0 + 1258790493.787448,pn5IiEslca9,192.168.1.104,138,192.168.1.255,138,udp,,2.243339,348,0,S0,F,0,D,2,404,0,0 + 1258790615.268111,D9slyIu3hFj,192.168.1.106,137,192.168.1.255,137,udp,dns,3.764626,350,0,S0,F,0,D,7,546,0,0 + [...] -* Do we have a leak? + Add ``--separator=X`` to set a different separator. + +* Extracting a subset of columns:: + + $ ds2txt --select '*' ts,id.resp_h,id.resp_p --skip-all conn.log + 1258790493.773208 192.168.1.255 137 + 1258790451.402091 192.168.1.255 138 + 1258790493.787448 192.168.1.255 138 + 1258790615.268111 192.168.1.255 137 + 1258790615.289842 192.168.1.255 138 + [...] 
+ +* Filtering rows:: + + $ ds2txt --where '*' 'duration > 5 && id.resp_p > 1024' --skip-all conn.ds + 1258790631.532888 V8mV5WLITu5 192.168.1.105 55890 239.255.255.250 1900 udp 15.004568 798 0 S0 F 0 D 6 966 0 0 + 1258792413.439596 tMcWVWQptvd 192.168.1.105 55890 239.255.255.250 1900 udp 15.004581 798 0 S0 F 0 D 6 966 0 0 + 1258794195.346127 cQwQMRdBrKa 192.168.1.105 55890 239.255.255.250 1900 udp 15.005071 798 0 S0 F 0 D 6 966 0 0 + 1258795977.253200 i8TEjhWd2W8 192.168.1.105 55890 239.255.255.250 1900 udp 15.004824 798 0 S0 F 0 D 6 966 0 0 + 1258797759.160217 MsLsBA8Ia49 192.168.1.105 55890 239.255.255.250 1900 udp 15.005078 798 0 S0 F 0 D 6 966 0 0 + 1258799541.068452 TsOxRWJRGwf 192.168.1.105 55890 239.255.255.250 1900 udp 15.004082 798 0 S0 F 0 D 6 966 0 0 + [...] + +* Calculate some statistics: + + Mean/stdev/min/max over a column:: + + $ dsstatgroupby '*' basic duration from conn.ds + # Begin DSStatGroupByModule + # processed 2159 rows, where clause eliminated 0 rows + # count(*), mean(duration), stddev, min, max + 2159, 42.7938, 1858.34, 0, 86370 + [...] + + Quantiles of total connection volume:: + + > dsstatgroupby '*' quantile 'orig_bytes + resp_bytes' from conn.ds + [...] + 2159 data points, mean 24616 +- 343295 [0,1.26615e+07] + quantiles about every 216 data points: + 10%: 0, 124, 317, 348, 350, 350, 601, 798, 1469 + tails: 90%: 1469, 95%: 7302, 99%: 242629, 99.5%: 1226262 + [...] + +The ``man`` pages for these tool show further options, and their +``-h`` option gives some more information (either can be a bit cryptic +unfortunately though). From 99db264775cfc2d1aae5f0c8cd264deb37f497e3 Mon Sep 17 00:00:00 2001 From: Robin Sommer Date: Wed, 16 May 2012 18:00:44 -0700 Subject: [PATCH 18/20] Portability fixes. - Fix for time-as-int on 32-bit systems. - Skipping ds2txt's index output for test diffing, as it seems non-portable. 
--- src/logging/writers/DataSeries.cc | 2 +- .../out | 90 ------------------- .../ssh.ds.txt | 19 ++-- .../conn.ds.txt | 9 -- .../conn.ds.txt | 9 -- .../http.ds.txt | 9 -- .../btest/core/leaks/dataseries-rotate.bro | 1 + testing/btest/core/leaks/dataseries.bro | 1 + .../frameworks/logging/dataseries/rotate.bro | 2 +- .../logging/dataseries/test-logging.bro | 2 +- .../logging/dataseries/time-as-int.bro | 2 +- .../logging/dataseries/wikipedia.bro | 4 +- 12 files changed, 13 insertions(+), 137 deletions(-) diff --git a/src/logging/writers/DataSeries.cc b/src/logging/writers/DataSeries.cc index a7908a8e04..9f19028be3 100644 --- a/src/logging/writers/DataSeries.cc +++ b/src/logging/writers/DataSeries.cc @@ -61,7 +61,7 @@ std::string DataSeries::LogValueToString(threading::Value *val) if ( ds_use_integer_for_time ) { std::ostringstream ostr; - ostr << (unsigned long)(DataSeries::TIME_SCALE * val->val.double_val); + ostr << (uint64_t)(DataSeries::TIME_SCALE * val->val.double_val); return ostr.str(); } else diff --git a/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.rotate/out b/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.rotate/out index ed2aff0164..1e5e1b05c6 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.rotate/out +++ b/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.rotate/out @@ -32,19 +32,10 @@ test.2011-03-07-12-00-05.ds test 11-03-07_12.00.05 11-03-07_12.59.55 1 dataserie -extent offset ExtentType -40 DataSeries: XmlType -392 test -508 DataSeries: ExtentIndex # Extent, type='test' t id.orig_h id.orig_p id.resp_h id.resp_p 1299466805.000000 10.0.0.1 20 10.0.0.2 1024 1299470395.000000 10.0.0.2 20 10.0.0.3 0 -# Extent, type='DataSeries: ExtentIndex' -offset extenttype -40 DataSeries: XmlType -392 test -508 DataSeries: ExtentIndex > test.2011-03-07-04-00-05.ds # Extent Types ... 
@@ -69,19 +60,10 @@ offset extenttype -extent offset ExtentType -40 DataSeries: XmlType -392 test -516 DataSeries: ExtentIndex # Extent, type='test' t id.orig_h id.orig_p id.resp_h id.resp_p 1299470405.000000 10.0.0.1 20 10.0.0.2 1025 1299473995.000000 10.0.0.2 20 10.0.0.3 1 -# Extent, type='DataSeries: ExtentIndex' -offset extenttype -40 DataSeries: XmlType -392 test -516 DataSeries: ExtentIndex > test.2011-03-07-05-00-05.ds # Extent Types ... @@ -106,19 +88,10 @@ offset extenttype -extent offset ExtentType -40 DataSeries: XmlType -392 test -516 DataSeries: ExtentIndex # Extent, type='test' t id.orig_h id.orig_p id.resp_h id.resp_p 1299474005.000000 10.0.0.1 20 10.0.0.2 1026 1299477595.000000 10.0.0.2 20 10.0.0.3 2 -# Extent, type='DataSeries: ExtentIndex' -offset extenttype -40 DataSeries: XmlType -392 test -516 DataSeries: ExtentIndex > test.2011-03-07-06-00-05.ds # Extent Types ... @@ -143,19 +116,10 @@ offset extenttype -extent offset ExtentType -40 DataSeries: XmlType -392 test -516 DataSeries: ExtentIndex # Extent, type='test' t id.orig_h id.orig_p id.resp_h id.resp_p 1299477605.000000 10.0.0.1 20 10.0.0.2 1027 1299481195.000000 10.0.0.2 20 10.0.0.3 3 -# Extent, type='DataSeries: ExtentIndex' -offset extenttype -40 DataSeries: XmlType -392 test -516 DataSeries: ExtentIndex > test.2011-03-07-07-00-05.ds # Extent Types ... @@ -180,19 +144,10 @@ offset extenttype -extent offset ExtentType -40 DataSeries: XmlType -392 test -512 DataSeries: ExtentIndex # Extent, type='test' t id.orig_h id.orig_p id.resp_h id.resp_p 1299481205.000000 10.0.0.1 20 10.0.0.2 1028 1299484795.000000 10.0.0.2 20 10.0.0.3 4 -# Extent, type='DataSeries: ExtentIndex' -offset extenttype -40 DataSeries: XmlType -392 test -512 DataSeries: ExtentIndex > test.2011-03-07-08-00-05.ds # Extent Types ... 
@@ -217,19 +172,10 @@ offset extenttype -extent offset ExtentType -40 DataSeries: XmlType -392 test -516 DataSeries: ExtentIndex # Extent, type='test' t id.orig_h id.orig_p id.resp_h id.resp_p 1299484805.000000 10.0.0.1 20 10.0.0.2 1029 1299488395.000000 10.0.0.2 20 10.0.0.3 5 -# Extent, type='DataSeries: ExtentIndex' -offset extenttype -40 DataSeries: XmlType -392 test -516 DataSeries: ExtentIndex > test.2011-03-07-09-00-05.ds # Extent Types ... @@ -254,19 +200,10 @@ offset extenttype -extent offset ExtentType -40 DataSeries: XmlType -392 test -516 DataSeries: ExtentIndex # Extent, type='test' t id.orig_h id.orig_p id.resp_h id.resp_p 1299488405.000000 10.0.0.1 20 10.0.0.2 1030 1299491995.000000 10.0.0.2 20 10.0.0.3 6 -# Extent, type='DataSeries: ExtentIndex' -offset extenttype -40 DataSeries: XmlType -392 test -516 DataSeries: ExtentIndex > test.2011-03-07-10-00-05.ds # Extent Types ... @@ -291,19 +228,10 @@ offset extenttype -extent offset ExtentType -40 DataSeries: XmlType -392 test -516 DataSeries: ExtentIndex # Extent, type='test' t id.orig_h id.orig_p id.resp_h id.resp_p 1299492005.000000 10.0.0.1 20 10.0.0.2 1031 1299495595.000000 10.0.0.2 20 10.0.0.3 7 -# Extent, type='DataSeries: ExtentIndex' -offset extenttype -40 DataSeries: XmlType -392 test -516 DataSeries: ExtentIndex > test.2011-03-07-11-00-05.ds # Extent Types ... @@ -328,19 +256,10 @@ offset extenttype -extent offset ExtentType -40 DataSeries: XmlType -392 test -516 DataSeries: ExtentIndex # Extent, type='test' t id.orig_h id.orig_p id.resp_h id.resp_p 1299495605.000000 10.0.0.1 20 10.0.0.2 1032 1299499195.000000 10.0.0.2 20 10.0.0.3 8 -# Extent, type='DataSeries: ExtentIndex' -offset extenttype -40 DataSeries: XmlType -392 test -516 DataSeries: ExtentIndex > test.2011-03-07-12-00-05.ds # Extent Types ... 
@@ -365,16 +284,7 @@ offset extenttype -extent offset ExtentType -40 DataSeries: XmlType -392 test -516 DataSeries: ExtentIndex # Extent, type='test' t id.orig_h id.orig_p id.resp_h id.resp_p 1299499205.000000 10.0.0.1 20 10.0.0.2 1033 1299502795.000000 10.0.0.2 20 10.0.0.3 9 -# Extent, type='DataSeries: ExtentIndex' -offset extenttype -40 DataSeries: XmlType -392 test -516 DataSeries: ExtentIndex diff --git a/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.test-logging/ssh.ds.txt b/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.test-logging/ssh.ds.txt index 245bdcd9be..e9640dfd9d 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.test-logging/ssh.ds.txt +++ b/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.test-logging/ssh.ds.txt @@ -25,19 +25,10 @@ -extent offset ExtentType -40 DataSeries: XmlType -436 ssh -644 DataSeries: ExtentIndex # Extent, type='ssh' t id.orig_h id.orig_p id.resp_h id.resp_p status country -1337058239.030366 1.2.3.4 1234 2.3.4.5 80 success unknown -1337058239.030366 1.2.3.4 1234 2.3.4.5 80 failure US -1337058239.030366 1.2.3.4 1234 2.3.4.5 80 failure UK -1337058239.030366 1.2.3.4 1234 2.3.4.5 80 success BR -1337058239.030366 1.2.3.4 1234 2.3.4.5 80 failure MX -# Extent, type='DataSeries: ExtentIndex' -offset extenttype -40 DataSeries: XmlType -436 ssh -644 DataSeries: ExtentIndex +1337216256.956476 1.2.3.4 1234 2.3.4.5 80 success unknown +1337216256.956476 1.2.3.4 1234 2.3.4.5 80 failure US +1337216256.956476 1.2.3.4 1234 2.3.4.5 80 failure UK +1337216256.956476 1.2.3.4 1234 2.3.4.5 80 success BR +1337216256.956476 1.2.3.4 1234 2.3.4.5 80 failure MX diff --git a/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.time-as-int/conn.ds.txt b/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.time-as-int/conn.ds.txt index 65d4ba0a67..1d7cba3b3c 100644 --- 
a/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.time-as-int/conn.ds.txt +++ b/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.time-as-int/conn.ds.txt @@ -49,10 +49,6 @@ -extent offset ExtentType -40 DataSeries: XmlType -672 conn -2948 DataSeries: ExtentIndex # Extent, type='conn' ts uid id.orig_h id.orig_p id.resp_h id.resp_p proto service duration orig_bytes resp_bytes conn_state local_orig missed_bytes history orig_pkts orig_ip_bytes resp_pkts resp_ip_bytes 1300475167096535 UWkUyAuUGXf 141.142.220.202 5353 224.0.0.251 5353 udp dns 0 0 0 S0 F 0 D 1 73 0 0 @@ -89,8 +85,3 @@ ts uid id.orig_h id.orig_p id.resp_h id.resp_p proto service duration orig_bytes 1300475169780331 2cx26uAvUPl 141.142.220.235 6705 173.192.163.128 80 tcp 0 0 0 OTH F 0 h 0 0 1 48 1300475168724007 j4u32Pc5bif 141.142.220.118 48649 208.80.152.118 80 tcp 119904 525 232 S1 F 525 ShACad 3 164 3 396 1300475168855330 c4Zw9TmAE05 141.142.220.118 49997 208.80.152.3 80 tcp 219720 1125 734 S1 F 1125 ShACad 4 216 4 950 -# Extent, type='DataSeries: ExtentIndex' -offset extenttype -40 DataSeries: XmlType -672 conn -2948 DataSeries: ExtentIndex diff --git a/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.wikipedia/conn.ds.txt b/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.wikipedia/conn.ds.txt index 104831f027..3cafa078de 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.wikipedia/conn.ds.txt +++ b/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.wikipedia/conn.ds.txt @@ -49,10 +49,6 @@ -extent offset ExtentType -40 DataSeries: XmlType -700 conn -2860 DataSeries: ExtentIndex # Extent, type='conn' ts uid id.orig_h id.orig_p id.resp_h id.resp_p proto service duration orig_bytes resp_bytes conn_state local_orig missed_bytes history orig_pkts orig_ip_bytes resp_pkts resp_ip_bytes 1300475167.096535 UWkUyAuUGXf 141.142.220.202 5353 224.0.0.251 5353 udp dns 0.000000 0 0 S0 F 0 D 1 73 0 0 @@ -89,8 
+85,3 @@ ts uid id.orig_h id.orig_p id.resp_h id.resp_p proto service duration orig_bytes 1300475169.780331 2cx26uAvUPl 141.142.220.235 6705 173.192.163.128 80 tcp 0.000000 0 0 OTH F 0 h 0 0 1 48 1300475168.724007 j4u32Pc5bif 141.142.220.118 48649 208.80.152.118 80 tcp 0.119905 525 232 S1 F 525 ShACad 3 164 3 396 1300475168.855330 c4Zw9TmAE05 141.142.220.118 49997 208.80.152.3 80 tcp 0.219720 1125 734 S1 F 1125 ShACad 4 216 4 950 -# Extent, type='DataSeries: ExtentIndex' -offset extenttype -40 DataSeries: XmlType -700 conn -2860 DataSeries: ExtentIndex diff --git a/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.wikipedia/http.ds.txt b/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.wikipedia/http.ds.txt index 0f1eebd251..adb7bb3f7b 100644 --- a/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.wikipedia/http.ds.txt +++ b/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.wikipedia/http.ds.txt @@ -63,10 +63,6 @@ -extent offset ExtentType -40 DataSeries: XmlType -804 http -1252 DataSeries: ExtentIndex # Extent, type='http' ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth method host uri referrer user_agent request_body_len response_body_len status_code status_msg info_code info_msg filename tags username password proxied mime_type md5 extraction_file 1300475168.843894 j4u32Pc5bif 141.142.220.118 48649 208.80.152.118 80 0 0 0 304 Not Modified 0 @@ -83,8 +79,3 @@ ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth method host uri refer 1300475169.074793 0Q4FH8sESw5 141.142.220.118 50000 208.80.152.3 80 0 0 0 304 Not Modified 0 1300475169.074938 slFea8xwSmb 141.142.220.118 49999 208.80.152.3 80 0 0 0 304 Not Modified 0 1300475169.075065 UfGkYA2HI2g 141.142.220.118 50001 208.80.152.3 80 0 0 0 304 Not Modified 0 -# Extent, type='DataSeries: ExtentIndex' -offset extenttype -40 DataSeries: XmlType -804 http -1252 DataSeries: ExtentIndex diff --git 
a/testing/btest/core/leaks/dataseries-rotate.bro b/testing/btest/core/leaks/dataseries-rotate.bro index 188de9717b..1afc517d1a 100644 --- a/testing/btest/core/leaks/dataseries-rotate.bro +++ b/testing/btest/core/leaks/dataseries-rotate.bro @@ -3,6 +3,7 @@ # @TEST-REQUIRES: bro --help 2>&1 | grep -q mem-leaks # # @TEST-GROUP: leaks +# @TEST-GROUP: dataseries # # @TEST-EXEC: HEAP_CHECK_DUMP_DIRECTORY=. HEAPCHECK=local bro -m -b -r %DIR/../rotation.trace %INPUT Log::default_writer=Log::WRITER_DATASERIES diff --git a/testing/btest/core/leaks/dataseries.bro b/testing/btest/core/leaks/dataseries.bro index 886ee54dd9..01dc3ffd79 100644 --- a/testing/btest/core/leaks/dataseries.bro +++ b/testing/btest/core/leaks/dataseries.bro @@ -4,6 +4,7 @@ # @TEST-REQUIRES: bro --help 2>&1 | grep -q mem-leaks # # @TEST-GROUP: leaks +# @TEST-GROUP: dataseries # # @TEST-REQUIRES: bro --help 2>&1 | grep -q mem-leaks # @TEST-EXEC: HEAP_CHECK_DUMP_DIRECTORY=. HEAPCHECK=local bro -m -r $TRACES/wikipedia.trace Log::default_writer=Log::WRITER_DATASERIES diff --git a/testing/btest/scripts/base/frameworks/logging/dataseries/rotate.bro b/testing/btest/scripts/base/frameworks/logging/dataseries/rotate.bro index 6a0cee5888..652a4596fb 100644 --- a/testing/btest/scripts/base/frameworks/logging/dataseries/rotate.bro +++ b/testing/btest/scripts/base/frameworks/logging/dataseries/rotate.bro @@ -3,7 +3,7 @@ # @TEST-GROUP: dataseries # # @TEST-EXEC: bro -b -r %DIR/../rotation.trace %INPUT 2>&1 Log::default_writer=Log::WRITER_DATASERIES | grep "test" >out -# @TEST-EXEC: for i in test.*.ds; do printf '> %s\n' $i; ds2txt $i; done >>out +# @TEST-EXEC: for i in test.*.ds; do printf '> %s\n' $i; ds2txt --skip-index $i; done >>out # @TEST-EXEC: btest-diff out module Test; diff --git a/testing/btest/scripts/base/frameworks/logging/dataseries/test-logging.bro b/testing/btest/scripts/base/frameworks/logging/dataseries/test-logging.bro index 0c5c52460b..ee0426ae55 100644 --- 
a/testing/btest/scripts/base/frameworks/logging/dataseries/test-logging.bro +++ b/testing/btest/scripts/base/frameworks/logging/dataseries/test-logging.bro @@ -3,7 +3,7 @@ # @TEST-GROUP: dataseries # # @TEST-EXEC: bro -b %INPUT Log::default_writer=Log::WRITER_DATASERIES -# @TEST-EXEC: ds2txt ssh.ds >ssh.ds.txt +# @TEST-EXEC: ds2txt --skip-index ssh.ds >ssh.ds.txt # @TEST-EXEC: btest-diff ssh.ds.txt module SSH; diff --git a/testing/btest/scripts/base/frameworks/logging/dataseries/time-as-int.bro b/testing/btest/scripts/base/frameworks/logging/dataseries/time-as-int.bro index e4dd6a5431..5e3f864b33 100644 --- a/testing/btest/scripts/base/frameworks/logging/dataseries/time-as-int.bro +++ b/testing/btest/scripts/base/frameworks/logging/dataseries/time-as-int.bro @@ -3,7 +3,7 @@ # @TEST-GROUP: dataseries # # @TEST-EXEC: bro -r $TRACES/wikipedia.trace %INPUT Log::default_writer=Log::WRITER_DATASERIES -# @TEST-EXEC: ds2txt conn.ds >conn.ds.txt +# @TEST-EXEC: ds2txt --skip-index conn.ds >conn.ds.txt # @TEST-EXEC: btest-diff conn.ds.txt redef LogDataSeries::use_integer_for_time = T; diff --git a/testing/btest/scripts/base/frameworks/logging/dataseries/wikipedia.bro b/testing/btest/scripts/base/frameworks/logging/dataseries/wikipedia.bro index 38726a8b10..ee1342c470 100644 --- a/testing/btest/scripts/base/frameworks/logging/dataseries/wikipedia.bro +++ b/testing/btest/scripts/base/frameworks/logging/dataseries/wikipedia.bro @@ -3,7 +3,7 @@ # @TEST-GROUP: dataseries # # @TEST-EXEC: bro -r $TRACES/wikipedia.trace Log::default_writer=Log::WRITER_DATASERIES -# @TEST-EXEC: ds2txt conn.ds >conn.ds.txt -# @TEST-EXEC: ds2txt http.ds >http.ds.txt +# @TEST-EXEC: ds2txt --skip-index conn.ds >conn.ds.txt +# @TEST-EXEC: ds2txt --skip-index http.ds >http.ds.txt # @TEST-EXEC: btest-diff conn.ds.txt # @TEST-EXEC: btest-diff http.ds.txt From 5dae925f670c1f9976ae6a344fb60293f34a6df2 Mon Sep 17 00:00:00 2001 From: Robin Sommer Date: Wed, 16 May 2012 18:24:55 -0700 Subject: [PATCH 19/20] Fixing 
a rotation race condition at termination. Noticed with DS, but could just as well happen with ASCII. --- src/logging/Manager.cc | 13 +++++++++++++ src/logging/Manager.h | 1 + src/threading/Manager.h | 6 ++++++ 3 files changed, 20 insertions(+) diff --git a/src/logging/Manager.cc b/src/logging/Manager.cc index 7f785e1080..a4dea1c909 100644 --- a/src/logging/Manager.cc +++ b/src/logging/Manager.cc @@ -7,6 +7,7 @@ #include "../NetVar.h" #include "../Net.h" +#include "threading/Manager.h" #include "threading/SerialTypes.h" #include "Manager.h" @@ -124,6 +125,7 @@ Manager::Stream::~Stream() Manager::Manager() { + rotations_pending = 0; } Manager::~Manager() @@ -1127,6 +1129,13 @@ bool Manager::Flush(EnumVal* id) void Manager::Terminate() { + // Make sure we process all the pending rotations. + while ( rotations_pending ) + { + thread_mgr->ForceProcessing(); // A blatant layering violation ... + usleep(1000); + } + for ( vector::iterator s = streams.begin(); s != streams.end(); ++s ) { if ( ! *s ) @@ -1235,6 +1244,8 @@ void Manager::Rotate(WriterInfo* winfo) // Trigger the rotation. winfo->writer->Rotate(tmp, winfo->open_time, network_time, terminating); + + ++rotations_pending; } bool Manager::FinishedRotation(WriterFrontend* writer, string new_name, string old_name, @@ -1243,6 +1254,8 @@ bool Manager::FinishedRotation(WriterFrontend* writer, string new_name, string o DBG_LOG(DBG_LOGGING, "Finished rotating %s at %.6f, new name %s", writer->Path().c_str(), network_time, new_name.c_str()); + --rotations_pending; + WriterInfo* winfo = FindWriter(writer); if ( ! winfo ) return true; diff --git a/src/logging/Manager.h b/src/logging/Manager.h index 5af3e55b4a..f5e62b0683 100644 --- a/src/logging/Manager.h +++ b/src/logging/Manager.h @@ -200,6 +200,7 @@ private: WriterInfo* FindWriter(WriterFrontend* writer); vector streams; // Indexed by stream enum. + int rotations_pending; // Number of rotations not yet finished. 
}; } diff --git a/src/threading/Manager.h b/src/threading/Manager.h index 7d9ba766d4..ab8189f39d 100644 --- a/src/threading/Manager.h +++ b/src/threading/Manager.h @@ -77,6 +77,12 @@ public: */ int NumThreads() const { return all_threads.size(); } + /** Manually triggers processing of any thread input. This can be useful + * if the main thread is waiting for a specific message from a child. + * Usually, though, one should avoid using it. + */ + void ForceProcessing() { Process(); } + protected: friend class BasicThread; friend class MsgThread; From 122f6ee4c64b46cd5264ea0964ba366ddc73446c Mon Sep 17 00:00:00 2001 From: Robin Sommer Date: Wed, 16 May 2012 18:28:51 -0700 Subject: [PATCH 20/20] Moving trace for rotation test into traces directory. --- .../frameworks/logging => Traces}/rotation.trace | Bin testing/btest/core/leaks/dataseries-rotate.bro | 2 +- .../base/frameworks/logging/dataseries/rotate.bro | 2 +- .../scripts/base/frameworks/logging/rotate.bro | 2 +- 4 files changed, 3 insertions(+), 3 deletions(-) rename testing/btest/{scripts/base/frameworks/logging => Traces}/rotation.trace (100%) diff --git a/testing/btest/scripts/base/frameworks/logging/rotation.trace b/testing/btest/Traces/rotation.trace similarity index 100% rename from testing/btest/scripts/base/frameworks/logging/rotation.trace rename to testing/btest/Traces/rotation.trace diff --git a/testing/btest/core/leaks/dataseries-rotate.bro b/testing/btest/core/leaks/dataseries-rotate.bro index 1afc517d1a..f0a5f3079d 100644 --- a/testing/btest/core/leaks/dataseries-rotate.bro +++ b/testing/btest/core/leaks/dataseries-rotate.bro @@ -5,7 +5,7 @@ # @TEST-GROUP: leaks # @TEST-GROUP: dataseries # -# @TEST-EXEC: HEAP_CHECK_DUMP_DIRECTORY=. HEAPCHECK=local bro -m -b -r %DIR/../rotation.trace %INPUT Log::default_writer=Log::WRITER_DATASERIES +# @TEST-EXEC: HEAP_CHECK_DUMP_DIRECTORY=. 
HEAPCHECK=local bro -m -b -r $TRACES/rotation.trace %INPUT Log::default_writer=Log::WRITER_DATASERIES module Test; diff --git a/testing/btest/scripts/base/frameworks/logging/dataseries/rotate.bro b/testing/btest/scripts/base/frameworks/logging/dataseries/rotate.bro index 652a4596fb..7b708473e3 100644 --- a/testing/btest/scripts/base/frameworks/logging/dataseries/rotate.bro +++ b/testing/btest/scripts/base/frameworks/logging/dataseries/rotate.bro @@ -2,7 +2,7 @@ # @TEST-REQUIRES: has-writer DataSeries && which ds2txt # @TEST-GROUP: dataseries # -# @TEST-EXEC: bro -b -r %DIR/../rotation.trace %INPUT 2>&1 Log::default_writer=Log::WRITER_DATASERIES | grep "test" >out +# @TEST-EXEC: bro -b -r ${TRACES}/rotation.trace %INPUT 2>&1 Log::default_writer=Log::WRITER_DATASERIES | grep "test" >out # @TEST-EXEC: for i in test.*.ds; do printf '> %s\n' $i; ds2txt --skip-index $i; done >>out # @TEST-EXEC: btest-diff out diff --git a/testing/btest/scripts/base/frameworks/logging/rotate.bro b/testing/btest/scripts/base/frameworks/logging/rotate.bro index 14123c56c6..212dba3bf7 100644 --- a/testing/btest/scripts/base/frameworks/logging/rotate.bro +++ b/testing/btest/scripts/base/frameworks/logging/rotate.bro @@ -1,5 +1,5 @@ # -# @TEST-EXEC: bro -b -r %DIR/rotation.trace %INPUT 2>&1 | grep "test" >out +# @TEST-EXEC: bro -b -r ${TRACES}/rotation.trace %INPUT 2>&1 | grep "test" >out # @TEST-EXEC: for i in test.*.log; do printf '> %s\n' $i; cat $i; done >>out # @TEST-EXEC: btest-diff out