diff --git a/scripts/base/frameworks/logging/writers/dataseries.bro b/scripts/base/frameworks/logging/writers/dataseries.bro
index c8ba922d2a..daf59ebf42 100644
--- a/scripts/base/frameworks/logging/writers/dataseries.bro
+++ b/scripts/base/frameworks/logging/writers/dataseries.bro
@@ -10,18 +10,18 @@ export {
 	## 'lzo' -- LZO compression. Very fast decompression times.
 	## 'gz'  -- GZIP compression. Slower than LZF, but also produces smaller output.
 	## 'bz2' -- BZIP2 compression. Slower than GZIP, but also produces smaller output.
-	const ds_compression = "lzf" &redef;
+	const compression = "lzf" &redef;
 
 	## The extent buffer size.
 	## Larger values here lead to better compression and more efficient writes,
 	## but they also increase the lag between the time events are received and
 	## the time they are actually written to disk.
-	const ds_extent_size = 65536 &redef;
+	const extent_size = 65536 &redef;
 
 	## Should we dump the XML schema we use for this .ds file to disk?
 	## If yes, the XML schema shares the name of the logfile, but has
 	## an XML ending.
-	const ds_dump_schema = T &redef;
+	const dump_schema = F &redef;
 
 	## How many threads should DataSeries spawn to perform compression?
 	## Note that this dictates the number of threads per log stream. If
@@ -31,7 +31,7 @@ export {
 	## Default value is 1, which will spawn one thread / core / stream.
 	##
 	## MAX is 128, MIN is 1.
-	const ds_num_threads = 1 &redef;
+	const num_threads = 1 &redef;
 
 	## Should time be stored as an integer or a double?
 	## Storing time as a double leads to possible precision issues and
@@ -41,7 +41,7 @@ export {
 	## when working with the raw DataSeries format.
 	##
 	## Double timestamps are used by default.
-	const ds_use_integer = F &redef;
+	const use_integer_for_time = F &redef;
 }
 
 # Default function to postprocess a rotated DataSeries log file. It moves the
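The renames above drop the now-redundant "ds_" prefix from the script-level options. A minimal usage sketch of the new names from a site script (option names as introduced by this patch; the concrete values are illustrative only):

	redef LogDataSeries::compression = "gz";    # smaller output than lzf, slower to write
	redef LogDataSeries::extent_size = 131072;  # larger extents compress better but flush later
	redef LogDataSeries::dump_schema = T;       # also write the XML schema beside the .ds file
	redef LogDataSeries::num_threads = 2;       # compression threads per log stream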
diff --git a/src/logging.bif b/src/logging.bif
index 6e66de8772..efc6ed0b4b 100644
--- a/src/logging.bif
+++ b/src/logging.bif
@@ -76,7 +76,8 @@ const unset_field: string;
 
 module LogDataSeries;
 
-const ds_compression: string;
-const ds_extent_size: count;
-const ds_dump_schema: bool;
-const ds_num_threads: count;
+const compression: string;
+const extent_size: count;
+const dump_schema: bool;
+const use_integer_for_time: bool;
+const num_threads: count;
diff --git a/src/logging/writers/DataSeries.cc b/src/logging/writers/DataSeries.cc
index 27c4cd6009..5ee8a812da 100644
--- a/src/logging/writers/DataSeries.cc
+++ b/src/logging/writers/DataSeries.cc
@@ -14,78 +14,6 @@
 using namespace logging;
 using namespace writer;
 
-// NOTE: Naming conventions are a little bit scattershot at the moment.
-// Within the scope of this file, a function name prefixed by '_' denotes a
-// static function.
-
-// ************************ LOCAL PROTOTYPES *********************************
-
-struct SchemaValue;
-
-/**
- * Turns a log value into a std::string. Uses an ostringstream to do the
- * heavy lifting, but still need to switch on the type to know which value
- * in the union to give to the string string for processing.
- *
- * @param val The value we wish to convert to a string
- * @return the string value of val
- */
-static std::string _LogValueToString(threading::Value* val);
-
-/**
- * Takes a field type and converts it to a relevant DataSeries type.
- *
- * @param field We extract the type from this and convert it into a relevant DS type.
- * @return String representation of type that DataSeries can understand.
- */
-static string _GetDSFieldType(const threading::Field* field);
-
-/**
- * Takes a field type and converts it to a readable string.
- *
- * @param field We extract the type from this and convert it into a readable string.
- * @return String representation of the field's type
- */
-static string _GetBroTypeString(const threading::Field *field);
-
-/**
- * Takes a list of types, a list of names, and a title, and uses it to construct a valid DataSeries XML schema
- * thing, which is then returned as a std::string
- *
- * @param opts std::vector of strings containing a list of options to be appended to each field (e.g. "pack_relative=yes")
- * @param sTitle Name of this schema. Ideally, these schemas would be aggregated and re-used.
- */
-static string _BuildDSSchemaFromFieldTypes(const vector<SchemaValue>& vals, string sTitle);
-
-/**
- * Are there any options we should put into the XML schema?
- *
- * @param field We extract the type from this and return any options that make sense for that type.
- * @return Options that can be added directly to the XML (e.g. "pack_relative=\"yes\"")
- */
-static std::string _GetDSOptionsForType(const threading::Field *field);
-
-/**
- * Internal helper structure; populate a vector of these which is passed to the XML generator for its use.
- */
-struct SchemaValue
-{
-	string ds_type;
-	string bro_type;
-	string field_name;
-	string field_options;
-
-	SchemaValue(const threading::Field *field)
-	{
-		ds_type = _GetDSFieldType(field);
-		field_name = string(field->name);
-		field_options = _GetDSOptionsForType(field);
-		bro_type = _GetBroTypeString(field);
-	}
-};
-
-// ************************ LOCAL IMPL *********************************
-
 std::string DataSeries::LogValueToString(threading::Value *val)
 	{
 	const int strsz = 1024;
@@ -127,7 +55,11 @@ std::string DataSeries::LogValueToString(threading::Value *val)
 	// in the near-term, this *should* lead to better pack_relative (and thus smaller output files).
 	case TYPE_TIME:
 	case TYPE_INTERVAL:
-		ostr << (unsigned long)(DataSeries::TIME_SCALE * val->val.double_val);
+		if ( ds_use_integer_for_time )
+			ostr << (unsigned long)(DataSeries::TIME_SCALE * val->val.double_val);
+		else
+			ostr << val->val.double_val;
+
 		return ostr.str();
 
 	case TYPE_DOUBLE:
@@ -186,7 +118,7 @@ std::string DataSeries::LogValueToString(threading::Value *val)
 		}
 	}
 
-static string _GetDSFieldType(const threading::Field *field)
+string DataSeries::GetDSFieldType(const threading::Field *field)
 	{
 	switch(field->type)
 		{
@@ -197,13 +129,15 @@ static string _GetDSFieldType(const threading::Field *field)
 	case TYPE_COUNTER:
 	case TYPE_PORT:
 	case TYPE_INT:
-	case TYPE_TIME:
-	case TYPE_INTERVAL:
 		return "int64";
 
 	case TYPE_DOUBLE:
 		return "double";
 
+	case TYPE_TIME:
+	case TYPE_INTERVAL:
+		return ds_use_integer_for_time ? "int64" : "double";
+
 	case TYPE_SUBNET:
 	case TYPE_ADDR:
 	case TYPE_ENUM:
@@ -217,7 +151,7 @@ static string _GetDSFieldType(const threading::Field *field)
 		}
 	}
 
-static string _GetBroTypeString(const threading::Field *field)
+string DataSeries::GetBroTypeString(const threading::Field *field)
 	{
 	switch(field->type)
 		{
@@ -256,7 +190,7 @@ static string _GetBroTypeString(const threading::Field *field)
 		}
 	}
 
-static string _BuildDSSchemaFromFieldTypes(const vector<SchemaValue>& vals, string sTitle)
+string DataSeries::BuildDSSchemaFromFieldTypes(const vector<SchemaValue>& vals, string sTitle)
 	{
 	if("" == sTitle)
 		{
@@ -276,13 +210,21 @@ static string _BuildDSSchemaFromFieldTypes(const vector<SchemaValue>& vals, string sTitle)
 	return xmlschema;
 	}
 
-static std::string _GetDSOptionsForType(const threading::Field *field)
+std::string DataSeries::GetDSOptionsForType(const threading::Field *field)
 	{
 	switch(field->type)
 		{
 	case TYPE_TIME:
 	case TYPE_INTERVAL:
-		return "pack_relative=\"" + std::string(field->name) + "\"";
+		{
+		std::string s = "pack_relative=\"" + std::string(field->name) + "\"";
+
+		if ( ! ds_use_integer_for_time )
+			s += " pack_scale=\"1000000\"";
+
+		return s;
+		}
+
 	case TYPE_SUBNET:
 	case TYPE_ADDR:
 	case TYPE_ENUM:
@@ -300,16 +242,40 @@ static std::string _GetDSOptionsForType(const threading::Field *field)
 
 DataSeries::DataSeries(WriterFrontend* frontend) : WriterBackend(frontend)
 	{
-	ds_compression = string((const char *)BifConst::LogDataSeries::ds_compression->Bytes(), BifConst::LogDataSeries::ds_compression->Len());
-	ds_dump_schema = BifConst::LogDataSeries::ds_dump_schema;
-	ds_extent_size = BifConst::LogDataSeries::ds_extent_size;
-	ds_num_threads = BifConst::LogDataSeries::ds_num_threads;
+	ds_compression = string((const char *)BifConst::LogDataSeries::compression->Bytes(), BifConst::LogDataSeries::compression->Len());
+	ds_dump_schema = BifConst::LogDataSeries::dump_schema;
+	ds_extent_size = BifConst::LogDataSeries::extent_size;
+	ds_num_threads = BifConst::LogDataSeries::num_threads;
+	ds_use_integer_for_time = BifConst::LogDataSeries::use_integer_for_time;
 	}
 
 DataSeries::~DataSeries()
 	{
 	}
 
+bool DataSeries::OpenLog(string path)
+	{
+	log_file = new DataSeriesSink(path + ".ds", compress_type);
+	log_file->writeExtentLibrary(log_types);
+
+	for(size_t i = 0; i < schema_list.size(); ++i)
+		extents.insert(std::make_pair(schema_list[i].field_name, GeneralField::create(log_series, schema_list[i].field_name)));
+
+	if(ds_extent_size < ROW_MIN)
+		{
+		fprintf(stderr, "%d is not a valid value for 'extent_size'. Using min of %d instead.\n", (int)ds_extent_size, (int)ROW_MIN);
+		ds_extent_size = ROW_MIN;
+		}
+	else if(ds_extent_size > ROW_MAX)
+		{
+		fprintf(stderr, "%d is not a valid value for 'extent_size'. Using max of %d instead.\n", (int)ds_extent_size, (int)ROW_MAX);
+		ds_extent_size = ROW_MAX;
+		}
+
+	log_output = new OutputModule(*log_file, log_series, log_type, ds_extent_size);
+
+	return true;
+	}
+
 bool DataSeries::DoInit(string path, int num_fields, const threading::Field* const * fields)
 	{
 	// We first construct an XML schema thing (and, if ds_dump_schema is
@@ -333,14 +299,18 @@ bool DataSeries::DoInit(string path, int num_fields, const threading::Field* con
 		{
 		DataSeriesSink::setCompressorCount(ds_num_threads);
 		}
-	vector<SchemaValue> schema_list;
+
 	for ( int i = 0; i < num_fields; i++ )
 		{
 		const threading::Field* field = fields[i];
-		SchemaValue val(field);
+		SchemaValue val;
+		val.ds_type = GetDSFieldType(field);
+		val.field_name = string(field->name);
+		val.field_options = GetDSOptionsForType(field);
+		val.bro_type = GetBroTypeString(field);
 		schema_list.push_back(val);
 		}
-	string schema = _BuildDSSchemaFromFieldTypes(schema_list, path);
+	string schema = BuildDSSchemaFromFieldTypes(schema_list, path);
 	if(ds_dump_schema)
 		{
 		FILE * pFile;
@@ -353,7 +323,7 @@ bool DataSeries::DoInit(string path, int num_fields, const threading::Field* con
 		fclose (pFile);
 		}
 
-	int compress_type = Extent::compress_all;
+	compress_type = Extent::compress_all;
 
 	if(ds_compression == "lzf")
 		{
@@ -385,28 +355,11 @@ bool DataSeries::DoInit(string path, int num_fields, const threading::Field* con
 		fprintf(stderr, "Defaulting to 'any'\n");
 		}
 
-	log_type = const_cast<ExtentType *>(log_types.registerType(schema));
+	log_type = const_cast<ExtentType*>(log_types.registerType(schema));
 	log_series.setType(*log_type);
 
-	log_file = new DataSeriesSink(path + ".ds", compress_type);
-	log_file->writeExtentLibrary(log_types);
-
-	for(size_t i = 0; i < schema_list.size(); ++i)
-		extents.insert(std::make_pair(schema_list[i].field_name, GeneralField::create(log_series, schema_list[i].field_name)));
-
-	if(ds_extent_size < ROW_MIN)
-		{
-		fprintf(stderr, "%d is not a valid value for 'rows'. Using min of %d instead.\n", (int)ds_extent_size, (int)ROW_MIN);
-		ds_extent_size = ROW_MIN;
-		}
-	else if(ds_extent_size > ROW_MAX)
-		{
-		fprintf(stderr, "%d is not a valid value for 'rows'. Using max of %d instead.\n", (int)ds_extent_size, (int)ROW_MAX);
-		ds_extent_size = ROW_MAX;
-		}
-	log_output = new OutputModule(*log_file, log_series, log_type, ds_extent_size);
-
-	return true;
+	return OpenLog(path);
 	}
 
@@ -416,18 +369,26 @@ bool DataSeries::DoFlush()
 	return true;
 	}
 
-bool DataSeries::DoFinish()
-{
-	for(ExtentIterator iter = extents.begin();
-		iter != extents.end(); ++iter)
-	{
+void DataSeries::CloseLog()
+	{
+	for( ExtentIterator iter = extents.begin(); iter != extents.end(); ++iter )
 		delete iter->second;
-	}
+
 	extents.clear();
-	// Don't delete the file before you delete the output, or bad things happen.
+
+	// Don't delete the file before you delete the output, or bad things
+	// happen.
 	delete log_output;
 	delete log_file;
+
+	log_output = 0;
+	log_file = 0;
+	}
+
+bool DataSeries::DoFinish()
+{
+	CloseLog();
+
+	return WriterBackend::DoFinish();
 }
 
@@ -453,8 +414,7 @@ bool DataSeries::DoWrite(int num_fields, const threading::Field* const * fields,
 
 bool DataSeries::DoRotate(string rotated_path, double open, double close, bool terminating)
 	{
 	// Note that if DS files are rotated too often, the aggregate log size will be (much) larger.
-
-	DoFinish();
+	CloseLog();
 
 	string dsname = Path() + ".ds";
 	string nname = rotated_path + ".ds";
@@ -466,7 +426,7 @@ bool DataSeries::DoRotate(string rotated_path, double open, double close, bool t
 		return false;
 		}
 
-	return DoInit(Path(), NumFields(), Fields());
+	return OpenLog(Path());
 	}
 
 bool DataSeries::DoSetBuf(bool enabled)
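The TYPE_TIME/TYPE_INTERVAL changes above give the writer two time encodings: doubles in seconds (the default, now packed with pack_scale="1000000") or int64 fixed-point microseconds via TIME_SCALE = 1000000. A one-line sketch of switching to the integer encoding; the effect shows up in the time-as-int baseline below, where timestamps print as 1300475167096535 instead of 1.3e+09:

	redef LogDataSeries::use_integer_for_time = T;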
diff --git a/src/logging/writers/DataSeries.h b/src/logging/writers/DataSeries.h
index 5331975937..319cb72ec5 100644
--- a/src/logging/writers/DataSeries.h
+++ b/src/logging/writers/DataSeries.h
@@ -42,24 +42,83 @@ private:
 	static const size_t THREAD_MAX = 128;	// Maximum number of compression threads that DataSeries may spawn.
 	static const size_t TIME_SCALE = 1000000;	// Fixed-point multiplier for time values when converted to integers.
 
+	struct SchemaValue
+	{
+		string ds_type;
+		string bro_type;
+		string field_name;
+		string field_options;
+	};
+
+	/**
+	 * Turns a log value into a std::string. Uses an ostringstream to do the
+	 * heavy lifting, but still needs to switch on the type to know which
+	 * value in the union to hand to the stream for processing.
+	 *
+	 * @param val The value we wish to convert to a string.
+	 * @return The string representation of val.
+	 */
 	std::string LogValueToString(threading::Value *val);
 
+	/**
+	 * Takes a field type and converts it to a relevant DataSeries type.
+	 *
+	 * @param field We extract the type from this and convert it into a relevant DS type.
+	 * @return String representation of a type that DataSeries can understand.
+	 */
+	string GetDSFieldType(const threading::Field *field);
+
+	/**
+	 * Are there any options we should put into the XML schema?
+	 *
+	 * @param field We extract the type from this and return any options that make sense for that type.
+	 * @return Options that can be added directly to the XML (e.g. "pack_relative=\"yes\"").
+	 */
+	std::string GetDSOptionsForType(const threading::Field *field);
+
+	/**
+	 * Takes a list of field descriptions and a title, and uses them to
+	 * construct a valid DataSeries XML schema, which is then returned as a
+	 * std::string.
+	 *
+	 * @param vals std::vector of SchemaValues describing each field (type, name, and per-field options).
+	 * @param sTitle Name of this schema. Ideally, these schemas would be aggregated and re-used.
+	 */
+	string BuildDSSchemaFromFieldTypes(const vector<SchemaValue>& vals, string sTitle);
+
+	/**
+	 * Takes a field type and converts it to a readable string.
+	 *
+	 * @param field We extract the type from this and convert it into a readable string.
+	 * @return String representation of the field's type.
+	 */
+	string GetBroTypeString(const threading::Field *field);
+
+	/** Closes the currently open file. */
+	void CloseLog();
+
+	/** Opens the output file for the given path and creates the output module. */
+	bool OpenLog(string path);
+
 	typedef std::map<string, GeneralField *> ExtentMap;
 	typedef ExtentMap::iterator ExtentIterator;
 
 	// Internal DataSeries structures we need to keep track of.
-	DataSeriesSink* log_file;
+	vector<SchemaValue> schema_list;
 	ExtentTypeLibrary log_types;
 	ExtentType *log_type;
 	ExtentSeries log_series;
-	OutputModule* log_output;
 	ExtentMap extents;
+	int compress_type;
+
+	DataSeriesSink* log_file;
+	OutputModule* log_output;
 
 	// Options set from the script-level.
 	uint64 ds_extent_size;
 	uint64 ds_num_threads;
 	string ds_compression;
 	bool ds_dump_schema;
+	bool ds_use_integer_for_time;
 };
 
 }
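With the OpenLog()/CloseLog() pair factored out above, rotation closes and reopens just the .ds sink instead of re-running DoInit(). Nothing changes script-side; a sketch of driving hourly rotation, mirroring the rotate.bro test added below:

	redef Log::default_rotation_interval = 1hr;
	redef Log::default_rotation_postprocessor_cmd = "echo";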
diff --git a/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.options/ssh.ds.xml b/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.options/ssh.ds.xml
new file mode 100644
index 0000000000..71ad5d70a0
--- /dev/null
+++ b/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.options/ssh.ds.xml
@@ -0,0 +1,16 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.rotate/out b/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.rotate/out
new file mode 100644
index 0000000000..b6f05003f3
--- /dev/null
+++ b/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.rotate/out
@@ -0,0 +1,380 @@
+test.2011-03-07-03-00-05.ds test 11-03-07_03.00.05 11-03-07_04.00.05 0
+test.2011-03-07-04-00-05.ds test 11-03-07_04.00.05 11-03-07_05.00.05 0
+test.2011-03-07-05-00-05.ds test 11-03-07_05.00.05 11-03-07_06.00.05 0
+test.2011-03-07-06-00-05.ds test 11-03-07_06.00.05 11-03-07_07.00.05 0
+test.2011-03-07-07-00-05.ds test 11-03-07_07.00.05 11-03-07_08.00.05 0
+test.2011-03-07-08-00-05.ds test 11-03-07_08.00.05 11-03-07_09.00.05 0
+test.2011-03-07-09-00-05.ds test 11-03-07_09.00.05 11-03-07_10.00.05 0
+test.2011-03-07-10-00-05.ds test 11-03-07_10.00.05 11-03-07_11.00.05 0
+test.2011-03-07-11-00-05.ds test 11-03-07_11.00.05 11-03-07_12.00.05 0
+test.2011-03-07-12-00-05.ds test 11-03-07_12.00.05 11-03-07_12.59.55 1
+> test.2011-03-07-03-00-05.ds
+# Extent Types ...
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+extent offset ExtentType
+40 DataSeries: XmlType
+360 test
+468 DataSeries: ExtentIndex
+# Extent, type='test'
+t id.orig_h id.orig_p id.resp_h id.resp_p
+1.299e+09 10.0.0.1 20 10.0.0.2 1024
+1.299e+09 10.0.0.2 20 10.0.0.3 0
+# Extent, type='DataSeries: ExtentIndex'
+offset extenttype
+40 DataSeries: XmlType
+360 test
+468 DataSeries: ExtentIndex
+> test.2011-03-07-04-00-05.ds
+# Extent Types ...
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+extent offset ExtentType
+40 DataSeries: XmlType
+360 test
+468 DataSeries: ExtentIndex
+# Extent, type='test'
+t id.orig_h id.orig_p id.resp_h id.resp_p
+1.299e+09 10.0.0.1 20 10.0.0.2 1025
+1.299e+09 10.0.0.2 20 10.0.0.3 1
+# Extent, type='DataSeries: ExtentIndex'
+offset extenttype
+40 DataSeries: XmlType
+360 test
+468 DataSeries: ExtentIndex
+> test.2011-03-07-05-00-05.ds
+# Extent Types ...
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+extent offset ExtentType
+40 DataSeries: XmlType
+360 test
+468 DataSeries: ExtentIndex
+# Extent, type='test'
+t id.orig_h id.orig_p id.resp_h id.resp_p
+1.299e+09 10.0.0.1 20 10.0.0.2 1026
+1.299e+09 10.0.0.2 20 10.0.0.3 2
+# Extent, type='DataSeries: ExtentIndex'
+offset extenttype
+40 DataSeries: XmlType
+360 test
+468 DataSeries: ExtentIndex
+> test.2011-03-07-06-00-05.ds
+# Extent Types ...
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+extent offset ExtentType
+40 DataSeries: XmlType
+360 test
+468 DataSeries: ExtentIndex
+# Extent, type='test'
+t id.orig_h id.orig_p id.resp_h id.resp_p
+1.299e+09 10.0.0.1 20 10.0.0.2 1027
+1.299e+09 10.0.0.2 20 10.0.0.3 3
+# Extent, type='DataSeries: ExtentIndex'
+offset extenttype
+40 DataSeries: XmlType
+360 test
+468 DataSeries: ExtentIndex
+> test.2011-03-07-07-00-05.ds
+# Extent Types ...
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+extent offset ExtentType
+40 DataSeries: XmlType
+360 test
+468 DataSeries: ExtentIndex
+# Extent, type='test'
+t id.orig_h id.orig_p id.resp_h id.resp_p
+1.299e+09 10.0.0.1 20 10.0.0.2 1028
+1.299e+09 10.0.0.2 20 10.0.0.3 4
+# Extent, type='DataSeries: ExtentIndex'
+offset extenttype
+40 DataSeries: XmlType
+360 test
+468 DataSeries: ExtentIndex
+> test.2011-03-07-08-00-05.ds
+# Extent Types ...
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+extent offset ExtentType
+40 DataSeries: XmlType
+360 test
+468 DataSeries: ExtentIndex
+# Extent, type='test'
+t id.orig_h id.orig_p id.resp_h id.resp_p
+1.299e+09 10.0.0.1 20 10.0.0.2 1029
+1.299e+09 10.0.0.2 20 10.0.0.3 5
+# Extent, type='DataSeries: ExtentIndex'
+offset extenttype
+40 DataSeries: XmlType
+360 test
+468 DataSeries: ExtentIndex
+> test.2011-03-07-09-00-05.ds
+# Extent Types ...
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+extent offset ExtentType
+40 DataSeries: XmlType
+360 test
+468 DataSeries: ExtentIndex
+# Extent, type='test'
+t id.orig_h id.orig_p id.resp_h id.resp_p
+1.299e+09 10.0.0.1 20 10.0.0.2 1030
+1.299e+09 10.0.0.2 20 10.0.0.3 6
+# Extent, type='DataSeries: ExtentIndex'
+offset extenttype
+40 DataSeries: XmlType
+360 test
+468 DataSeries: ExtentIndex
+> test.2011-03-07-10-00-05.ds
+# Extent Types ...
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+extent offset ExtentType
+40 DataSeries: XmlType
+360 test
+468 DataSeries: ExtentIndex
+# Extent, type='test'
+t id.orig_h id.orig_p id.resp_h id.resp_p
+1.299e+09 10.0.0.1 20 10.0.0.2 1031
+1.299e+09 10.0.0.2 20 10.0.0.3 7
+# Extent, type='DataSeries: ExtentIndex'
+offset extenttype
+40 DataSeries: XmlType
+360 test
+468 DataSeries: ExtentIndex
+> test.2011-03-07-11-00-05.ds
+# Extent Types ...
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+extent offset ExtentType
+40 DataSeries: XmlType
+360 test
+468 DataSeries: ExtentIndex
+# Extent, type='test'
+t id.orig_h id.orig_p id.resp_h id.resp_p
+1.3e+09 10.0.0.1 20 10.0.0.2 1032
+1.3e+09 10.0.0.2 20 10.0.0.3 8
+# Extent, type='DataSeries: ExtentIndex'
+offset extenttype
+40 DataSeries: XmlType
+360 test
+468 DataSeries: ExtentIndex
+> test.2011-03-07-12-00-05.ds
+# Extent Types ...
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+extent offset ExtentType
+40 DataSeries: XmlType
+360 test
+468 DataSeries: ExtentIndex
+# Extent, type='test'
+t id.orig_h id.orig_p id.resp_h id.resp_p
+1.3e+09 10.0.0.1 20 10.0.0.2 1033
+1.3e+09 10.0.0.2 20 10.0.0.3 9
+# Extent, type='DataSeries: ExtentIndex'
+offset extenttype
+40 DataSeries: XmlType
+360 test
+468 DataSeries: ExtentIndex
diff --git a/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.test-logging/ssh.ds.txt b/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.test-logging/ssh.ds.txt
new file mode 100644
index 0000000000..f66f40b701
--- /dev/null
+++ b/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.test-logging/ssh.ds.txt
@@ -0,0 +1,43 @@
+# Extent Types ...
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+extent offset ExtentType
+40 DataSeries: XmlType
+400 ssh
+604 DataSeries: ExtentIndex
+# Extent, type='ssh'
+t id.orig_h id.orig_p id.resp_h id.resp_p status country
+1.334e+09 1.2.3.4 1234 2.3.4.5 80 success unknown
+1.334e+09 1.2.3.4 1234 2.3.4.5 80 failure US
+1.334e+09 1.2.3.4 1234 2.3.4.5 80 failure UK
+1.334e+09 1.2.3.4 1234 2.3.4.5 80 success BR
+1.334e+09 1.2.3.4 1234 2.3.4.5 80 failure MX
+# Extent, type='DataSeries: ExtentIndex'
+offset extenttype
+40 DataSeries: XmlType
+400 ssh
+604 DataSeries: ExtentIndex
diff --git a/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.time-as-int/conn.ds.txt b/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.time-as-int/conn.ds.txt
new file mode 100644
index 0000000000..e6294b1d71
--- /dev/null
+++ b/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.time-as-int/conn.ds.txt
@@ -0,0 +1,96 @@
+# Extent Types ...
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+extent offset ExtentType
+40 DataSeries: XmlType
+636 conn
+2912 DataSeries: ExtentIndex
+# Extent, type='conn'
+ts uid id.orig_h id.orig_p id.resp_h id.resp_p proto service duration orig_bytes resp_bytes conn_state local_orig missed_bytes history orig_pkts orig_ip_bytes resp_pkts resp_ip_bytes
+1300475167096535 UWkUyAuUGXf 141.142.220.202 5353 224.0.0.251 5353 udp dns 0 0 0 S0 F 0 D 1 73 0 0
+1300475167097012 arKYeMETxOg fe80::217:f2ff:fed7:cf65 5353 ff02::fb 5353 udp 0 0 0 S0 F 0 D 1 199 0 0
+1300475167099816 k6kgXLOoSKl 141.142.220.50 5353 224.0.0.251 5353 udp 0 0 0 S0 F 0 D 1 179 0 0
+1300475168853899 TEfuqmmG4bh 141.142.220.118 43927 141.142.2.2 53 udp dns 435 0 89 SHR F 0 Cd 0 0 1 117
+1300475168854378 FrJExwHcSal 141.142.220.118 37676 141.142.2.2 53 udp dns 420 0 99 SHR F 0 Cd 0 0 1 127
+1300475168854837 5OKnoww6xl4 141.142.220.118 40526 141.142.2.2 53 udp dns 391 0 183 SHR F 0 Cd 0 0 1 211
+1300475168857956 3PKsZ2Uye21 141.142.220.118 32902 141.142.2.2 53 udp dns 317 0 89 SHR F 0 Cd 0 0 1 117
+1300475168858306 VW0XPVINV8a 141.142.220.118 59816 141.142.2.2 53 udp dns 343 0 99 SHR F 0 Cd 0 0 1 127
+1300475168858713 fRFu0wcOle6 141.142.220.118 59714 141.142.2.2 53 udp dns 375 0 183 SHR F 0 Cd 0 0 1 211
+1300475168891644 qSsw6ESzHV4 141.142.220.118 58206 141.142.2.2 53 udp dns 339 0 89 SHR F 0 Cd 0 0 1 117
+1300475168892037 iE6yhOq3SF 141.142.220.118 38911 141.142.2.2 53 udp dns 334 0 99 SHR F 0 Cd 0 0 1 127
+1300475168892414 GSxOnSLghOa 141.142.220.118 59746 141.142.2.2 53 udp dns 420 0 183 SHR F 0 Cd 0 0 1 211
+1300475168893988 qCaWGmzFtM5 141.142.220.118 45000 141.142.2.2 53 udp dns 384 0 89 SHR F 0 Cd 0 0 1 117
+1300475168894422 70MGiRM1Qf4 141.142.220.118 48479 141.142.2.2 53 udp dns 316 0 99 SHR F 0 Cd 0 0 1 127
+1300475168894787 h5DsfNtYzi1 141.142.220.118 48128 141.142.2.2 53 udp dns 422 0 183 SHR F 0 Cd 0 0 1 211
+1300475168901749 P654jzLoe3a 141.142.220.118 56056 141.142.2.2 53 udp dns 402 0 131 SHR F 0 Cd 0 0 1 159
+1300475168902195 Tw8jXtpTGu6 141.142.220.118 55092 141.142.2.2 53 udp dns 374 0 198 SHR F 0 Cd 0 0 1 226
+1300475169899438 BWaU4aSuwkc 141.142.220.44 5353 224.0.0.251 5353 udp dns 0 0 0 S0 F 0 D 1 85 0 0
+1300475170862384 10XodEwRycf 141.142.220.226 137 141.142.220.255 137 udp dns 2613016 350 0 S0 F 0 D 7 546 0 0
+1300475171675372 zno26fFZkrh fe80::3074:17d5:2052:c324 65373 ff02::1:3 5355 udp dns 100096 66 0 S0 F 0 D 2 162 0 0
+1300475171677081 v5rgkJBig5l 141.142.220.226 55131 224.0.0.252 5355 udp dns 100020 66 0 S0 F 0 D 2 122 0 0
+1300475173116749 eWZCH7OONC1 fe80::3074:17d5:2052:c324 54213 ff02::1:3 5355 udp dns 99801 66 0 S0 F 0 D 2 162 0 0
+1300475173117362 0Pwk3ntf8O3 141.142.220.226 55671 224.0.0.252 5355 udp dns 99848 66 0 S0 F 0 D 2 122 0 0
+1300475173153679 0HKorjr8Zp7 141.142.220.238 56641 141.142.220.255 137 udp dns 0 0 0 S0 F 0 D 1 78 0 0
+1300475168859163 GvmoxJFXdTa 141.142.220.118 49998 208.80.152.3 80 tcp 215893 1130 734 S1 F 1130 ShACad 4 216 4 950
+1300475168652003 nQcgTWjvg4c 141.142.220.118 35634 208.80.152.2 80 tcp 61328 0 350 OTH F 0 CdA 1 52 1 402
+1300475168895267 UfGkYA2HI2g 141.142.220.118 50001 208.80.152.3 80 tcp 227283 1178 734 S1 F 1178 ShACad 4 216 4 950
+1300475168902635 i2rO3KD1Syg 141.142.220.118 35642 208.80.152.2 80 tcp 120040 534 412 S1 F 534 ShACad 3 164 3 576
+1300475168892936 0Q4FH8sESw5 141.142.220.118 50000 208.80.152.3 80 tcp 229603 1148 734 S1 F 1148 ShACad 4 216 4 950
+1300475168855305 EAr0uf4mhq 141.142.220.118 49996 208.80.152.3 80 tcp 218501 1171 733 S1 F 1171 ShACad 4 216 4 949
+1300475168892913 slFea8xwSmb 141.142.220.118 49999 208.80.152.3 80 tcp 220960 1137 733 S1 F 1137 ShACad 4 216 4 949
+1300475169780331 2cx26uAvUPl 141.142.220.235 6705 173.192.163.128 80 tcp 0 0 0 OTH F 0 h 0 0 1 48
+1300475168724007 j4u32Pc5bif 141.142.220.118 48649 208.80.152.118 80 tcp 119904 525 232 S1 F 525 ShACad 3 164 3 396
+1300475168855330 c4Zw9TmAE05 141.142.220.118 49997 208.80.152.3 80 tcp 219720 1125 734 S1 F 1125 ShACad 4 216 4 950
+# Extent, type='DataSeries: ExtentIndex'
+offset extenttype
+40 DataSeries: XmlType
+636 conn
+2912 DataSeries: ExtentIndex
diff --git a/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.wikipedia/conn.ds.txt b/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.wikipedia/conn.ds.txt
new file mode 100644
index 0000000000..e85cf9337e
--- /dev/null
+++ b/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.wikipedia/conn.ds.txt
@@ -0,0 +1,96 @@
+# Extent Types ...
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+extent offset ExtentType
+40 DataSeries: XmlType
+660 conn
+2564 DataSeries: ExtentIndex
+# Extent, type='conn'
+ts uid id.orig_h id.orig_p id.resp_h id.resp_p proto service duration orig_bytes resp_bytes conn_state local_orig missed_bytes history orig_pkts orig_ip_bytes resp_pkts resp_ip_bytes
+1.3e+09 UWkUyAuUGXf 141.142.220.202 5353 224.0.0.251 5353 udp dns 0 0 0 S0 F 0 D 1 73 0 0
+1.3e+09 arKYeMETxOg fe80::217:f2ff:fed7:cf65 5353 ff02::fb 5353 udp 0 0 0 S0 F 0 D 1 199 0 0
+1.3e+09 k6kgXLOoSKl 141.142.220.50 5353 224.0.0.251 5353 udp 0 0 0 S0 F 0 D 1 179 0 0
+1.3e+09 TEfuqmmG4bh 141.142.220.118 43927 141.142.2.2 53 udp dns 0 0 89 SHR F 0 Cd 0 0 1 117
+1.3e+09 FrJExwHcSal 141.142.220.118 37676 141.142.2.2 53 udp dns 0 0 99 SHR F 0 Cd 0 0 1 127
+1.3e+09 5OKnoww6xl4 141.142.220.118 40526 141.142.2.2 53 udp dns 0 0 183 SHR F 0 Cd 0 0 1 211
+1.3e+09 3PKsZ2Uye21 141.142.220.118 32902 141.142.2.2 53 udp dns 0 0 89 SHR F 0 Cd 0 0 1 117
+1.3e+09 VW0XPVINV8a 141.142.220.118 59816 141.142.2.2 53 udp dns 0 0 99 SHR F 0 Cd 0 0 1 127
+1.3e+09 fRFu0wcOle6 141.142.220.118 59714 141.142.2.2 53 udp dns 0 0 183 SHR F 0 Cd 0 0 1 211
+1.3e+09 qSsw6ESzHV4 141.142.220.118 58206 141.142.2.2 53 udp dns 0 0 89 SHR F 0 Cd 0 0 1 117
+1.3e+09 iE6yhOq3SF 141.142.220.118 38911 141.142.2.2 53 udp dns 0 0 99 SHR F 0 Cd 0 0 1 127
+1.3e+09 GSxOnSLghOa 141.142.220.118 59746 141.142.2.2 53 udp dns 0 0 183 SHR F 0 Cd 0 0 1 211
+1.3e+09 qCaWGmzFtM5 141.142.220.118 45000 141.142.2.2 53 udp dns 0 0 89 SHR F 0 Cd 0 0 1 117
+1.3e+09 70MGiRM1Qf4 141.142.220.118 48479 141.142.2.2 53 udp dns 0 0 99 SHR F 0 Cd 0 0 1 127
+1.3e+09 h5DsfNtYzi1 141.142.220.118 48128 141.142.2.2 53 udp dns 0 0 183 SHR F 0 Cd 0 0 1 211
+1.3e+09 P654jzLoe3a 141.142.220.118 56056 141.142.2.2 53 udp dns 0 0 131 SHR F 0 Cd 0 0 1 159
+1.3e+09 Tw8jXtpTGu6 141.142.220.118 55092 141.142.2.2 53 udp dns 0 0 198 SHR F 0 Cd 0 0 1 226
+1.3e+09 BWaU4aSuwkc 141.142.220.44 5353 224.0.0.251 5353 udp dns 0 0 0 S0 F 0 D 1 85 0 0
+1.3e+09 10XodEwRycf 141.142.220.226 137 141.142.220.255 137 udp dns 0 350 0 S0 F 0 D 7 546 0 0
+1.3e+09 zno26fFZkrh fe80::3074:17d5:2052:c324 65373 ff02::1:3 5355 udp dns 0 66 0 S0 F 0 D 2 162 0 0
+1.3e+09 v5rgkJBig5l 141.142.220.226 55131 224.0.0.252 5355 udp dns 0 66 0 S0 F 0 D 2 122 0 0
+1.3e+09 eWZCH7OONC1 fe80::3074:17d5:2052:c324 54213 ff02::1:3 5355 udp dns 0 66 0 S0 F 0 D 2 162 0 0
+1.3e+09 0Pwk3ntf8O3 141.142.220.226 55671 224.0.0.252 5355 udp dns 0 66 0 S0 F 0 D 2 122 0 0
+1.3e+09 0HKorjr8Zp7 141.142.220.238 56641 141.142.220.255 137 udp dns 0 0 0 S0 F 0 D 1 78 0 0
+1.3e+09 GvmoxJFXdTa 141.142.220.118 49998 208.80.152.3 80 tcp 0 1130 734 S1 F 1130 ShACad 4 216 4 950
+1.3e+09 nQcgTWjvg4c 141.142.220.118 35634 208.80.152.2 80 tcp 0 0 350 OTH F 0 CdA 1 52 1 402
+1.3e+09 UfGkYA2HI2g 141.142.220.118 50001 208.80.152.3 80 tcp 0 1178 734 S1 F 1178 ShACad 4 216 4 950
+1.3e+09 i2rO3KD1Syg 141.142.220.118 35642 208.80.152.2 80 tcp 0 534 412 S1 F 534 ShACad 3 164 3 576
+1.3e+09 0Q4FH8sESw5 141.142.220.118 50000 208.80.152.3 80 tcp 0 1148 734 S1 F 1148 ShACad 4 216 4 950
+1.3e+09 EAr0uf4mhq 141.142.220.118 49996 208.80.152.3 80 tcp 0 1171 733 S1 F 1171 ShACad 4 216 4 949
+1.3e+09 slFea8xwSmb 141.142.220.118 49999 208.80.152.3 80 tcp 0 1137 733 S1 F 1137 ShACad 4 216 4 949
+1.3e+09 2cx26uAvUPl 141.142.220.235 6705 173.192.163.128 80 tcp 0 0 0 OTH F 0 h 0 0 1 48
+1.3e+09 j4u32Pc5bif 141.142.220.118 48649 208.80.152.118 80 tcp 0 525 232 S1 F 525 ShACad 3 164 3 396
+1.3e+09 c4Zw9TmAE05 141.142.220.118 49997 208.80.152.3 80 tcp 0 1125 734 S1 F 1125 ShACad 4 216 4 950
+# Extent, type='DataSeries: ExtentIndex'
+offset extenttype
+40 DataSeries: XmlType
+660 conn
+2564 DataSeries: ExtentIndex
diff --git a/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.wikipedia/http.ds.txt b/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.wikipedia/http.ds.txt
new file mode 100644
index 0000000000..49e431085c
--- /dev/null
+++ b/testing/btest/Baseline/scripts.base.frameworks.logging.dataseries.wikipedia/http.ds.txt
@@ -0,0 +1,90 @@
+# Extent Types ...
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+extent offset ExtentType
+40 DataSeries: XmlType
+756 http
+1144 DataSeries: ExtentIndex
+# Extent, type='http'
+ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth method host uri referrer user_agent request_body_len response_body_len status_code status_msg info_code info_msg filename tags username password proxied mime_type md5 extraction_file
+1.3e+09 j4u32Pc5bif 141.142.220.118 48649 208.80.152.118 80 0 0 0 304 Not Modified 0
+1.3e+09 c4Zw9TmAE05 141.142.220.118 49997 208.80.152.3 80 0 0 0 304 Not Modified 0
+1.3e+09 EAr0uf4mhq 141.142.220.118 49996 208.80.152.3 80 0 0 0 304 Not Modified 0
+1.3e+09 GvmoxJFXdTa 141.142.220.118 49998 208.80.152.3 80 0 0 0 304 Not Modified 0
+1.3e+09 0Q4FH8sESw5 141.142.220.118 50000 208.80.152.3 80 0 0 0 304 Not Modified 0
+1.3e+09 slFea8xwSmb 141.142.220.118 49999 208.80.152.3 80 0 0 0 304 Not Modified 0
+1.3e+09 UfGkYA2HI2g 141.142.220.118 50001 208.80.152.3 80 0 0 0 304 Not Modified 0
+1.3e+09 i2rO3KD1Syg 141.142.220.118 35642 208.80.152.2 80 0 0 0 304 Not Modified 0
+1.3e+09 c4Zw9TmAE05 141.142.220.118 49997 208.80.152.3 80 0 0 0 304 Not Modified 0
+1.3e+09 EAr0uf4mhq 141.142.220.118 49996 208.80.152.3 80 0 0 0 304 Not Modified 0
+1.3e+09 GvmoxJFXdTa 141.142.220.118 49998 208.80.152.3 80 0 0 0 304 Not Modified 0
+1.3e+09 0Q4FH8sESw5 141.142.220.118 50000 208.80.152.3 80 0 0 0 304 Not Modified 0
+1.3e+09 slFea8xwSmb 141.142.220.118 49999 208.80.152.3 80 0 0 0 304 Not Modified 0
+1.3e+09 UfGkYA2HI2g 141.142.220.118 50001 208.80.152.3 80 0 0 0 304 Not Modified 0
+# Extent, type='DataSeries: ExtentIndex'
+offset extenttype
+40 DataSeries: XmlType
+756 http
+1144 DataSeries: ExtentIndex
diff --git a/testing/btest/scripts/base/frameworks/logging/dataseries/options.bro b/testing/btest/scripts/base/frameworks/logging/dataseries/options.bro
new file mode 100644
index 0000000000..77ea32908a
--- /dev/null
+++ b/testing/btest/scripts/base/frameworks/logging/dataseries/options.bro
@@ -0,0 +1,43 @@
+#
+# @TEST-REQUIRES: has-writer DataSeries && which ds2txt
+#
+# @TEST-EXEC: bro -b %INPUT Log::default_writer=Log::WRITER_DATASERIES
+# @TEST-EXEC: test -e ssh.ds.xml
+# @TEST-EXEC: btest-diff ssh.ds.xml
+
+module SSH;
+
+redef LogDataSeries::dump_schema = T;
+
+# Haven't yet found a way to check for the effect of these.
+redef LogDataSeries::compression = "bz2";
+redef LogDataSeries::extent_size = 1000;
+redef LogDataSeries::num_threads = 5;
+
+# LogDataSeries::use_integer_for_time is tested separately.
+
+export {
+	redef enum Log::ID += { LOG };
+
+	type Log: record {
+		t: time;
+		id: conn_id; # Will be rolled out into individual columns.
+		status: string &optional;
+		country: string &default="unknown";
+	} &log;
+}
+
+event bro_init()
+{
+	Log::create_stream(SSH::LOG, [$columns=Log]);
+
+	local cid = [$orig_h=1.2.3.4, $orig_p=1234/tcp, $resp_h=2.3.4.5, $resp_p=80/tcp];
+
+	Log::write(SSH::LOG, [$t=network_time(), $id=cid, $status="success"]);
+	Log::write(SSH::LOG, [$t=network_time(), $id=cid, $status="failure", $country="US"]);
+	Log::write(SSH::LOG, [$t=network_time(), $id=cid, $status="failure", $country="UK"]);
+	Log::write(SSH::LOG, [$t=network_time(), $id=cid, $status="success", $country="BR"]);
+	Log::write(SSH::LOG, [$t=network_time(), $id=cid, $status="failure", $country="MX"]);
+}
diff --git a/testing/btest/scripts/base/frameworks/logging/dataseries/rotate.bro b/testing/btest/scripts/base/frameworks/logging/dataseries/rotate.bro
new file mode 100644
index 0000000000..639c7f3562
--- /dev/null
+++ b/testing/btest/scripts/base/frameworks/logging/dataseries/rotate.bro
@@ -0,0 +1,33 @@
+#
+# @TEST-REQUIRES: has-writer DataSeries && which ds2txt
+#
+# @TEST-EXEC: bro -b -r %DIR/../rotation.trace %INPUT Log::default_writer=Log::WRITER_DATASERIES 2>&1 | grep "test" >out
+# @TEST-EXEC: for i in test.*.ds; do printf '> %s\n' $i; ds2txt $i; done >>out
+# @TEST-EXEC: btest-diff out
+
+module Test;
+
+export {
+	# Create a new ID for our log stream.
+	redef enum Log::ID += { LOG };
+
+	# Define a record with all the columns the log file can have.
+	# (I'm using a subset of fields from ssh-ext for demonstration.)
+	type Log: record {
+		t: time;
+		id: conn_id; # Will be rolled out into individual columns.
+	} &log;
+}
+
+redef Log::default_rotation_interval = 1hr;
+redef Log::default_rotation_postprocessor_cmd = "echo";
+
+event bro_init()
+{
+	Log::create_stream(Test::LOG, [$columns=Log]);
+}
+
+event new_connection(c: connection)
+	{
+	Log::write(Test::LOG, [$t=network_time(), $id=c$id]);
+	}
diff --git a/testing/btest/scripts/base/frameworks/logging/dataseries/test-logging.bro b/testing/btest/scripts/base/frameworks/logging/dataseries/test-logging.bro
new file mode 100644
index 0000000000..c7f8a5618f
--- /dev/null
+++ b/testing/btest/scripts/base/frameworks/logging/dataseries/test-logging.bro
@@ -0,0 +1,34 @@
+#
+# @TEST-REQUIRES: has-writer DataSeries && which ds2txt
+#
+# @TEST-EXEC: bro -b %INPUT Log::default_writer=Log::WRITER_DATASERIES
+# @TEST-EXEC: ds2txt ssh.ds >ssh.ds.txt
+# @TEST-EXEC: btest-diff ssh.ds.txt
+
+module SSH;
+
+export {
+	redef enum Log::ID += { LOG };
+
+	type Log: record {
+		t: time;
+		id: conn_id; # Will be rolled out into individual columns.
+		status: string &optional;
+		country: string &default="unknown";
+	} &log;
+}
+
+event bro_init()
+{
+	Log::create_stream(SSH::LOG, [$columns=Log]);
+
+	local cid = [$orig_h=1.2.3.4, $orig_p=1234/tcp, $resp_h=2.3.4.5, $resp_p=80/tcp];
+
+	Log::write(SSH::LOG, [$t=network_time(), $id=cid, $status="success"]);
+	Log::write(SSH::LOG, [$t=network_time(), $id=cid, $status="failure", $country="US"]);
+	Log::write(SSH::LOG, [$t=network_time(), $id=cid, $status="failure", $country="UK"]);
+	Log::write(SSH::LOG, [$t=network_time(), $id=cid, $status="success", $country="BR"]);
+	Log::write(SSH::LOG, [$t=network_time(), $id=cid, $status="failure", $country="MX"]);
+}
diff --git a/testing/btest/scripts/base/frameworks/logging/dataseries/time-as-int.bro b/testing/btest/scripts/base/frameworks/logging/dataseries/time-as-int.bro
new file mode 100644
index 0000000000..3a072998c0
--- /dev/null
+++ b/testing/btest/scripts/base/frameworks/logging/dataseries/time-as-int.bro
@@ -0,0 +1,8 @@
+#
+# @TEST-REQUIRES: has-writer DataSeries && which ds2txt
+#
+# @TEST-EXEC: bro -r $TRACES/wikipedia.trace %INPUT Log::default_writer=Log::WRITER_DATASERIES
+# @TEST-EXEC: ds2txt conn.ds >conn.ds.txt
+# @TEST-EXEC: btest-diff conn.ds.txt
+
+redef LogDataSeries::use_integer_for_time = T;
diff --git a/testing/btest/scripts/base/frameworks/logging/dataseries/wikipedia.bro b/testing/btest/scripts/base/frameworks/logging/dataseries/wikipedia.bro
new file mode 100644
index 0000000000..4a4b70afc2
--- /dev/null
+++ b/testing/btest/scripts/base/frameworks/logging/dataseries/wikipedia.bro
@@ -0,0 +1,8 @@
+#
+# @TEST-REQUIRES: has-writer DataSeries && which ds2txt
+#
+# @TEST-EXEC: bro -r $TRACES/wikipedia.trace Log::default_writer=Log::WRITER_DATASERIES
+# @TEST-EXEC: ds2txt conn.ds >conn.ds.txt
+# @TEST-EXEC: ds2txt http.ds >http.ds.txt
+# @TEST-EXEC: btest-diff conn.ds.txt
+# @TEST-EXEC: btest-diff http.ds.txt
diff --git a/testing/scripts/has-writer b/testing/scripts/has-writer
new file mode 100755
index 0000000000..683d31041f
--- /dev/null
+++ b/testing/scripts/has-writer
@@ -0,0 +1,6 @@
+#! /usr/bin/env bash
+#
+# Returns true if Bro has been compiled with support for writer type
+# $1. The type name must match what "bro --help" prints.
+
+bro --help 2>&1 | grep -qi "Supported log formats:.*$1"
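The tests above select DataSeries globally via Log::default_writer on the command line. A sketch of a narrower setup, assuming the stock logging-framework filter API and the default Conn::LOG stream, enables the writer for a single stream only (the filter name is hypothetical):

	event bro_init()
		{
		# $writer overrides Log::default_writer for just this filter.
		Log::add_filter(Conn::LOG, [$name="ds-conn", $writer=Log::WRITER_DATASERIES]);
		}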