Merging in DataSeries support from topic/gilbert/logging.

I copied the code over manually, no merging, because (1) it needed to be adapted to the new threading API, and (2) there's more stuff in the branch that I haven't ported yet. The DS output generally seems to work, but it has seen no further testing yet. Not unit tests yet either.
2025-10-07 09:08:20 +00:00 · 2012-04-03 22:14:56 -07:00 · 2012-04-03 22:14:56 -07:00 · 952b6b293a
commit 952b6b293a
parent 99e3c58494
18 changed files with 726 additions and 65 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -107,6 +107,21 @@ if (GOOGLEPERFTOOLS_FOUND)
    endif ()
 endif ()

+set(USE_DATASERIES false)
+find_package(Lintel)
+find_package(DataSeries)
+find_package(LibXML2)
+
+if (LINTEL_FOUND AND DATASERIES_FOUND AND LIBXML2_FOUND)
+    set(USE_DATASERIES true)
+	include_directories(BEFORE ${Lintel_INCLUDE_DIR})
+	include_directories(BEFORE ${DataSeries_INCLUDE_DIR})
+	include_directories(BEFORE ${LibXML2_INCLUDE_DIR})
+	list(APPEND OPTLIBS ${Lintel_LIBRARIES})
+	list(APPEND OPTLIBS ${DataSeries_LIBRARIES})
+	list(APPEND OPTLIBS ${LibXML2_LIBRARIES})
+endif()
+
 set(brodeps
    ${BinPAC_LIBRARY}
    ${PCAP_LIBRARY}
@ -193,6 +208,7 @@ message(
    "\nGeoIP:             ${USE_GEOIP}"
    "\nGoogle perftools:  ${USE_PERFTOOLS}"
    "\n       debugging:  ${USE_PERFTOOLS_DEBUG}"
+    "\nDataSeries:        ${USE_DATASERIES}"
    "\n"
    "\n================================================================\n"
 )
--- a/2
+++ b/2
@ -1 +1 @@
-Subproject commit 550ab2c8d95b1d3e18e40a903152650e6c7a3c45
+Subproject commit 60b28739379da75f26c5c2a312b7886f5209a1cc
--- a/config.h.in
+++ b/config.h.in
@ -111,6 +111,9 @@
 /* Use Google's perftools */
 #cmakedefine USE_PERFTOOLS

+/* Use the DataSeries writer. */
+#cmakedefine USE_DATASERIES
+
 /* Version number of package */
 #define VERSION "@VERSION@"

--- a/9
+++ b/9
@ -54,6 +54,8 @@ Usage: $0 [OPTION]... [VAR=VALUE]...
    --with-ruby-lib=PATH   path to ruby library
    --with-ruby-inc=PATH   path to ruby headers
    --with-swig=PATH       path to SWIG executable
+    --with-dataseries=PATH path to DataSeries and Lintel libraries
+    --with-xml2=PATH       path to libxml2 installation (for DataSeries)

  Packaging Options (for developers):
    --binary-package       toggle special logic for binary packaging
@ -203,6 +205,13 @@ while [ $# -ne 0 ]; do
        --with-swig=*)
            append_cache_entry SWIG_EXECUTABLE      PATH    $optarg
            ;;
+        --with-dataseries=*)
+            append_cache_entry DataSeries_ROOT_DIR PATH $optarg
+            append_cache_entry Lintel_ROOT_DIR PATH $optarg
+            ;;
+        --with-xml2=*)
+            append_cache_entry LibXML2_ROOT_DIR PATH $optarg
+            ;;
        --binary-package)
            append_cache_entry BINARY_PACKAGING_MODE BOOL true
            ;;
--- a/scripts/base/frameworks/logging/load.bro
+++ b/scripts/base/frameworks/logging/load.bro
@ -1,3 +1,4 @@
@load ./main
@load ./postprocessors
@load ./writers/ascii
+@load ./writers/dataseries
--- a/scripts/base/frameworks/logging/writers/dataseries.bro
+++ b/scripts/base/frameworks/logging/writers/dataseries.bro
@ -0,0 +1,62 @@
+##! Interface for the dataseries log writer.
+
+module LogDataSeries;
+
+export {
+	## Compression to use with the DS output file.  Options are:
+	##
+	## 'none' -- No compression.
+	## 'lzf' -- LZF compression.  Very quick, but leads to larger output files.
+	## 'lzo' -- LZO compression.  Very fast decompression times.
+	## 'gz' -- GZIP compression.  Slower than LZF, but also produces smaller output.
+	## 'bz2' -- BZIP2 compression.  Slower than GZIP, but also produces smaller output.
+	const ds_compression = "lzf" &redef;
+
+	## The extent buffer size.
+	## Larger values here lead to better compression and more efficient writes, but
+	## also increases the lag between the time events are received and the time they
+	## are actually written to disk.
+	const ds_extent_size = 65536 &redef;
+
+	## Should we dump the XML schema we use for this ds file to disk?
+	## If yes, the XML schema shares the name of the logfile, but has
+	## an XML ending.
+	const ds_dump_schema = T &redef;
+
+	## How many threads should DataSeries spawn to perform compression?
+	## Note that this dictates the number of threads per log stream.  If
+	## you're using a lot of streams, you may want to keep this number
+	## relatively small.
+	##
+	## Default value is 1, which will spawn one thread / core / stream.
+	## 
+	## MAX is 128, MIN is 1.
+	const ds_num_threads = 1 &redef;
+
+	## Should time be stored as an integer or a double?
+	## Storing time as a double leads to possible precision issues and
+	## could (significantly) increase the size of the resulting DS log.
+	## That said, timestamps stored in double form are more consistent
+	## with the rest of Bro and are more easily readable / understandable
+	## when working with the raw DataSeries format.
+	## 
+	## Double timestamps are used by default.
+	const ds_use_integer = F &redef;
+}
+
+# Default function to postprocess a rotated DataSeries log file. It moves the
+# rotated file to a new name that includes a timestamp with the opening time, and
+# then runs the writer's default postprocessor command on it.
+function default_rotation_postprocessor_func(info: Log::RotationInfo) : bool
+	{
+	# Move file to name including both opening and closing time.
+	local dst = fmt("%s.%s.ds", info$path,
+			strftime(Log::default_rotation_date_format, info$open));
+
+	system(fmt("/bin/mv %s %s", info$fname, dst));
+
+	# Run default postprocessor.
+	return Log::run_rotation_postprocessor_cmd(info, dst);
+	}
+
+redef Log::default_rotation_postprocessors += { [Log::WRITER_DATASERIES] = default_rotation_postprocessor_func };
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@ -419,6 +419,7 @@ set(bro_SRCS
    logging/WriterBackend.cc
    logging/WriterFrontend.cc
    logging/writers/Ascii.cc
+    logging/writers/DataSeries.cc
    logging/writers/None.cc

    ${dns_SRCS}
--- a/src/logging.bif
+++ b/src/logging.bif
@ -72,3 +72,11 @@ const set_separator: string;
 const empty_field: string;
 const unset_field: string;

+# Options for the DataSeries writer.
+
+module LogDataSeries;
+
+const ds_compression: string;
+const ds_extent_size: count;
+const ds_dump_schema: bool;
+const ds_num_threads: count;
--- a/src/logging/Manager.cc
+++ b/src/logging/Manager.cc
@ -16,9 +16,11 @@
 #include "writers/Ascii.h"
 #include "writers/None.h"

+#ifdef USE_DATASERIES
+#include "writers/DataSeries.h"
+#endif
+
 using namespace logging;
-using threading::Value;
-using threading::Field;

 // Structure describing a log writer type.
 struct WriterDefinition {
@ -32,6 +34,9 @@ struct WriterDefinition {
 WriterDefinition log_writers[] = {
 	{ BifEnum::Log::WRITER_NONE,  "None", 0, writer::None::Instantiate },
 	{ BifEnum::Log::WRITER_ASCII, "Ascii", 0, writer::Ascii::Instantiate },
+#ifdef USE_DATASERIES
+	{ BifEnum::Log::WRITER_DATASERIES, "DataSeries", 0, writer::DataSeries::Instantiate },
+#endif

 	// End marker, don't touch.
 	{ BifEnum::Log::WRITER_DEFAULT, "None", 0, (WriterBackend* (*)(WriterFrontend* frontend))0 }
@ -51,7 +56,7 @@ struct Manager::Filter {
 	Func* postprocessor;

 	int num_fields;
-	Field** fields;
+	threading::Field** fields;

 	// Vector indexed by field number. Each element is a list of record
 	// indices defining a path leading to the value across potential
@ -127,6 +132,17 @@ Manager::~Manager()
 		delete *s;
 	}

+list<string> Manager::SupportedFormats()
+	{
+	list<string> formats;
+
+	for ( WriterDefinition* ld = log_writers; ld->type != BifEnum::Log::WRITER_DEFAULT; ++ld ) 
+		formats.push_back(ld->name);
+
+	return formats;
+	}
+
+
 WriterBackend* Manager::CreateBackend(WriterFrontend* frontend, bro_int_t type)
 	{
 	WriterDefinition* ld = log_writers;
@ -135,7 +151,7 @@ WriterBackend* Manager::CreateBackend(WriterFrontend* frontend, bro_int_t type)
 		{
 		if ( ld->type == BifEnum::Log::WRITER_DEFAULT )
 			{
-			reporter->Error("unknow writer when creating writer");
+			reporter->Error("unknown writer type requested");
 			return 0;
 			}

@ -159,10 +175,8 @@ WriterBackend* Manager::CreateBackend(WriterFrontend* frontend, bro_int_t type)
 				// function.
 				ld->factory = 0;

-				DBG_LOG(DBG_LOGGING, "failed to init writer class %s",
-					ld->name);
-
-				return false;
+				reporter->Error("initialization of writer %s failed", ld->name);
+				return 0;
 				}
 			}

@ -449,7 +463,7 @@ bool Manager::TraverseRecord(Stream* stream, Filter* filter, RecordType* rt,

 		filter->indices.push_back(new_indices);

-		filter->fields = (Field**)
+		filter->fields = (threading::Field**)
 			realloc(filter->fields,
 				sizeof(Field) * ++filter->num_fields);

@ -459,7 +473,7 @@ bool Manager::TraverseRecord(Stream* stream, Filter* filter, RecordType* rt,
 			return false;
 			}

-		Field* field = new Field();
+		threading::Field* field = new threading::Field();
 		field->name = new_path;
 		field->type = t->Tag();
 		if ( field->type == TYPE_TABLE )
@ -572,7 +586,7 @@ bool Manager::AddFilter(EnumVal* id, RecordVal* fval)

 	for ( int i = 0; i < filter->num_fields; i++ )
 		{
-		Field* field = filter->fields[i];
+		threading::Field* field = filter->fields[i];
 		DBG_LOG(DBG_LOGGING, "   field %10s: %s",
 			field->name.c_str(), type_name(field->type));
 		}
@ -744,10 +758,10 @@ bool Manager::Write(EnumVal* id, RecordVal* columns)

 			// Copy the fields for WriterFrontend::Init() as it
 			// will take ownership.
-			Field** arg_fields = new Field*[filter->num_fields];
+			threading::Field** arg_fields = new threading::Field*[filter->num_fields];

 			for ( int j = 0; j < filter->num_fields; ++j )
-				arg_fields[j] = new Field(*filter->fields[j]);
+				arg_fields[j] = new threading::Field(*filter->fields[j]);

 			writer = CreateWriter(stream->id, filter->writer,
 					      path, filter->num_fields,
@ -898,10 +912,10 @@ threading::Value* Manager::ValToLogVal(Val* val, BroType* ty)
 	return lval;
 	}

-Value** Manager::RecordToFilterVals(Stream* stream, Filter* filter,
+threading::Value** Manager::RecordToFilterVals(Stream* stream, Filter* filter,
 				    RecordVal* columns)
 	{
-	Value** vals = new Value*[filter->num_fields];
+	threading::Value** vals = new threading::Value*[filter->num_fields];

 	for ( int i = 0; i < filter->num_fields; ++i )
 		{
@ -920,7 +934,7 @@ Value** Manager::RecordToFilterVals(Stream* stream, Filter* filter,
 			if ( ! val )
 				{
 				// Value, or any of its parents, is not set.
-				vals[i] = new Value(filter->fields[i]->type, false);
+				vals[i] = new threading::Value(filter->fields[i]->type, false);
 				break;
 				}
 			}
@ -933,7 +947,7 @@ Value** Manager::RecordToFilterVals(Stream* stream, Filter* filter,
 	}

 WriterFrontend* Manager::CreateWriter(EnumVal* id, EnumVal* writer, string path,
-				int num_fields, const Field* const*  fields, bool local, bool remote)
+				int num_fields, const threading::Field* const*  fields, bool local, bool remote)
 	{
 	Stream* stream = FindStream(id);

@ -997,7 +1011,7 @@ WriterFrontend* Manager::CreateWriter(EnumVal* id, EnumVal* writer, string path,
 	return writer_obj;
 	}

-void Manager::DeleteVals(int num_fields, Value** vals)
+void Manager::DeleteVals(int num_fields, threading::Value** vals)
 	{
 	// Note this code is duplicated in WriterBackend::DeleteVals().
 	for ( int i = 0; i < num_fields; i++ )
@ -1007,7 +1021,7 @@ void Manager::DeleteVals(int num_fields, Value** vals)
 	}

 bool Manager::Write(EnumVal* id, EnumVal* writer, string path, int num_fields,
-		   Value** vals)
+		   threading::Value** vals)
 	{
 	Stream* stream = FindStream(id);

@ -1116,7 +1130,9 @@ void Manager::Terminate()
 	{
 	for ( vector<Stream *>::iterator s = streams.begin(); s != streams.end(); ++s )
 		{
-		if ( *s )
+		if ( ! *s )
+			continue;
+
 		Flush((*s)->id);
 		}
 	}
--- a/src/logging/Manager.h
+++ b/src/logging/Manager.h
@ -15,7 +15,6 @@ class RotationTimer;

 namespace logging {

-
 class WriterBackend;
 class WriterFrontend;
 class RotationFinishedMessage;
@ -145,6 +144,11 @@ public:
 	 */
 	void Terminate();

+	/**
+	 * Returns a list of supported output formats.
+	 */
+	static list<string> SupportedFormats();
+
 protected:
 	friend class WriterFrontend;
 	friend class RotationFinishedMessage;
--- a/src/logging/WriterBackend.cc
+++ b/src/logging/WriterBackend.cc
@ -222,17 +222,6 @@ bool WriterBackend::Flush()
 	return true;
 	}

-bool WriterBackend::Finish()
-	{
-	if ( ! DoFlush() )
-		{
-		DisableFrontend();
-		return false;
-		}
-
-	return true;
-	}
-
 bool WriterBackend::DoHeartbeat(double network_time, double current_time)
 	{
 	MsgThread::DoHeartbeat(network_time, current_time);
--- a/src/logging/WriterBackend.h
+++ b/src/logging/WriterBackend.h
@ -101,15 +101,6 @@ public:
 	 */
 	bool Rotate(string rotated_path, double open, double close, bool terminating);

-	/**
-	 * Finishes writing to this logger in a regularl fashion. Must not be
-	 * called if an error has been indicated earlier. After calling this,
-	 * no further writing must be performed.
-	 *
-	 * @return False if an error occured.
-	 */
-	bool Finish();
-
 	/**
 	 * Disables the frontend that has instantiated this backend. Once
 	 * disabled,the frontend will not send any further message over.
@ -175,6 +166,8 @@ public:
 	string Render(const threading::Value::subnet_t& subnet) const;

 protected:
+	friend class FinishMessage;
+
 	/**
 	 * Writer-specific intialization method.
 	 *
@ -272,26 +265,18 @@ protected:
 			      bool terminating) = 0;

 	/**
-	 * Writer-specific method implementing log output finalization at
-	 * termination. Not called when any of the other methods has
-	 * previously signaled an error, i.e., executing this method signals
-	 * a regular shutdown of the writer.
+	 * Writer-specific method called just before the threading system is
+	 * going to shutdown.
 	 * 
-	 * A writer implementation must override this method but it can just
-	 * ignore calls if flushing doesn't align with its semantics.
-	 *
-	 * If the method returns false, it will be assumed that a fatal error
-	 * has occured that prevents the writer from further operation; it
-	 * will then be disabled and eventually deleted. When returning
-	 * false, an implementation should also call Error() to indicate what
-	 * happened.
+	 * This method can be overridden but one must call
+	 * WriterBackend::DoFinish().
 	 */
-	virtual bool DoFinish() = 0;
+	virtual bool DoFinish() { return MsgThread::DoFinish(); }

 	/**
 	 * Triggered by regular heartbeat messages from the main thread.
 	 *
-	 * This method can be overridden but once must call
+	 * This method can be overridden but one must call
 	 * WriterBackend::DoHeartbeat().
 	 */
 	virtual bool DoHeartbeat(double network_time, double current_time);
--- a/src/logging/WriterFrontend.cc
+++ b/src/logging/WriterFrontend.cc
@ -90,7 +90,7 @@ public:
 	FinishMessage(WriterBackend* backend)
 		: threading::InputMessage<WriterBackend>("Finish", backend)	{}

-	virtual bool Process() { return Object()->Finish(); }
+	virtual bool Process() { return Object()->DoFinish(); }
 };

 }
@ -117,7 +117,8 @@ WriterFrontend::WriterFrontend(EnumVal* arg_stream, EnumVal* arg_writer, bool ar
 	if ( local )
 		{
 		backend = log_mgr->CreateBackend(this, writer->AsEnum());
-		assert(backend);
+
+		if ( backend )
 			backend->Start();
 		}

--- a/src/logging/writers/Ascii.cc
+++ b/src/logging/writers/Ascii.cc
@ -69,8 +69,7 @@ bool Ascii::WriteHeaderField(const string& key, const string& val)
 	return (fwrite(str.c_str(), str.length(), 1, file) == 1);
 	}

-bool Ascii::DoInit(string path, int num_fields,
-			    const Field* const * fields)
+bool Ascii::DoInit(string path, int num_fields, const Field* const * fields)
 	{
 	if ( output_to_stdout )
 		path = "/dev/stdout";
@ -146,7 +145,7 @@ bool Ascii::DoFlush()

 bool Ascii::DoFinish()
 	{
-	return true;
+	return WriterBackend::DoFinish();
 	}

 bool Ascii::DoWriteOne(ODesc* desc, Value* val, const Field* field)
--- a/src/logging/writers/DataSeries.cc
+++ b/src/logging/writers/DataSeries.cc
@ -0,0 +1,476 @@
+// See the file "COPYING" in the main distribution directory for copyright.
+
+#include <map>
+#include <string>
+#include <errno.h>
+
+#include <DataSeries/GeneralField.hpp>
+
+#include "NetVar.h"
+#include "threading/SerialTypes.h"
+
+#include "DataSeries.h"
+
+using namespace logging;
+using namespace writer;
+
+// NOTE: Naming conventions are a little bit scattershot at the moment.
+// Within the scope of this file, a function name prefixed by '_' denotes a
+// static function.
+
+// ************************ LOCAL PROTOTYPES *********************************
+
+struct SchemaValue;
+
+/**
+ *  Turns a log value into a std::string.  Uses an ostringstream to do the
+ *  heavy lifting, but still need to switch on the type to know which value
+ *  in the union to give to the string string for processing.
+ *
+ *  @param val The value we wish to convert to a string
+ *  @return the string value of val
+ */
+static std::string _LogValueToString(threading::Value* val);
+
+/**
+ *  Takes a field type and converts it to a relevant DataSeries type.
+ *
+ *  @param field We extract the type from this and convert it into a relevant DS type.
+ *  @return String representation of type that DataSeries can understand.
+ */
+static string _GetDSFieldType(const threading::Field* field);
+
+/**
+ *  Takes a field type and converts it to a readable string.
+ *
+ *  @param field We extract the type from this and convert it into a readable string.
+ *  @return String representation of the field's type
+ */
+static string _GetBroTypeString(const threading::Field *field);
+
+/**
+ *  Takes a list of types, a list of names, and a title, and uses it to construct a valid DataSeries XML schema
+ *  thing, which is then returned as a std::string
+ *
+ *  @param opts std::vector of strings containing a list of options to be appended to each field (e.g. "pack_relative=yes")
+ *  @param sTitle Name of this schema.  Ideally, these schemas would be aggregated and re-used.
+ */
+static string _BuildDSSchemaFromFieldTypes(const vector<SchemaValue>& vals, string sTitle);
+
+/**
+ *  Are there any options we should put into the XML schema?
+ *
+ *  @param field We extract the type from this and return any options that make sense for that type.
+ *  @return Options that can be added directly to the XML (e.g. "pack_relative=\"yes\"")
+ */
+static std::string _GetDSOptionsForType(const threading::Field *field);
+
+/**
+ *  Internal helper structure; populate a vector of these which is passed to the XML generator for its use.
+ */
+struct SchemaValue
+{
+	string ds_type;
+	string bro_type;
+	string field_name;
+	string field_options;
+
+	SchemaValue(const threading::Field *field)
+	{
+		ds_type = _GetDSFieldType(field);
+		field_name = string(field->name);
+		field_options = _GetDSOptionsForType(field);
+		bro_type = _GetBroTypeString(field);
+	}
+};
+
+// ************************ LOCAL IMPL *********************************
+
+std::string DataSeries::LogValueToString(threading::Value *val)
+{
+	const int strsz = 1024;
+	char strbuf[strsz];
+
+	// In some cases, no value is attached.  If this is the case, return an empty string.
+	if(!val->present)
+		return "";
+
+	std::ostringstream ostr;
+	switch(val->type)
+	{
+	case TYPE_BOOL:
+		return (val->val.int_val ? "true" : "false");
+
+	case TYPE_INT:
+		ostr << val->val.int_val;
+		return ostr.str();
+
+	case TYPE_COUNT:
+	case TYPE_COUNTER:
+	case TYPE_PORT:
+		ostr << val->val.uint_val;
+		return ostr.str();
+
+	case TYPE_SUBNET:
+	        ostr << Render(val->val.subnet_val);
+		return ostr.str();
+
+	case TYPE_ADDR:
+	        ostr << Render(val->val.addr_val);
+		return ostr.str();
+
+	// Note: These two cases are relatively special.  We need to convert these values into their integer equivalents
+	// to maximize precision.  At the moment, there won't be a noticeable effect (Bro uses the double format everywhere
+	// internally, so we've already lost the precision we'd gain here), but timestamps may eventually switch to this
+	// representation within Bro.
+	//
+	// in the near-term, this *should* lead to better pack_relative (and thus smaller output files).
+	case TYPE_TIME:
+	case TYPE_INTERVAL:
+		ostr << (unsigned long)(DataSeries::TIME_SCALE * val->val.double_val);
+		return ostr.str();
+
+	case TYPE_DOUBLE:
+		ostr << val->val.double_val;
+		return ostr.str();
+
+	case TYPE_ENUM:
+	case TYPE_STRING:
+	case TYPE_FILE:
+	{
+		int size = val->val.string_val->size();
+		string tmpString = "";
+		if(size)
+			tmpString = string(val->val.string_val->data(), val->val.string_val->size());
+		else
+			tmpString = string("");
+		return tmpString;
+	}
+	case TYPE_TABLE:
+	{
+		if ( ! val->val.set_val.size )
+			{
+			return "";
+			}
+
+		string tmpString = "";
+		for ( int j = 0; j < val->val.set_val.size; j++ )
+			{
+			if ( j > 0 )
+				tmpString += ":";  //TODO: Specify set separator char in configuration.
+
+			tmpString += LogValueToString(val->val.set_val.vals[j]);
+			}
+		return tmpString;
+	}
+	case TYPE_VECTOR:
+	{
+		if ( ! val->val.vector_val.size )
+			{
+			return "";
+			}
+
+		string tmpString = "";
+		for ( int j = 0; j < val->val.vector_val.size; j++ )
+			{
+			if ( j > 0 )
+				tmpString += ":";  //TODO: Specify set separator char in configuration.
+
+			tmpString += LogValueToString(val->val.vector_val.vals[j]);
+			}
+
+		return tmpString;
+	}
+	default:
+		return "???";
+	}
+}
+
+static string _GetDSFieldType(const threading::Field *field)
+{
+	switch(field->type)
+	{
+	case TYPE_BOOL:
+		return "bool";
+
+	case TYPE_COUNT:
+	case TYPE_COUNTER:
+	case TYPE_PORT:
+	case TYPE_INT:
+	case TYPE_TIME:
+	case TYPE_INTERVAL:
+		return "int64";
+
+	case TYPE_DOUBLE:
+		return "double";
+
+	case TYPE_SUBNET:
+	case TYPE_ADDR:
+	case TYPE_ENUM:
+	case TYPE_STRING:
+	case TYPE_FILE:
+	case TYPE_TABLE:
+	case TYPE_VECTOR:
+	default:
+		return "variable32";
+
+	}
+}
+
+static string _GetBroTypeString(const threading::Field *field)
+{
+	switch(field->type)
+	{
+	case TYPE_BOOL:
+		return "bool";
+	case TYPE_COUNT:
+		return "count";
+	case TYPE_COUNTER:
+		return "counter";
+	case TYPE_PORT:
+		return "port";
+	case TYPE_INT:
+		return "int";
+	case TYPE_TIME:
+		return "time";
+	case TYPE_INTERVAL:
+		return "interval";
+	case TYPE_DOUBLE:
+		return "double";
+	case TYPE_SUBNET:
+		return "subnet";
+	case TYPE_ADDR:
+		return "addr";
+	case TYPE_ENUM:
+		return "enum";
+	case TYPE_STRING:
+		return "string";
+	case TYPE_FILE:
+		return "file";
+	case TYPE_TABLE:
+		return "table";
+	case TYPE_VECTOR:
+		return "vector";
+	default:
+		return "???";
+	}
+}
+
+static string _BuildDSSchemaFromFieldTypes(const vector<SchemaValue>& vals, string sTitle)
+{
+	if("" == sTitle)
+		{
+		sTitle = "GenericBroStream";
+		}
+    string xmlschema;
+	xmlschema  = "<ExtentType name=\"" + sTitle + "\" version=\"1.0\" namespace=\"bro-ids.org\">\n";
+	for(size_t i = 0; i < vals.size(); ++i)
+		{
+		xmlschema += "\t<field type=\"" + vals[i].ds_type + "\" name=\"" + vals[i].field_name + "\" " + vals[i].field_options + "/>\n";
+		}
+	xmlschema += "</ExtentType>\n";
+	for(size_t i = 0; i < vals.size(); ++i)
+		{
+		xmlschema += "<!-- " + vals[i].field_name + " : " + vals[i].bro_type + " -->\n";
+		}
+	return xmlschema;
+}
+
+static std::string _GetDSOptionsForType(const threading::Field *field)
+{
+	switch(field->type)
+	{
+	case TYPE_TIME:
+	case TYPE_INTERVAL:
+		return "pack_relative=\"" + std::string(field->name) + "\"";
+	case TYPE_SUBNET:
+	case TYPE_ADDR:
+	case TYPE_ENUM:
+	case TYPE_STRING:
+	case TYPE_FILE:
+	case TYPE_TABLE:
+	case TYPE_VECTOR:
+		return "pack_unique=\"yes\"";
+	default:
+		return "";
+	}
+}
+
+// ************************ CLASS IMPL *********************************
+
+DataSeries::DataSeries(WriterFrontend* frontend) : WriterBackend(frontend)
+{
+	ds_compression = string((const char *)BifConst::LogDataSeries::ds_compression->Bytes(), BifConst::LogDataSeries::ds_compression->Len());
+	ds_dump_schema = BifConst::LogDataSeries::ds_dump_schema;
+	ds_extent_size = BifConst::LogDataSeries::ds_extent_size;
+	ds_num_threads = BifConst::LogDataSeries::ds_num_threads;
+}
+
+DataSeries::~DataSeries()
+{
+}
+
+bool DataSeries::DoInit(string path, int num_fields, const threading::Field* const * fields)
+	{
+	// We first construct an XML schema thing (and, if ds_dump_schema is
+	// set, dump it to path + ".ds.xml").  Assuming that goes well, we
+	// use that schema to build our output logfile and prepare it to be
+	// written to.
+
+	// Note: compressor count must be set *BEFORE* DataSeriesSink is instantiated.
+	if(ds_num_threads < THREAD_MIN && ds_num_threads != 0)
+		{
+		fprintf(stderr, "%d is too few threads!  Using %d instead\n", (int)ds_num_threads, (int)THREAD_MIN);
+		ds_num_threads = THREAD_MIN;
+		}
+	if(ds_num_threads > THREAD_MAX)
+		{
+		fprintf(stderr, "%d is too many threads!  Dropping back to %d\n", (int)ds_num_threads, (int)THREAD_MAX);
+		ds_num_threads = THREAD_MAX;
+		}
+
+    if(ds_num_threads > 0)
+		{
+		DataSeriesSink::setCompressorCount(ds_num_threads);
+		}
+	vector<SchemaValue> schema_list;
+	for ( int i = 0; i < num_fields; i++ )
+		{
+		const threading::Field* field = fields[i];
+		SchemaValue val(field);
+		schema_list.push_back(val);
+		}
+	string schema = _BuildDSSchemaFromFieldTypes(schema_list, path);
+	if(ds_dump_schema)
+		{
+		FILE * pFile;
+	  	pFile = fopen ( string(path + ".ds.xml").c_str() , "wb" );
+	    if(NULL == pFile)
+			{
+			perror("Could not dump schema");
+			}
+		fwrite (schema.c_str(), 1 , schema.length() , pFile );
+		fclose (pFile);
+		}
+
+	int compress_type = Extent::compress_all;
+
+	if(ds_compression == "lzf")
+		{
+		compress_type = Extent::compress_lzf;
+		}
+	else if(ds_compression == "lzo")
+		{
+		compress_type = Extent::compress_lzo;
+		}
+	else if(ds_compression == "gz")
+		{
+		compress_type = Extent::compress_gz;
+		}
+	else if(ds_compression == "bz2")
+		{
+		compress_type = Extent::compress_bz2;
+		}
+	else if(ds_compression == "none")
+		{
+		compress_type = Extent::compress_none;
+		}
+	else if(ds_compression == "any")
+		{
+		compress_type = Extent::compress_all;
+		}
+	else
+		{
+		fprintf(stderr, "%s is not a valid compression type.  Valid types are: 'lzf', 'lzo', 'gz', 'bz2', 'none', 'any'\n", ds_compression.c_str());
+		fprintf(stderr, "Defaulting to 'any'\n");
+		}
+
+    log_type = const_cast<ExtentType *>(log_types.registerType(schema));
+
+	log_series.setType(*log_type);
+    log_file = new DataSeriesSink(path + ".ds", compress_type);
+	log_file->writeExtentLibrary(log_types);
+
+	for(size_t i = 0; i < schema_list.size(); ++i)
+		extents.insert(std::make_pair(schema_list[i].field_name, GeneralField::create(log_series, schema_list[i].field_name)));
+
+	if(ds_extent_size < ROW_MIN)
+		{
+			fprintf(stderr, "%d is not a valid value for 'rows'.  Using min of %d instead.\n", (int)ds_extent_size, (int)ROW_MIN);
+			ds_extent_size = ROW_MIN;
+		}
+	else if(ds_extent_size > ROW_MAX)
+		{
+			fprintf(stderr, "%d is not a valid value for 'rows'.  Using max of %d instead.\n", (int)ds_extent_size, (int)ROW_MAX);
+			ds_extent_size = ROW_MAX;
+		}
+    log_output = new OutputModule(*log_file, log_series, log_type, ds_extent_size);
+
+	return true;
+
+	}
+
+bool DataSeries::DoFlush()
+{
+	// Flushing is handled by DataSeries automatically, so this function doesn't do anything.
+	return true;
+}
+
+bool DataSeries::DoFinish()
+{
+	for(ExtentIterator iter = extents.begin();
+		iter != extents.end(); ++iter)
+		{
+		delete iter->second;
+		}
+	extents.clear();
+	// Don't delete the file before you delete the output, or bad things happen.
+	delete log_output;
+	delete log_file;
+
+	return WriterBackend::DoFinish();
+}
+
+bool DataSeries::DoWrite(int num_fields, const threading::Field* const * fields,
+			 threading::Value** vals)
+{
+	log_output->newRecord();
+	for(size_t i = 0; i < (size_t)num_fields; ++i)
+		{
+		ExtentIterator iter = extents.find(fields[i]->name);
+		assert(iter != extents.end());
+		if( iter != extents.end() )
+			{
+			GeneralField *cField = iter->second;
+			if(vals[i]->present)
+				cField->set(LogValueToString(vals[i]));
+			}
+		}
+
+	return true;
+}
+
+bool DataSeries::DoRotate(string rotated_path, double open, double close, bool terminating)
+{
+	// Note that if DS files are rotated too often, the aggregate log size will be (much) larger.
+
+	DoFinish();
+
+	string dsname = Path() + ".ds";
+	string nname = rotated_path + ".ds";
+	rename(dsname.c_str(), nname.c_str());
+
+	if ( ! FinishedRotation(nname, dsname, open, close, terminating) )
+		{
+		Error(Fmt("error rotating %s to %s", dsname.c_str(), nname.c_str()));
+		return false;
+		}
+
+	return DoInit(Path(), NumFields(), Fields());
+}
+
+bool DataSeries::DoSetBuf(bool enabled)
+{
+	// DataSeries is *always* buffered to some degree.  This option is ignored.
+	return true;
+}
--- a/src/logging/writers/DataSeries.h
+++ b/src/logging/writers/DataSeries.h
@ -0,0 +1,69 @@
+// See the file "COPYING" in the main distribution directory for copyright.
+//
+// A binary log writer producing DataSeries output. See doc/data-series.rst
+// for more information.
+
+#ifndef LOGGING_WRITER_DATA_SERIES_H
+#define LOGGING_WRITER_DATA_SERIES_H
+
+#include "../WriterBackend.h"
+
+#include <DataSeries/ExtentType.hpp>
+#include <DataSeries/DataSeriesFile.hpp>
+#include <DataSeries/DataSeriesModule.hpp>
+#include <DataSeries/GeneralField.hpp>
+
+namespace logging { namespace writer {
+
+class DataSeries : public WriterBackend {
+public:
+	DataSeries(WriterFrontend* frontend);
+	~DataSeries();
+
+	static WriterBackend* Instantiate(WriterFrontend* frontend)
+		{ return new DataSeries(frontend); }
+
+protected:
+	virtual bool DoInit(string path, int num_fields,
+			    const threading::Field* const * fields);
+
+	virtual bool DoWrite(int num_fields, const threading::Field* const* fields,
+			     threading::Value** vals);
+	virtual bool DoSetBuf(bool enabled);
+	virtual bool DoRotate(string rotated_path, double open,
+			      double close, bool terminating);
+	virtual bool DoFlush();
+	virtual bool DoFinish();
+
+private:
+	static const size_t ROW_MIN = 2048;                      // Minimum extent size.
+	static const size_t ROW_MAX = (1024 * 1024 * 100);       // Maximum extent size.
+	static const size_t THREAD_MIN = 1;                      // Minimum number of compression threads that DataSeries may spawn.
+	static const size_t THREAD_MAX = 128;                    // Maximum number of compression threads that DataSeries may spawn.
+	static const size_t TIME_SCALE = 1000000;                // Fixed-point multiplier for time values when converted to integers.
+
+	std::string LogValueToString(threading::Value *val);
+
+	typedef std::map<string, GeneralField *> ExtentMap;
+	typedef ExtentMap::iterator ExtentIterator;
+
+	// Internal DataSeries structures we need to keep track of.
+	DataSeriesSink* log_file;
+	ExtentTypeLibrary log_types;
+	ExtentType *log_type;
+	ExtentSeries log_series;
+	OutputModule* log_output;
+	ExtentMap extents;
+
+	// Options set from the script-level.
+	uint64 ds_extent_size;
+	uint64 ds_num_threads;
+	string ds_compression;
+	bool ds_dump_schema;
+};
+
+}
+}
+
+#endif
+
--- a/src/main.cc
+++ b/src/main.cc
@ -201,6 +201,27 @@ void usage()
 	fprintf(stderr, "    $BRO_LOG_SUFFIX                | ASCII log file extension (.%s)\n", logging::writer::Ascii::LogExt().c_str());
 	fprintf(stderr, "    $BRO_PROFILER_FILE             | Output file for script execution statistics (not set)\n");

+	fprintf(stderr, "\n");
+	fprintf(stderr, "    Supported log formats: ");
+
+	bool first = true;
+	list<string> fmts = logging::Manager::SupportedFormats();
+
+	for ( list<string>::const_iterator i = fmts.begin(); i != fmts.end(); ++i )
+		{
+		if ( *i == "None" )
+			// Skip, it's uninteresting.
+			continue;
+
+		if ( ! first )
+			fprintf(stderr, ",");
+
+		fprintf(stderr, "%s", (*i).c_str());
+		first = false;
+		}
+
+	fprintf(stderr, "\n");
+
 	exit(1);
 	}

--- a/src/types.bif
+++ b/src/types.bif
@ -162,6 +162,7 @@ enum Writer %{
 	WRITER_DEFAULT,
 	WRITER_NONE,
 	WRITER_ASCII,
+	WRITER_DATASERIES,
 %}

 enum ID %{