Merge remote branch 'origin/topic/jsiwek/log-escaping'

* origin/topic/jsiwek/log-escaping:
  Add missing ascii writer options to log header.
  Escape the ASCII log's set separator (addresses #712)
  Rewrite ODesc character escaping functionality. (addresses #681)

Closes #712.
This commit is contained in:
Robin Sommer 2011-12-18 16:42:58 -08:00
parent 0a3e160a8d
commit 3220bbce55
72 changed files with 487 additions and 168 deletions

View file

@ -41,8 +41,7 @@ ODesc::ODesc(desc_type t, BroFile* arg_f)
do_flush = 1;
include_stats = 0;
indent_with_spaces = 0;
escape = 0;
escape_len = 0;
escape = false;
}
ODesc::~ODesc()
@ -56,10 +55,9 @@ ODesc::~ODesc()
free(base);
}
void ODesc::SetEscape(const char* arg_escape, int len)
void ODesc::EnableEscaping()
{
escape = arg_escape;
escape_len = len;
escape = true;
}
void ODesc::PushIndent()
@ -228,6 +226,25 @@ static const char* find_first_unprintable(ODesc* d, const char* bytes, unsigned
return 0;
}
/**
 * Finds the earliest span in a buffer that has to be escaped: either the
 * first unprintable character reported by find_first_unprintable(), or the
 * first occurrence of any registered escape sequence, whichever starts at
 * the lower address.
 *
 * On a tie the earlier candidate wins (the comparison is strict): the
 * unprintable character beats a sequence at the same address, and among
 * sequences the one registered first is kept.
 *
 * @param bytes start of the buffer to search.
 * @param n number of bytes to search.
 * @return a pair of (start address, length) describing the span to escape.
 *         The address is 0 if nothing needs escaping. The length is always
 *         at least 1 when the address is non-zero.
 */
pair<const char*, size_t> ODesc::FirstEscapeLoc(const char* bytes, size_t n)
	{
	pair<const char*, size_t> p(find_first_unprintable(this, bytes, n), 1);

	// No sequences registered: the unprintable-character result is final,
	// so avoid copying the buffer into a temporary string.
	if ( escape_sequences.empty() )
		return p;

	string str(bytes, n);
	list<string>::const_iterator it;

	for ( it = escape_sequences.begin(); it != escape_sequences.end(); ++it )
		{
		// An empty sequence "matches" at offset 0 with length 0. AddBytes()
		// advances its cursor by the returned length, so a zero-length
		// match would make it loop forever. Ignore such sequences.
		if ( it->empty() )
			continue;

		size_t pos = str.find(*it);

		if ( pos != string::npos && (p.first == 0 || bytes + pos < p.first) )
			{
			p.first = bytes + pos;
			p.second = it->size();
			}
		}

	return p;
	}
void ODesc::AddBytes(const void* bytes, unsigned int n)
{
if ( ! escape )
@ -241,45 +258,30 @@ void ODesc::AddBytes(const void* bytes, unsigned int n)
while ( s < e )
{
const char* t1 = (const char*) memchr(s, escape[0], e - s);
if ( ! t1 )
t1 = e;
const char* t2 = find_first_unprintable(this, s, t1 - s);
if ( t2 && t2 < t1 )
pair<const char*, size_t> p = FirstEscapeLoc(s, e - s);
if ( p.first )
{
AddBytesRaw(s, t2 - s);
char hex[6] = "\\x00";
hex[2] = hex_chars[((*t2) & 0xf0) >> 4];
hex[3] = hex_chars[(*t2) & 0x0f];
AddBytesRaw(hex, 4);
s = t2 + 1;
continue;
AddBytesRaw(s, p.first - s);
if ( p.second == 1 )
{
char hex[6] = "\\x00";
hex[2] = hex_chars[((*p.first) & 0xf0) >> 4];
hex[3] = hex_chars[(*p.first) & 0x0f];
AddBytesRaw(hex, 4);
}
else
{
string esc_str = get_escaped_string(string(p.first, p.second));
AddBytesRaw(esc_str.c_str(), esc_str.size());
}
s = p.first + p.second;
}
if ( memcmp(t1, escape, escape_len) != 0 )
break;
AddBytesRaw(s, t1 - s);
for ( int i = 0; i < escape_len; ++i )
else
{
char hex[5] = "\\x00";
hex[2] = hex_chars[((*t1) & 0xf0) >> 4];
hex[3] = hex_chars[(*t1) & 0x0f];
AddBytesRaw(hex, 4);
++t1;
AddBytesRaw(s, e - s);
break;
}
s = t1;
}
if ( s < e )
AddBytesRaw(s, e - s);
}
void ODesc::AddBytesRaw(const void* bytes, unsigned int n)

View file

@ -4,6 +4,8 @@
#define descriptor_h
#include <stdio.h>
#include <list>
#include <utility>
#include "BroString.h"
typedef enum {
@ -48,8 +50,13 @@ public:
void SetFlush(int arg_do_flush) { do_flush = arg_do_flush; }
// The string passed in must remain valid as long as this object lives.
void SetEscape(const char* escape, int len);
void EnableEscaping();
// Registers an additional sequence of characters to escape on output.
// The NUL-terminated string is copied into escape_sequences.
void AddEscapeSequence(const char* s) { escape_sequences.push_back(s); }
// Same as above, but copies exactly n bytes starting at s, so s does not
// have to be NUL-terminated.
void AddEscapeSequence(const char* s, size_t n)
{ escape_sequences.push_back(string(s, n)); }
// Unregisters a sequence given as a NUL-terminated string.
// Note: std::list::remove() erases *every* element equal to the value, so
// a sequence that was added more than once is dropped completely by a
// single call.
void RemoveEscapeSequence(const char* s) { escape_sequences.remove(s); }
// Same as above, but the sequence is the n bytes starting at s.
void RemoveEscapeSequence(const char* s, size_t n)
{ escape_sequences.remove(string(s, n)); }
void PushIndent();
void PopIndent();
@ -133,6 +140,19 @@ protected:
void OutOfMemory();
/**
* Returns the location of the first place in the bytes to be hex-escaped.
*
* @param bytes the memory address at which to start searching for
* characters that need to be escaped.
* @param n the maximum number of bytes to search.
* @return a pair whose first element represents a starting memory address
* to be escaped up to the number of characters indicated by the
* second element. The first element may be 0 if nothing is
* to be escaped.
*/
pair<const char*, size_t> FirstEscapeLoc(const char* bytes, size_t n);
desc_type type;
desc_style style;
@ -140,8 +160,8 @@ protected:
unsigned int offset; // where we are in the buffer
unsigned int size; // size of buffer in bytes
int escape_len; // number of bytes in to escape sequence
const char* escape; // bytes to escape on output
bool escape; // escape unprintable characters in output?
list<string> escape_sequences; // additional sequences of chars to escape
BroFile* f; // or the file we're using.

View file

@ -6,27 +6,6 @@
#include "LogWriterAscii.h"
#include "NetVar.h"
/**
 * Takes a string, escapes each character into its equivalent hex code (\x##), and
 * returns a string containing all escaped values.
 *
 * Every byte yields exactly four output characters. Bytes are converted via
 * unsigned char so that values >= 0x80 are not sign-extended when char is a
 * signed type (which would otherwise print e.g. \xffffffe9 instead of \xe9).
 *
 * @param str string to escape
 * @return A std::string containing a list of escaped hex values of the form \x##
 */
static std::string get_escaped_string(const std::string& str)
	{
	char tbuf[16];
	std::string esc;
	esc.reserve(str.length() * 4);

	for ( size_t i = 0; i < str.length(); ++i )
		{
		// %x expects an unsigned int; go through unsigned char first so
		// the value is always in the range 0x00-0xff.
		snprintf(tbuf, sizeof(tbuf), "\\x%02x",
		         static_cast<unsigned int>(static_cast<unsigned char>(str[i])));
		esc += tbuf;
		}

	return esc;
	}
LogWriterAscii::LogWriterAscii()
{
file = 0;
@ -59,7 +38,8 @@ LogWriterAscii::LogWriterAscii()
memcpy(header_prefix, BifConst::LogAscii::header_prefix->Bytes(),
header_prefix_len);
desc.SetEscape(separator, separator_len);
desc.EnableEscaping();
desc.AddEscapeSequence(separator, separator_len);
}
LogWriterAscii::~LogWriterAscii()
@ -108,7 +88,13 @@ bool LogWriterAscii::DoInit(string path, int num_fields,
if( fwrite(str.c_str(), str.length(), 1, file) != 1 )
goto write_error;
if ( ! WriteHeaderField("path", path) )
if ( ! (WriteHeaderField("set_separator", get_escaped_string(
string(set_separator, set_separator_len))) &&
WriteHeaderField("empty_field", get_escaped_string(
string(empty_field, empty_field_len))) &&
WriteHeaderField("unset_field", get_escaped_string(
string(unset_field, unset_field_len))) &&
WriteHeaderField("path", path)) )
goto write_error;
string names;
@ -238,14 +224,19 @@ bool LogWriterAscii::DoWriteOne(ODesc* desc, LogVal* val, const LogField* field)
break;
}
desc->AddEscapeSequence(set_separator, set_separator_len);
for ( int j = 0; j < val->val.set_val.size; j++ )
{
if ( j > 0 )
desc->AddN(set_separator, set_separator_len);
desc->AddRaw(set_separator, set_separator_len);
if ( ! DoWriteOne(desc, val->val.set_val.vals[j], field) )
{
desc->RemoveEscapeSequence(set_separator, set_separator_len);
return false;
}
}
desc->RemoveEscapeSequence(set_separator, set_separator_len);
break;
}
@ -258,14 +249,19 @@ bool LogWriterAscii::DoWriteOne(ODesc* desc, LogVal* val, const LogField* field)
break;
}
desc->AddEscapeSequence(set_separator, set_separator_len);
for ( int j = 0; j < val->val.vector_val.size; j++ )
{
if ( j > 0 )
desc->AddN(set_separator, set_separator_len);
desc->AddRaw(set_separator, set_separator_len);
if ( ! DoWriteOne(desc, val->val.vector_val.vals[j], field) )
{
desc->RemoveEscapeSequence(set_separator, set_separator_len);
return false;
}
}
desc->RemoveEscapeSequence(set_separator, set_separator_len);
break;
}

View file

@ -41,6 +41,27 @@
#include "Net.h"
#include "Reporter.h"
/**
 * Takes a string, escapes each character into its equivalent hex code (\x##), and
 * returns a string containing all escaped values.
 *
 * Every byte yields exactly four output characters. Bytes are converted via
 * unsigned char so that values >= 0x80 are not sign-extended when char is a
 * signed type (which would otherwise print e.g. \xffffffe9 instead of \xe9).
 *
 * @param str string to escape
 * @return A std::string containing a list of escaped hex values of the form \x##
 */
std::string get_escaped_string(const std::string& str)
	{
	char tbuf[16];
	std::string esc;
	esc.reserve(str.length() * 4);

	for ( size_t i = 0; i < str.length(); ++i )
		{
		// %x expects an unsigned int; go through unsigned char first so
		// the value is always in the range 0x00-0xff.
		snprintf(tbuf, sizeof(tbuf), "\\x%02x",
		         static_cast<unsigned int>(static_cast<unsigned char>(str[i])));
		esc += tbuf;
		}

	return esc;
	}
char* copy_string(const char* s)
{
char* c = new char[strlen(s)+1];

View file

@ -89,6 +89,8 @@ void delete_each(T* t)
delete *it;
}
std::string get_escaped_string(const std::string& str);
extern char* copy_string(const char* s);
extern int streq(const char* s1, const char* s2);