mirror of
https://github.com/zeek/zeek.git
synced 2025-10-06 00:28:21 +00:00
Merge remote branch 'origin/topic/jsiwek/log-escaping'
* origin/topic/jsiwek/log-escaping: Add missing ascii writer options to log header. Escape the ASCII log's set separator (addresses #712) Rewrite ODesc character escaping functionality. (addresses #681) Closes #712.
This commit is contained in:
parent
0a3e160a8d
commit
3220bbce55
72 changed files with 487 additions and 168 deletions
80
src/Desc.cc
80
src/Desc.cc
|
@ -41,8 +41,7 @@ ODesc::ODesc(desc_type t, BroFile* arg_f)
|
|||
do_flush = 1;
|
||||
include_stats = 0;
|
||||
indent_with_spaces = 0;
|
||||
escape = 0;
|
||||
escape_len = 0;
|
||||
escape = false;
|
||||
}
|
||||
|
||||
ODesc::~ODesc()
|
||||
|
@ -56,10 +55,9 @@ ODesc::~ODesc()
|
|||
free(base);
|
||||
}
|
||||
|
||||
void ODesc::SetEscape(const char* arg_escape, int len)
|
||||
void ODesc::EnableEscaping()
|
||||
{
|
||||
escape = arg_escape;
|
||||
escape_len = len;
|
||||
escape = true;
|
||||
}
|
||||
|
||||
void ODesc::PushIndent()
|
||||
|
@ -228,6 +226,25 @@ static const char* find_first_unprintable(ODesc* d, const char* bytes, unsigned
|
|||
return 0;
|
||||
}
|
||||
|
||||
/**
 * Locates the earliest span within a byte range that needs escaping.
 *
 * Two kinds of candidates are considered: the location reported by
 * find_first_unprintable() (treated as a single byte), and the first
 * occurrence of each string registered in escape_sequences.  Whichever
 * candidate starts earliest wins; on a tie the candidate found first is
 * kept, since only a strictly earlier match replaces the current one.
 *
 * @param bytes start of the memory range to search.
 * @param n number of bytes in the range.
 * @return a pair whose first element is the address where escaping must
 *         begin (0 if nothing in the range needs escaping) and whose second
 *         element is the number of bytes to escape starting there.
 */
pair<const char*, size_t> ODesc::FirstEscapeLoc(const char* bytes, size_t n)
	{
	// Seed with the unprintable-character candidate; a null first element
	// is what the comparison below treats as "no candidate yet".
	pair<const char*, size_t> p(find_first_unprintable(this, bytes, n), 1);

	// Nothing else to look for, so skip copying the buffer into a string.
	if ( escape_sequences.empty() )
		return p;

	string str(bytes, n);
	list<string>::const_iterator it;

	for ( it = escape_sequences.begin(); it != escape_sequences.end(); ++it )
		{
		// An empty sequence would "match" at offset 0 with length 0.
		// AddBytes() advances its cursor by the returned length, so a
		// zero-length match would never make progress and loop forever.
		if ( it->empty() )
			continue;

		size_t pos = str.find(*it);

		if ( pos != string::npos && (p.first == 0 || bytes + pos < p.first) )
			{
			p.first = bytes + pos;
			p.second = it->size();
			}
		}

	return p;
	}
|
||||
|
||||
void ODesc::AddBytes(const void* bytes, unsigned int n)
|
||||
{
|
||||
if ( ! escape )
|
||||
|
@ -241,45 +258,30 @@ void ODesc::AddBytes(const void* bytes, unsigned int n)
|
|||
|
||||
while ( s < e )
|
||||
{
|
||||
const char* t1 = (const char*) memchr(s, escape[0], e - s);
|
||||
|
||||
if ( ! t1 )
|
||||
t1 = e;
|
||||
|
||||
const char* t2 = find_first_unprintable(this, s, t1 - s);
|
||||
|
||||
if ( t2 && t2 < t1 )
|
||||
pair<const char*, size_t> p = FirstEscapeLoc(s, e - s);
|
||||
if ( p.first )
|
||||
{
|
||||
AddBytesRaw(s, t2 - s);
|
||||
|
||||
char hex[6] = "\\x00";
|
||||
hex[2] = hex_chars[((*t2) & 0xf0) >> 4];
|
||||
hex[3] = hex_chars[(*t2) & 0x0f];
|
||||
AddBytesRaw(hex, 4);
|
||||
|
||||
s = t2 + 1;
|
||||
continue;
|
||||
AddBytesRaw(s, p.first - s);
|
||||
if ( p.second == 1 )
|
||||
{
|
||||
char hex[6] = "\\x00";
|
||||
hex[2] = hex_chars[((*p.first) & 0xf0) >> 4];
|
||||
hex[3] = hex_chars[(*p.first) & 0x0f];
|
||||
AddBytesRaw(hex, 4);
|
||||
}
|
||||
else
|
||||
{
|
||||
string esc_str = get_escaped_string(string(p.first, p.second));
|
||||
AddBytesRaw(esc_str.c_str(), esc_str.size());
|
||||
}
|
||||
s = p.first + p.second;
|
||||
}
|
||||
|
||||
if ( memcmp(t1, escape, escape_len) != 0 )
|
||||
break;
|
||||
|
||||
AddBytesRaw(s, t1 - s);
|
||||
|
||||
for ( int i = 0; i < escape_len; ++i )
|
||||
else
|
||||
{
|
||||
char hex[5] = "\\x00";
|
||||
hex[2] = hex_chars[((*t1) & 0xf0) >> 4];
|
||||
hex[3] = hex_chars[(*t1) & 0x0f];
|
||||
AddBytesRaw(hex, 4);
|
||||
++t1;
|
||||
AddBytesRaw(s, e - s);
|
||||
break;
|
||||
}
|
||||
|
||||
s = t1;
|
||||
}
|
||||
|
||||
if ( s < e )
|
||||
AddBytesRaw(s, e - s);
|
||||
}
|
||||
|
||||
void ODesc::AddBytesRaw(const void* bytes, unsigned int n)
|
||||
|
|
28
src/Desc.h
28
src/Desc.h
|
@ -4,6 +4,8 @@
|
|||
#define descriptor_h
|
||||
|
||||
#include <stdio.h>
|
||||
#include <list>
|
||||
#include <utility>
|
||||
#include "BroString.h"
|
||||
|
||||
typedef enum {
|
||||
|
@ -48,8 +50,13 @@ public:
|
|||
|
||||
void SetFlush(int arg_do_flush) { do_flush = arg_do_flush; }
|
||||
|
||||
// The string passed in must remain valid as long as this object lives.
|
||||
void SetEscape(const char* escape, int len);
|
||||
void EnableEscaping();
|
||||
void AddEscapeSequence(const char* s) { escape_sequences.push_back(s); }
|
||||
void AddEscapeSequence(const char* s, size_t n)
|
||||
{ escape_sequences.push_back(string(s, n)); }
|
||||
void RemoveEscapeSequence(const char* s) { escape_sequences.remove(s); }
|
||||
void RemoveEscapeSequence(const char* s, size_t n)
|
||||
{ escape_sequences.remove(string(s, n)); }
|
||||
|
||||
void PushIndent();
|
||||
void PopIndent();
|
||||
|
@ -133,6 +140,19 @@ protected:
|
|||
|
||||
void OutOfMemory();
|
||||
|
||||
/**
|
||||
* Returns the location of the first place in the bytes to be hex-escaped.
|
||||
*
|
||||
* @param bytes the starting memory address to start searching for
|
||||
* an escapable character.
|
||||
* @param n the maximum number of bytes to search.
|
||||
* @return a pair whose first element represents a starting memory address
|
||||
* to be escaped up to the number of characters indicated by the
|
||||
* second element. The first element may be 0 if nothing is
|
||||
* to be escaped.
|
||||
*/
|
||||
pair<const char*, size_t> FirstEscapeLoc(const char* bytes, size_t n);
|
||||
|
||||
desc_type type;
|
||||
desc_style style;
|
||||
|
||||
|
@ -140,8 +160,8 @@ protected:
|
|||
unsigned int offset; // where we are in the buffer
|
||||
unsigned int size; // size of buffer in bytes
|
||||
|
||||
int escape_len; // number of bytes in to escape sequence
|
||||
const char* escape; // bytes to escape on output
|
||||
bool escape; // escape unprintable characters in output?
|
||||
list<string> escape_sequences; // additional sequences of chars to escape
|
||||
|
||||
BroFile* f; // or the file we're using.
|
||||
|
||||
|
|
|
@ -6,27 +6,6 @@
|
|||
#include "LogWriterAscii.h"
|
||||
#include "NetVar.h"
|
||||
|
||||
/**
 * Takes a string, escapes each character into its equivalent hex code (\x##),
 * and returns a string containing all escaped values.
 *
 * Every byte is rendered as exactly two lowercase hex digits.  Each byte is
 * converted to unsigned char first so that values >= 0x80 are not
 * sign-extended on platforms where plain char is signed (which would
 * otherwise produce output such as \xffffff80).
 *
 * @param str string to escape
 * @return A std::string containing a list of escaped hex values of the form
 *         \x##; empty if str is empty.
 */
static std::string get_escaped_string(const std::string& str)
	{
	static const char hex_digits[] = "0123456789abcdef";

	std::string esc;
	esc.reserve(str.length() * 4); // each input byte becomes "\xNN"

	for ( size_t i = 0; i < str.length(); ++i )
		{
		const unsigned char c = static_cast<unsigned char>(str[i]);

		esc += "\\x";
		esc += hex_digits[c >> 4];
		esc += hex_digits[c & 0x0f];
		}

	return esc;
	}
|
||||
|
||||
LogWriterAscii::LogWriterAscii()
|
||||
{
|
||||
file = 0;
|
||||
|
@ -59,7 +38,8 @@ LogWriterAscii::LogWriterAscii()
|
|||
memcpy(header_prefix, BifConst::LogAscii::header_prefix->Bytes(),
|
||||
header_prefix_len);
|
||||
|
||||
desc.SetEscape(separator, separator_len);
|
||||
desc.EnableEscaping();
|
||||
desc.AddEscapeSequence(separator, separator_len);
|
||||
}
|
||||
|
||||
LogWriterAscii::~LogWriterAscii()
|
||||
|
@ -108,7 +88,13 @@ bool LogWriterAscii::DoInit(string path, int num_fields,
|
|||
if( fwrite(str.c_str(), str.length(), 1, file) != 1 )
|
||||
goto write_error;
|
||||
|
||||
if ( ! WriteHeaderField("path", path) )
|
||||
if ( ! (WriteHeaderField("set_separator", get_escaped_string(
|
||||
string(set_separator, set_separator_len))) &&
|
||||
WriteHeaderField("empty_field", get_escaped_string(
|
||||
string(empty_field, empty_field_len))) &&
|
||||
WriteHeaderField("unset_field", get_escaped_string(
|
||||
string(unset_field, unset_field_len))) &&
|
||||
WriteHeaderField("path", path)) )
|
||||
goto write_error;
|
||||
|
||||
string names;
|
||||
|
@ -238,14 +224,19 @@ bool LogWriterAscii::DoWriteOne(ODesc* desc, LogVal* val, const LogField* field)
|
|||
break;
|
||||
}
|
||||
|
||||
desc->AddEscapeSequence(set_separator, set_separator_len);
|
||||
for ( int j = 0; j < val->val.set_val.size; j++ )
|
||||
{
|
||||
if ( j > 0 )
|
||||
desc->AddN(set_separator, set_separator_len);
|
||||
desc->AddRaw(set_separator, set_separator_len);
|
||||
|
||||
if ( ! DoWriteOne(desc, val->val.set_val.vals[j], field) )
|
||||
{
|
||||
desc->RemoveEscapeSequence(set_separator, set_separator_len);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
desc->RemoveEscapeSequence(set_separator, set_separator_len);
|
||||
|
||||
break;
|
||||
}
|
||||
|
@ -258,14 +249,19 @@ bool LogWriterAscii::DoWriteOne(ODesc* desc, LogVal* val, const LogField* field)
|
|||
break;
|
||||
}
|
||||
|
||||
desc->AddEscapeSequence(set_separator, set_separator_len);
|
||||
for ( int j = 0; j < val->val.vector_val.size; j++ )
|
||||
{
|
||||
if ( j > 0 )
|
||||
desc->AddN(set_separator, set_separator_len);
|
||||
desc->AddRaw(set_separator, set_separator_len);
|
||||
|
||||
if ( ! DoWriteOne(desc, val->val.vector_val.vals[j], field) )
|
||||
{
|
||||
desc->RemoveEscapeSequence(set_separator, set_separator_len);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
desc->RemoveEscapeSequence(set_separator, set_separator_len);
|
||||
|
||||
break;
|
||||
}
|
||||
|
|
21
src/util.cc
21
src/util.cc
|
@ -41,6 +41,27 @@
|
|||
#include "Net.h"
|
||||
#include "Reporter.h"
|
||||
|
||||
/**
 * Takes a string, escapes each character into its equivalent hex code (\x##),
 * and returns a string containing all escaped values.
 *
 * Every byte is rendered as exactly two lowercase hex digits.  Each byte is
 * converted to unsigned char first so that values >= 0x80 are not
 * sign-extended on platforms where plain char is signed (which would
 * otherwise produce output such as \xffffff80).
 *
 * @param str string to escape
 * @return A std::string containing a list of escaped hex values of the form
 *         \x##; empty if str is empty.
 */
std::string get_escaped_string(const std::string& str)
	{
	static const char hex_digits[] = "0123456789abcdef";

	std::string esc;
	esc.reserve(str.length() * 4); // each input byte becomes "\xNN"

	for ( size_t i = 0; i < str.length(); ++i )
		{
		const unsigned char c = static_cast<unsigned char>(str[i]);

		esc += "\\x";
		esc += hex_digits[c >> 4];
		esc += hex_digits[c & 0x0f];
		}

	return esc;
	}
|
||||
|
||||
char* copy_string(const char* s)
|
||||
{
|
||||
char* c = new char[strlen(s)+1];
|
||||
|
|
|
@ -89,6 +89,8 @@ void delete_each(T* t)
|
|||
delete *it;
|
||||
}
|
||||
|
||||
std::string get_escaped_string(const std::string& str);
|
||||
|
||||
extern char* copy_string(const char* s);
|
||||
extern int streq(const char* s1, const char* s2);
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue