Refactor various hex escaping code.

This commit is contained in:
Jon Siwek 2014-04-18 13:19:50 -05:00
parent 80d7a1482c
commit e8a5ea8844
7 changed files with 102 additions and 60 deletions

View file

@ -216,18 +216,32 @@ void ODesc::Indent()
}
}
static const char hex_chars[] = "0123456789abcdef";
static const char* find_first_unprintable(ODesc* d, const char* bytes, unsigned int n)
static bool starts_with(const char* str1, const char* str2, size_t len)
{
if ( d->IsBinary() )
for ( size_t i = 0; i < len; ++i )
if ( str1[i] != str2[i] )
return false;
return true;
}
size_t ODesc::StartsWithEscapeSequence(const char* start, const char* end)
{
if ( escape_sequences.empty() )
return 0;
while ( n-- )
escape_set::const_iterator it;
for ( it = escape_sequences.begin(); it != escape_sequences.end(); ++it )
{
if ( ! isprint(*bytes) )
return bytes;
++bytes;
const string& esc_str = *it;
size_t esc_len = esc_str.length();
if ( start + esc_len > end )
continue;
if ( starts_with(start, esc_str.c_str(), esc_len) )
return esc_len;
}
return 0;
@ -235,21 +249,23 @@ static const char* find_first_unprintable(ODesc* d, const char* bytes, unsigned
pair<const char*, size_t> ODesc::FirstEscapeLoc(const char* bytes, size_t n)
{
pair<const char*, size_t> p(find_first_unprintable(this, bytes, n), 1);
typedef pair<const char*, size_t> escape_pos;
string str(bytes, n);
list<string>::const_iterator it;
for ( it = escape_sequences.begin(); it != escape_sequences.end(); ++it )
if ( IsBinary() )
return escape_pos(0, 0);
for ( size_t i = 0; i < n; ++i )
{
size_t pos = str.find(*it);
if ( pos != string::npos && (p.first == 0 || bytes + pos < p.first) )
{
p.first = bytes + pos;
p.second = it->size();
}
if ( ! isprint(bytes[i]) )
return escape_pos(bytes + i, 1);
size_t len = StartsWithEscapeSequence(bytes + i, bytes + n);
if ( len )
return escape_pos(bytes + i, len);
}
return p;
return escape_pos(0, 0);
}
void ODesc::AddBytes(const void* bytes, unsigned int n)
@ -266,21 +282,11 @@ void ODesc::AddBytes(const void* bytes, unsigned int n)
while ( s < e )
{
pair<const char*, size_t> p = FirstEscapeLoc(s, e - s);
if ( p.first )
{
AddBytesRaw(s, p.first - s);
if ( p.second == 1 )
{
char hex[6] = "\\x00";
hex[2] = hex_chars[((*p.first) & 0xf0) >> 4];
hex[3] = hex_chars[(*p.first) & 0x0f];
AddBytesRaw(hex, 4);
}
else
{
string esc_str = get_escaped_string(string(p.first, p.second), true);
AddBytesRaw(esc_str.c_str(), esc_str.size());
}
get_escaped_string(this, p.first, p.second, true);
s = p.first + p.second;
}
else

View file

@ -4,7 +4,7 @@
#define descriptor_h
#include <stdio.h>
#include <list>
#include <set>
#include <utility>
#include "BroString.h"
@ -54,16 +54,16 @@ public:
void SetFlush(int arg_do_flush) { do_flush = arg_do_flush; }
void EnableEscaping();
void AddEscapeSequence(const char* s) { escape_sequences.push_back(s); }
void AddEscapeSequence(const char* s) { escape_sequences.insert(s); }
void AddEscapeSequence(const char* s, size_t n)
{ escape_sequences.push_back(string(s, n)); }
{ escape_sequences.insert(string(s, n)); }
void AddEscapeSequence(const string & s)
{ escape_sequences.push_back(s); }
void RemoveEscapeSequence(const char* s) { escape_sequences.remove(s); }
{ escape_sequences.insert(s); }
void RemoveEscapeSequence(const char* s) { escape_sequences.erase(s); }
void RemoveEscapeSequence(const char* s, size_t n)
{ escape_sequences.remove(string(s, n)); }
{ escape_sequences.erase(string(s, n)); }
void RemoveEscapeSequence(const string & s)
{ escape_sequences.remove(s); }
{ escape_sequences.erase(s); }
void PushIndent();
void PopIndent();
@ -163,6 +163,15 @@ protected:
*/
pair<const char*, size_t> FirstEscapeLoc(const char* bytes, size_t n);
/**
* @param start start of string to check for starting with an escape
* sequence.
* @param end one byte past the last character in the string.
* @return The number of bytes in the escape sequence that the string
* starts with.
*/
size_t StartsWithEscapeSequence(const char* start, const char* end);
desc_type type;
desc_style style;
@ -171,7 +180,8 @@ protected:
unsigned int size; // size of buffer in bytes
bool escape; // escape unprintable characters in output?
list<string> escape_sequences; // additional sequences of chars to escape
typedef set<string> escape_set;
escape_set escape_sequences; // additional sequences of chars to escape
BroFile* f; // or the file we're using.

View file

@ -335,10 +335,10 @@ bool Ascii::DoWrite(int num_fields, const Field* const * fields,
if ( strncmp(bytes, meta_prefix.data(), meta_prefix.size()) == 0 )
{
// It would, so escape the first character.
char buf[16];
snprintf(buf, sizeof(buf), "\\x%02x", bytes[0]);
char hex[4] = {'\\', 'x', '0', '0'};
bytetohex(bytes[0], hex + 2);
if ( ! safe_write(fd, buf, strlen(buf)) )
if ( ! safe_write(fd, hex, 4) )
goto write_error;
++bytes;

View file

@ -122,10 +122,8 @@ bool Ascii::Describe(ODesc* desc, threading::Value* val, const string& name) con
// place-holder we use for unset optional fields. We
// escape the first character so that the output
// won't be ambiguous.
static const char hex_chars[] = "0123456789abcdef";
char hex[6] = "\\x00";
hex[2] = hex_chars[((*data) & 0xf0) >> 4];
hex[3] = hex_chars[(*data) & 0x0f];
char hex[4] = {'\\', 'x', '0', '0'};
bytetohex(*data, hex + 2);
desc->AddRaw(hex, 4);
++data;

View file

@ -160,10 +160,11 @@ bool JSON::Describe(ODesc* desc, Value* val, const string& name) const
// 2byte Unicode escape special characters.
if ( c < 32 || c > 126 || c == '\n' || c == '"' || c == '\'' || c == '\\' || c == '&' )
{
static const char hex_chars[] = "0123456789abcdef";
desc->AddRaw("\\u00", 4);
desc->AddRaw(&hex_chars[(c & 0xf0) >> 4], 1);
desc->AddRaw(&hex_chars[c & 0x0f], 1);
char hex[2] = {'0', '0'};
bytetohex(c, hex);
desc->AddRaw(hex, 1);
desc->AddRaw(hex + 1, 1);
}
else
desc->AddRaw(&c, 1);

View file

@ -120,31 +120,41 @@ std::string get_unescaped_string(const std::string& arg_str)
* Takes a string, escapes characters into equivalent hex codes (\x##), and
* returns a string containing all escaped values.
*
* @param d an ODesc object to store the escaped hex version of the string,
* if null one will be allocated and returned from the function.
* @param str string to escape
* @param escape_all If true, all characters are escaped. If false, only
* characters are escaped that are either whitespace or not printable in
* ASCII.
* @return A std::string containing a list of escaped hex values of the form
* \x## */
std::string get_escaped_string(const std::string& str, bool escape_all)
* @return An ODesc object containing a list of escaped hex values of the form
* \x##, which may be newly allocated if \a d was a null pointer. */
ODesc* get_escaped_string(ODesc* d, const char* str, size_t len,
bool escape_all)
{
char tbuf[16];
string esc = "";
if ( ! d )
d = new ODesc();
for ( size_t i = 0; i < str.length(); ++i )
for ( size_t i = 0; i < len; ++i )
{
char c = str[i];
if ( escape_all || isspace(c) || ! isascii(c) || ! isprint(c) )
{
snprintf(tbuf, sizeof(tbuf), "\\x%02x", str[i]);
esc += tbuf;
char hex[4] = {'\\', 'x', '0', '0' };
bytetohex(c, hex + 2);
d->AddRaw(hex, 4);
}
else
esc += c;
d->AddRaw(&c, 1);
}
return esc;
return d;
}
std::string get_escaped_string(const char* str, size_t len, bool escape_all)
{
ODesc d;
return get_escaped_string(&d, str, len, escape_all)->Description();
}
char* copy_string(const char* s)

View file

@ -102,8 +102,25 @@ void delete_each(T* t)
std::string extract_ip(const std::string& i);
std::string extract_ip_and_len(const std::string& i, int* len);
/**
 * Writes the two-character, lowercase hexadecimal form of a byte.
 *
 * @param byte the value to convert.
 * @param hex_out destination receiving exactly two characters: the high
 * nibble's digit first, then the low nibble's digit. No terminating NUL
 * is written.
 */
inline void bytetohex(unsigned char byte, char* hex_out)
	{
	static const char digits[] = "0123456789abcdef";

	// An unsigned char holds at most 0xff, so shifting right by four
	// leaves only the upper nibble.
	const unsigned int upper = byte >> 4;
	const unsigned int lower = byte & 0x0f;

	hex_out[0] = digits[upper];
	hex_out[1] = digits[lower];
	}
std::string get_unescaped_string(const std::string& str);
std::string get_escaped_string(const std::string& str, bool escape_all);
class ODesc;
ODesc* get_escaped_string(ODesc* d, const char* str, size_t len,
bool escape_all);
std::string get_escaped_string(const char* str, size_t len, bool escape_all);
/**
 * std::string convenience wrapper around the pointer/length overload of
 * get_escaped_string.
 *
 * @param str string whose bytes are passed on for escaping.
 * @param escape_all forwarded unchanged to the pointer/length overload.
 * @return Whatever the pointer/length overload returns for the bytes of
 * \a str.
 */
inline std::string get_escaped_string(const std::string& str, bool escape_all)
	{
	// Pass data()/length() rather than c_str() so the explicit length
	// travels with the pointer.
	const char* first_byte = str.data();
	const std::string::size_type num_bytes = str.length();
	return get_escaped_string(first_byte, num_bytes, escape_all);
	}
std::vector<std::string>* tokenize_string(std::string input,
const std::string& delim,