Refactor various hex escaping code.

This commit is contained in:
Jon Siwek 2014-04-18 13:19:50 -05:00
parent 80d7a1482c
commit e8a5ea8844
7 changed files with 102 additions and 60 deletions

View file

@ -216,18 +216,32 @@ void ODesc::Indent()
} }
} }
static const char hex_chars[] = "0123456789abcdef"; static bool starts_with(const char* str1, const char* str2, size_t len)
static const char* find_first_unprintable(ODesc* d, const char* bytes, unsigned int n)
{ {
if ( d->IsBinary() ) for ( size_t i = 0; i < len; ++i )
if ( str1[i] != str2[i] )
return false;
return true;
}
size_t ODesc::StartsWithEscapeSequence(const char* start, const char* end)
{
if ( escape_sequences.empty() )
return 0; return 0;
while ( n-- ) escape_set::const_iterator it;
for ( it = escape_sequences.begin(); it != escape_sequences.end(); ++it )
{ {
if ( ! isprint(*bytes) ) const string& esc_str = *it;
return bytes; size_t esc_len = esc_str.length();
++bytes;
if ( start + esc_len > end )
continue;
if ( starts_with(start, esc_str.c_str(), esc_len) )
return esc_len;
} }
return 0; return 0;
@ -235,21 +249,23 @@ static const char* find_first_unprintable(ODesc* d, const char* bytes, unsigned
pair<const char*, size_t> ODesc::FirstEscapeLoc(const char* bytes, size_t n) pair<const char*, size_t> ODesc::FirstEscapeLoc(const char* bytes, size_t n)
{ {
pair<const char*, size_t> p(find_first_unprintable(this, bytes, n), 1); typedef pair<const char*, size_t> escape_pos;
string str(bytes, n); if ( IsBinary() )
list<string>::const_iterator it; return escape_pos(0, 0);
for ( it = escape_sequences.begin(); it != escape_sequences.end(); ++it )
for ( size_t i = 0; i < n; ++i )
{ {
size_t pos = str.find(*it); if ( ! isprint(bytes[i]) )
if ( pos != string::npos && (p.first == 0 || bytes + pos < p.first) ) return escape_pos(bytes + i, 1);
{
p.first = bytes + pos; size_t len = StartsWithEscapeSequence(bytes + i, bytes + n);
p.second = it->size();
} if ( len )
return escape_pos(bytes + i, len);
} }
return p; return escape_pos(0, 0);
} }
void ODesc::AddBytes(const void* bytes, unsigned int n) void ODesc::AddBytes(const void* bytes, unsigned int n)
@ -266,21 +282,11 @@ void ODesc::AddBytes(const void* bytes, unsigned int n)
while ( s < e ) while ( s < e )
{ {
pair<const char*, size_t> p = FirstEscapeLoc(s, e - s); pair<const char*, size_t> p = FirstEscapeLoc(s, e - s);
if ( p.first ) if ( p.first )
{ {
AddBytesRaw(s, p.first - s); AddBytesRaw(s, p.first - s);
if ( p.second == 1 ) get_escaped_string(this, p.first, p.second, true);
{
char hex[6] = "\\x00";
hex[2] = hex_chars[((*p.first) & 0xf0) >> 4];
hex[3] = hex_chars[(*p.first) & 0x0f];
AddBytesRaw(hex, 4);
}
else
{
string esc_str = get_escaped_string(string(p.first, p.second), true);
AddBytesRaw(esc_str.c_str(), esc_str.size());
}
s = p.first + p.second; s = p.first + p.second;
} }
else else

View file

@ -4,7 +4,7 @@
#define descriptor_h #define descriptor_h
#include <stdio.h> #include <stdio.h>
#include <list> #include <set>
#include <utility> #include <utility>
#include "BroString.h" #include "BroString.h"
@ -54,16 +54,16 @@ public:
void SetFlush(int arg_do_flush) { do_flush = arg_do_flush; } void SetFlush(int arg_do_flush) { do_flush = arg_do_flush; }
void EnableEscaping(); void EnableEscaping();
void AddEscapeSequence(const char* s) { escape_sequences.push_back(s); } void AddEscapeSequence(const char* s) { escape_sequences.insert(s); }
void AddEscapeSequence(const char* s, size_t n) void AddEscapeSequence(const char* s, size_t n)
{ escape_sequences.push_back(string(s, n)); } { escape_sequences.insert(string(s, n)); }
void AddEscapeSequence(const string & s) void AddEscapeSequence(const string & s)
{ escape_sequences.push_back(s); } { escape_sequences.insert(s); }
void RemoveEscapeSequence(const char* s) { escape_sequences.remove(s); } void RemoveEscapeSequence(const char* s) { escape_sequences.erase(s); }
void RemoveEscapeSequence(const char* s, size_t n) void RemoveEscapeSequence(const char* s, size_t n)
{ escape_sequences.remove(string(s, n)); } { escape_sequences.erase(string(s, n)); }
void RemoveEscapeSequence(const string & s) void RemoveEscapeSequence(const string & s)
{ escape_sequences.remove(s); } { escape_sequences.erase(s); }
void PushIndent(); void PushIndent();
void PopIndent(); void PopIndent();
@ -163,6 +163,15 @@ protected:
*/ */
pair<const char*, size_t> FirstEscapeLoc(const char* bytes, size_t n); pair<const char*, size_t> FirstEscapeLoc(const char* bytes, size_t n);
/**
* @param start start of string to check for starting with an escape
* sequence.
* @param end one byte past the last character in the string.
* @return The number of bytes in the escape sequence that the string
* starts with.
*/
size_t StartsWithEscapeSequence(const char* start, const char* end);
desc_type type; desc_type type;
desc_style style; desc_style style;
@ -171,7 +180,8 @@ protected:
unsigned int size; // size of buffer in bytes unsigned int size; // size of buffer in bytes
bool escape; // escape unprintable characters in output? bool escape; // escape unprintable characters in output?
list<string> escape_sequences; // additional sequences of chars to escape typedef set<string> escape_set;
escape_set escape_sequences; // additional sequences of chars to escape
BroFile* f; // or the file we're using. BroFile* f; // or the file we're using.

View file

@ -335,10 +335,10 @@ bool Ascii::DoWrite(int num_fields, const Field* const * fields,
if ( strncmp(bytes, meta_prefix.data(), meta_prefix.size()) == 0 ) if ( strncmp(bytes, meta_prefix.data(), meta_prefix.size()) == 0 )
{ {
// It would so escape the first character. // It would so escape the first character.
char buf[16]; char hex[4] = {'\\', 'x', '0', '0'};
snprintf(buf, sizeof(buf), "\\x%02x", bytes[0]); bytetohex(bytes[0], hex + 2);
if ( ! safe_write(fd, buf, strlen(buf)) ) if ( ! safe_write(fd, hex, 4) )
goto write_error; goto write_error;
++bytes; ++bytes;

View file

@ -122,10 +122,8 @@ bool Ascii::Describe(ODesc* desc, threading::Value* val, const string& name) con
// place-holder we use for unset optional fields. We // place-holder we use for unset optional fields. We
// escape the first character so that the output // escape the first character so that the output
// won't be ambiguous. // won't be ambiguous.
static const char hex_chars[] = "0123456789abcdef"; char hex[4] = {'\\', 'x', '0', '0'};
char hex[6] = "\\x00"; bytetohex(*data, hex + 2);
hex[2] = hex_chars[((*data) & 0xf0) >> 4];
hex[3] = hex_chars[(*data) & 0x0f];
desc->AddRaw(hex, 4); desc->AddRaw(hex, 4);
++data; ++data;

View file

@ -160,10 +160,11 @@ bool JSON::Describe(ODesc* desc, Value* val, const string& name) const
// 2byte Unicode escape special characters. // 2byte Unicode escape special characters.
if ( c < 32 || c > 126 || c == '\n' || c == '"' || c == '\'' || c == '\\' || c == '&' ) if ( c < 32 || c > 126 || c == '\n' || c == '"' || c == '\'' || c == '\\' || c == '&' )
{ {
static const char hex_chars[] = "0123456789abcdef";
desc->AddRaw("\\u00", 4); desc->AddRaw("\\u00", 4);
desc->AddRaw(&hex_chars[(c & 0xf0) >> 4], 1); char hex[2] = {'0', '0'};
desc->AddRaw(&hex_chars[c & 0x0f], 1); bytetohex(c, hex);
desc->AddRaw(hex, 1);
desc->AddRaw(hex + 1, 1);
} }
else else
desc->AddRaw(&c, 1); desc->AddRaw(&c, 1);

View file

@ -120,31 +120,41 @@ std::string get_unescaped_string(const std::string& arg_str)
* Takes a string, escapes characters into equivalent hex codes (\x##), and * Takes a string, escapes characters into equivalent hex codes (\x##), and
* returns a string containing all escaped values. * returns a string containing all escaped values.
* *
* @param d an ODesc object to store the escaped hex version of the string,
* if null one will be allocated and returned from the function.
* @param str string to escape * @param str string to escape
* @param escape_all If true, all characters are escaped. If false, only * @param escape_all If true, all characters are escaped. If false, only
* characters are escaped that are either whitespace or not printable in * characters are escaped that are either whitespace or not printable in
* ASCII. * ASCII.
* @return A std::string containing a list of escaped hex values of the form * @return A ODesc object containing a list of escaped hex values of the form
* \x## */ * \x##, which may be newly allocated if \a d was a null pointer. */
std::string get_escaped_string(const std::string& str, bool escape_all) ODesc* get_escaped_string(ODesc* d, const char* str, size_t len,
bool escape_all)
{ {
char tbuf[16]; if ( ! d )
string esc = ""; d = new ODesc();
for ( size_t i = 0; i < str.length(); ++i ) for ( size_t i = 0; i < len; ++i )
{ {
char c = str[i]; char c = str[i];
if ( escape_all || isspace(c) || ! isascii(c) || ! isprint(c) ) if ( escape_all || isspace(c) || ! isascii(c) || ! isprint(c) )
{ {
snprintf(tbuf, sizeof(tbuf), "\\x%02x", str[i]); char hex[4] = {'\\', 'x', '0', '0' };
esc += tbuf; bytetohex(c, hex + 2);
d->AddRaw(hex, 4);
} }
else else
esc += c; d->AddRaw(&c, 1);
} }
return esc; return d;
}
std::string get_escaped_string(const char* str, size_t len, bool escape_all)
{
ODesc d;
return get_escaped_string(&d, str, len, escape_all)->Description();
} }
char* copy_string(const char* s) char* copy_string(const char* s)

View file

@ -102,8 +102,25 @@ void delete_each(T* t)
std::string extract_ip(const std::string& i); std::string extract_ip(const std::string& i);
std::string extract_ip_and_len(const std::string& i, int* len); std::string extract_ip_and_len(const std::string& i, int* len);
/**
 * Writes the two-digit lowercase hexadecimal representation of a byte.
 *
 * @param byte the value to convert.
 * @param hex_out destination buffer; exactly two characters are written
 * (high nibble first, then low nibble) and no terminator is appended.
 */
inline void bytetohex(unsigned char byte, char* hex_out)
{
	static const char digits[] = "0123456789abcdef";

	// Split the byte into its upper and lower four bits.
	const unsigned int upper = (byte >> 4) & 0x0f;
	const unsigned int lower = byte & 0x0f;

	hex_out[0] = digits[upper];
	hex_out[1] = digits[lower];
}
std::string get_unescaped_string(const std::string& str); std::string get_unescaped_string(const std::string& str);
std::string get_escaped_string(const std::string& str, bool escape_all);
class ODesc;
ODesc* get_escaped_string(ODesc* d, const char* str, size_t len,
bool escape_all);
std::string get_escaped_string(const char* str, size_t len, bool escape_all);
/**
 * Convenience overload that escapes the contents of a std::string.
 *
 * Forwards the string's data pointer and length to the
 * (pointer, length) overload of get_escaped_string().
 *
 * @param str string to escape.
 * @param escape_all forwarded unchanged to the (pointer, length) overload.
 * @return whatever the (pointer, length) overload returns.
 */
inline std::string get_escaped_string(const std::string& str, bool escape_all)
{
	const char* const first = str.data();
	return get_escaped_string(first, str.length(), escape_all);
}
std::vector<std::string>* tokenize_string(std::string input, std::vector<std::string>* tokenize_string(std::string input,
const std::string& delim, const std::string& delim,