diff --git a/src/Desc.cc b/src/Desc.cc index 62c6130f40..f636a028b5 100644 --- a/src/Desc.cc +++ b/src/Desc.cc @@ -216,18 +216,32 @@ void ODesc::Indent() } } -static const char hex_chars[] = "0123456789abcdef"; - -static const char* find_first_unprintable(ODesc* d, const char* bytes, unsigned int n) +static bool starts_with(const char* str1, const char* str2, size_t len) { - if ( d->IsBinary() ) + for ( size_t i = 0; i < len; ++i ) + if ( str1[i] != str2[i] ) + return false; + + return true; + } + +size_t ODesc::StartsWithEscapeSequence(const char* start, const char* end) + { + if ( escape_sequences.empty() ) return 0; - while ( n-- ) + escape_set::const_iterator it; + + for ( it = escape_sequences.begin(); it != escape_sequences.end(); ++it ) { - if ( ! isprint(*bytes) ) - return bytes; - ++bytes; + const string& esc_str = *it; + size_t esc_len = esc_str.length(); + + if ( start + esc_len > end ) + continue; + + if ( starts_with(start, esc_str.c_str(), esc_len) ) + return esc_len; } return 0; @@ -235,21 +249,23 @@ static const char* find_first_unprintable(ODesc* d, const char* bytes, unsigned pair ODesc::FirstEscapeLoc(const char* bytes, size_t n) { - pair p(find_first_unprintable(this, bytes, n), 1); + typedef pair escape_pos; - string str(bytes, n); - list::const_iterator it; - for ( it = escape_sequences.begin(); it != escape_sequences.end(); ++it ) + if ( IsBinary() ) + return escape_pos(0, 0); + + for ( size_t i = 0; i < n; ++i ) { - size_t pos = str.find(*it); - if ( pos != string::npos && (p.first == 0 || bytes + pos < p.first) ) - { - p.first = bytes + pos; - p.second = it->size(); - } + if ( ! 
isprint(bytes[i]) ) + return escape_pos(bytes + i, 1); + + size_t len = StartsWithEscapeSequence(bytes + i, bytes + n); + + if ( len ) + return escape_pos(bytes + i, len); } - return p; + return escape_pos(0, 0); } void ODesc::AddBytes(const void* bytes, unsigned int n) @@ -266,21 +282,11 @@ void ODesc::AddBytes(const void* bytes, unsigned int n) while ( s < e ) { pair p = FirstEscapeLoc(s, e - s); + if ( p.first ) { AddBytesRaw(s, p.first - s); - if ( p.second == 1 ) - { - char hex[6] = "\\x00"; - hex[2] = hex_chars[((*p.first) & 0xf0) >> 4]; - hex[3] = hex_chars[(*p.first) & 0x0f]; - AddBytesRaw(hex, 4); - } - else - { - string esc_str = get_escaped_string(string(p.first, p.second), true); - AddBytesRaw(esc_str.c_str(), esc_str.size()); - } + get_escaped_string(this, p.first, p.second, true); s = p.first + p.second; } else diff --git a/src/Desc.h b/src/Desc.h index 27dc326ff0..b7df7d75f7 100644 --- a/src/Desc.h +++ b/src/Desc.h @@ -4,7 +4,7 @@ #define descriptor_h #include -#include +#include #include #include "BroString.h" @@ -54,16 +54,16 @@ public: void SetFlush(int arg_do_flush) { do_flush = arg_do_flush; } void EnableEscaping(); - void AddEscapeSequence(const char* s) { escape_sequences.push_back(s); } + void AddEscapeSequence(const char* s) { escape_sequences.insert(s); } void AddEscapeSequence(const char* s, size_t n) - { escape_sequences.push_back(string(s, n)); } + { escape_sequences.insert(string(s, n)); } void AddEscapeSequence(const string & s) - { escape_sequences.push_back(s); } - void RemoveEscapeSequence(const char* s) { escape_sequences.remove(s); } + { escape_sequences.insert(s); } + void RemoveEscapeSequence(const char* s) { escape_sequences.erase(s); } void RemoveEscapeSequence(const char* s, size_t n) - { escape_sequences.remove(string(s, n)); } + { escape_sequences.erase(string(s, n)); } void RemoveEscapeSequence(const string & s) - { escape_sequences.remove(s); } + { escape_sequences.erase(s); } void PushIndent(); void PopIndent(); @@ 
-163,6 +163,15 @@ protected: */ pair FirstEscapeLoc(const char* bytes, size_t n); + /** + * @param start start of string to check for starting with an escape + * sequence. + * @param end one byte past the last character in the string. + * @return The number of bytes in the escape sequence that the string + * starts with, or 0 if it does not start with one. + */ + size_t StartsWithEscapeSequence(const char* start, const char* end); + desc_type type; desc_style style; @@ -171,7 +180,8 @@ protected: unsigned int size; // size of buffer in bytes bool escape; // escape unprintable characters in output? - list escape_sequences; // additional sequences of chars to escape + typedef set escape_set; + escape_set escape_sequences; // additional sequences of chars to escape BroFile* f; // or the file we're using. diff --git a/src/logging/writers/Ascii.cc b/src/logging/writers/Ascii.cc index 43ffe47308..fe79089b04 100644 --- a/src/logging/writers/Ascii.cc +++ b/src/logging/writers/Ascii.cc @@ -335,10 +335,10 @@ bool Ascii::DoWrite(int num_fields, const Field* const * fields, if ( strncmp(bytes, meta_prefix.data(), meta_prefix.size()) == 0 ) { // It would so escape the first character. - char buf[16]; - snprintf(buf, sizeof(buf), "\\x%02x", bytes[0]); + char hex[4] = {'\\', 'x', '0', '0'}; + bytetohex(bytes[0], hex + 2); - if ( ! safe_write(fd, buf, strlen(buf)) ) + if ( ! safe_write(fd, hex, 4) ) goto write_error; ++bytes; diff --git a/src/threading/formatters/Ascii.cc b/src/threading/formatters/Ascii.cc index 3120549f13..6c114ff3fd 100644 --- a/src/threading/formatters/Ascii.cc +++ b/src/threading/formatters/Ascii.cc @@ -122,10 +122,8 @@ bool Ascii::Describe(ODesc* desc, threading::Value* val, const string& name) con // place-holder we use for unset optional fields. We // escape the first character so that the output // won't be ambigious. 
- static const char hex_chars[] = "0123456789abcdef"; - char hex[6] = "\\x00"; - hex[2] = hex_chars[((*data) & 0xf0) >> 4]; - hex[3] = hex_chars[(*data) & 0x0f]; + char hex[4] = {'\\', 'x', '0', '0'}; + bytetohex(*data, hex + 2); desc->AddRaw(hex, 4); ++data; diff --git a/src/threading/formatters/JSON.cc b/src/threading/formatters/JSON.cc index 17712e8d53..472023e0f8 100644 --- a/src/threading/formatters/JSON.cc +++ b/src/threading/formatters/JSON.cc @@ -160,10 +160,11 @@ bool JSON::Describe(ODesc* desc, Value* val, const string& name) const // 2byte Unicode escape special characters. if ( c < 32 || c > 126 || c == '\n' || c == '"' || c == '\'' || c == '\\' || c == '&' ) { - static const char hex_chars[] = "0123456789abcdef"; desc->AddRaw("\\u00", 4); - desc->AddRaw(&hex_chars[(c & 0xf0) >> 4], 1); - desc->AddRaw(&hex_chars[c & 0x0f], 1); + char hex[2] = {'0', '0'}; + bytetohex(c, hex); + desc->AddRaw(hex, 1); + desc->AddRaw(hex + 1, 1); } else desc->AddRaw(&c, 1); diff --git a/src/util.cc b/src/util.cc index 434783a340..6190067aa6 100644 --- a/src/util.cc +++ b/src/util.cc @@ -120,31 +120,41 @@ std::string get_unescaped_string(const std::string& arg_str) * Takes a string, escapes characters into equivalent hex codes (\x##), and * returns a string containing all escaped values. * + * @param d an ODesc object to store the escaped hex version of the string, + * if null one will be allocated and returned from the function. * @param str string to escape * @param escape_all If true, all characters are escaped. If false, only * characters are escaped that are either whitespace or not printable in * ASCII. - * @return A std::string containing a list of escaped hex values of the form - * \x## */ -std::string get_escaped_string(const std::string& str, bool escape_all) + * @return An ODesc object containing a list of escaped hex values of the form + * \x##, which may be newly allocated if \a d was a null pointer. 
*/ +ODesc* get_escaped_string(ODesc* d, const char* str, size_t len, + bool escape_all) { - char tbuf[16]; - string esc = ""; + if ( ! d ) + d = new ODesc(); - for ( size_t i = 0; i < str.length(); ++i ) + for ( size_t i = 0; i < len; ++i ) { char c = str[i]; - if ( escape_all || isspace(c) || ! isascii(c) || ! isprint(c) ) + if ( escape_all || ! isascii(c) || isspace(c) || ! isprint(c) ) /* isascii() is tested first: isspace()/isprint() are undefined for negative char values */ { - snprintf(tbuf, sizeof(tbuf), "\\x%02x", str[i]); - esc += tbuf; + char hex[4] = {'\\', 'x', '0', '0' }; + bytetohex(c, hex + 2); + d->AddRaw(hex, 4); } else - esc += c; + d->AddRaw(&c, 1); } - return esc; + return d; + } + +std::string get_escaped_string(const char* str, size_t len, bool escape_all) + { + ODesc d; + return get_escaped_string(&d, str, len, escape_all)->Description(); } char* copy_string(const char* s) diff --git a/src/util.h b/src/util.h index aebc8bbc43..c6b657b7a8 100644 --- a/src/util.h +++ b/src/util.h @@ -102,8 +102,29 @@ void delete_each(T* t) std::string extract_ip(const std::string& i); std::string extract_ip_and_len(const std::string& i, int* len); +/** + * Writes the two lowercase hex digits of \a byte (high nibble first) to + * hex_out[0] and hex_out[1]; no terminating NUL is written. + */ +inline void bytetohex(unsigned char byte, char* hex_out) + { + static const char hex_chars[] = "0123456789abcdef"; + hex_out[0] = hex_chars[(byte & 0xf0) >> 4]; + hex_out[1] = hex_chars[byte & 0x0f]; + } + std::string get_unescaped_string(const std::string& str); -std::string get_escaped_string(const std::string& str, bool escape_all); + +class ODesc; + +ODesc* get_escaped_string(ODesc* d, const char* str, size_t len, + bool escape_all); +std::string get_escaped_string(const char* str, size_t len, bool escape_all); + +inline std::string get_escaped_string(const std::string& str, bool escape_all) + { + return get_escaped_string(str.data(), str.length(), escape_all); + } std::vector* tokenize_string(std::string input, const std::string& delim,