mirror of
https://github.com/zeek/zeek.git
synced 2025-10-02 14:48:21 +00:00
Refactor various hex escaping code.
This commit is contained in:
parent
80d7a1482c
commit
e8a5ea8844
7 changed files with 102 additions and 60 deletions
68
src/Desc.cc
68
src/Desc.cc
|
@ -216,18 +216,32 @@ void ODesc::Indent()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static const char hex_chars[] = "0123456789abcdef";
|
static bool starts_with(const char* str1, const char* str2, size_t len)
|
||||||
|
|
||||||
static const char* find_first_unprintable(ODesc* d, const char* bytes, unsigned int n)
|
|
||||||
{
|
{
|
||||||
if ( d->IsBinary() )
|
for ( size_t i = 0; i < len; ++i )
|
||||||
|
if ( str1[i] != str2[i] )
|
||||||
|
return false;
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t ODesc::StartsWithEscapeSequence(const char* start, const char* end)
|
||||||
|
{
|
||||||
|
if ( escape_sequences.empty() )
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
while ( n-- )
|
escape_set::const_iterator it;
|
||||||
|
|
||||||
|
for ( it = escape_sequences.begin(); it != escape_sequences.end(); ++it )
|
||||||
{
|
{
|
||||||
if ( ! isprint(*bytes) )
|
const string& esc_str = *it;
|
||||||
return bytes;
|
size_t esc_len = esc_str.length();
|
||||||
++bytes;
|
|
||||||
|
if ( start + esc_len > end )
|
||||||
|
continue;
|
||||||
|
|
||||||
|
if ( starts_with(start, esc_str.c_str(), esc_len) )
|
||||||
|
return esc_len;
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -235,21 +249,23 @@ static const char* find_first_unprintable(ODesc* d, const char* bytes, unsigned
|
||||||
|
|
||||||
pair<const char*, size_t> ODesc::FirstEscapeLoc(const char* bytes, size_t n)
|
pair<const char*, size_t> ODesc::FirstEscapeLoc(const char* bytes, size_t n)
|
||||||
{
|
{
|
||||||
pair<const char*, size_t> p(find_first_unprintable(this, bytes, n), 1);
|
typedef pair<const char*, size_t> escape_pos;
|
||||||
|
|
||||||
string str(bytes, n);
|
if ( IsBinary() )
|
||||||
list<string>::const_iterator it;
|
return escape_pos(0, 0);
|
||||||
for ( it = escape_sequences.begin(); it != escape_sequences.end(); ++it )
|
|
||||||
|
for ( size_t i = 0; i < n; ++i )
|
||||||
{
|
{
|
||||||
size_t pos = str.find(*it);
|
if ( ! isprint(bytes[i]) )
|
||||||
if ( pos != string::npos && (p.first == 0 || bytes + pos < p.first) )
|
return escape_pos(bytes + i, 1);
|
||||||
{
|
|
||||||
p.first = bytes + pos;
|
size_t len = StartsWithEscapeSequence(bytes + i, bytes + n);
|
||||||
p.second = it->size();
|
|
||||||
}
|
if ( len )
|
||||||
|
return escape_pos(bytes + i, len);
|
||||||
}
|
}
|
||||||
|
|
||||||
return p;
|
return escape_pos(0, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
void ODesc::AddBytes(const void* bytes, unsigned int n)
|
void ODesc::AddBytes(const void* bytes, unsigned int n)
|
||||||
|
@ -266,21 +282,11 @@ void ODesc::AddBytes(const void* bytes, unsigned int n)
|
||||||
while ( s < e )
|
while ( s < e )
|
||||||
{
|
{
|
||||||
pair<const char*, size_t> p = FirstEscapeLoc(s, e - s);
|
pair<const char*, size_t> p = FirstEscapeLoc(s, e - s);
|
||||||
|
|
||||||
if ( p.first )
|
if ( p.first )
|
||||||
{
|
{
|
||||||
AddBytesRaw(s, p.first - s);
|
AddBytesRaw(s, p.first - s);
|
||||||
if ( p.second == 1 )
|
get_escaped_string(this, p.first, p.second, true);
|
||||||
{
|
|
||||||
char hex[6] = "\\x00";
|
|
||||||
hex[2] = hex_chars[((*p.first) & 0xf0) >> 4];
|
|
||||||
hex[3] = hex_chars[(*p.first) & 0x0f];
|
|
||||||
AddBytesRaw(hex, 4);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
string esc_str = get_escaped_string(string(p.first, p.second), true);
|
|
||||||
AddBytesRaw(esc_str.c_str(), esc_str.size());
|
|
||||||
}
|
|
||||||
s = p.first + p.second;
|
s = p.first + p.second;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
|
26
src/Desc.h
26
src/Desc.h
|
@ -4,7 +4,7 @@
|
||||||
#define descriptor_h
|
#define descriptor_h
|
||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <list>
|
#include <set>
|
||||||
#include <utility>
|
#include <utility>
|
||||||
|
|
||||||
#include "BroString.h"
|
#include "BroString.h"
|
||||||
|
@ -54,16 +54,16 @@ public:
|
||||||
void SetFlush(int arg_do_flush) { do_flush = arg_do_flush; }
|
void SetFlush(int arg_do_flush) { do_flush = arg_do_flush; }
|
||||||
|
|
||||||
void EnableEscaping();
|
void EnableEscaping();
|
||||||
void AddEscapeSequence(const char* s) { escape_sequences.push_back(s); }
|
void AddEscapeSequence(const char* s) { escape_sequences.insert(s); }
|
||||||
void AddEscapeSequence(const char* s, size_t n)
|
void AddEscapeSequence(const char* s, size_t n)
|
||||||
{ escape_sequences.push_back(string(s, n)); }
|
{ escape_sequences.insert(string(s, n)); }
|
||||||
void AddEscapeSequence(const string & s)
|
void AddEscapeSequence(const string & s)
|
||||||
{ escape_sequences.push_back(s); }
|
{ escape_sequences.insert(s); }
|
||||||
void RemoveEscapeSequence(const char* s) { escape_sequences.remove(s); }
|
void RemoveEscapeSequence(const char* s) { escape_sequences.erase(s); }
|
||||||
void RemoveEscapeSequence(const char* s, size_t n)
|
void RemoveEscapeSequence(const char* s, size_t n)
|
||||||
{ escape_sequences.remove(string(s, n)); }
|
{ escape_sequences.erase(string(s, n)); }
|
||||||
void RemoveEscapeSequence(const string & s)
|
void RemoveEscapeSequence(const string & s)
|
||||||
{ escape_sequences.remove(s); }
|
{ escape_sequences.erase(s); }
|
||||||
|
|
||||||
void PushIndent();
|
void PushIndent();
|
||||||
void PopIndent();
|
void PopIndent();
|
||||||
|
@ -163,6 +163,15 @@ protected:
|
||||||
*/
|
*/
|
||||||
pair<const char*, size_t> FirstEscapeLoc(const char* bytes, size_t n);
|
pair<const char*, size_t> FirstEscapeLoc(const char* bytes, size_t n);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param start start of string to check for starting with an escape
|
||||||
|
* sequence.
|
||||||
|
* @param end one byte past the last character in the string.
|
||||||
|
* @return The number of bytes in the escape sequence that the string
|
||||||
|
* starts with.
|
||||||
|
*/
|
||||||
|
size_t StartsWithEscapeSequence(const char* start, const char* end);
|
||||||
|
|
||||||
desc_type type;
|
desc_type type;
|
||||||
desc_style style;
|
desc_style style;
|
||||||
|
|
||||||
|
@ -171,7 +180,8 @@ protected:
|
||||||
unsigned int size; // size of buffer in bytes
|
unsigned int size; // size of buffer in bytes
|
||||||
|
|
||||||
bool escape; // escape unprintable characters in output?
|
bool escape; // escape unprintable characters in output?
|
||||||
list<string> escape_sequences; // additional sequences of chars to escape
|
typedef set<string> escape_set;
|
||||||
|
escape_set escape_sequences; // additional sequences of chars to escape
|
||||||
|
|
||||||
BroFile* f; // or the file we're using.
|
BroFile* f; // or the file we're using.
|
||||||
|
|
||||||
|
|
|
@ -335,10 +335,10 @@ bool Ascii::DoWrite(int num_fields, const Field* const * fields,
|
||||||
if ( strncmp(bytes, meta_prefix.data(), meta_prefix.size()) == 0 )
|
if ( strncmp(bytes, meta_prefix.data(), meta_prefix.size()) == 0 )
|
||||||
{
|
{
|
||||||
// It would, so escape the first character.
|
// It would, so escape the first character.
|
||||||
char buf[16];
|
char hex[4] = {'\\', 'x', '0', '0'};
|
||||||
snprintf(buf, sizeof(buf), "\\x%02x", bytes[0]);
|
bytetohex(bytes[0], hex + 2);
|
||||||
|
|
||||||
if ( ! safe_write(fd, buf, strlen(buf)) )
|
if ( ! safe_write(fd, hex, 4) )
|
||||||
goto write_error;
|
goto write_error;
|
||||||
|
|
||||||
++bytes;
|
++bytes;
|
||||||
|
|
|
@ -122,10 +122,8 @@ bool Ascii::Describe(ODesc* desc, threading::Value* val, const string& name) con
|
||||||
// place-holder we use for unset optional fields. We
|
// place-holder we use for unset optional fields. We
|
||||||
// escape the first character so that the output
|
// escape the first character so that the output
|
||||||
// won't be ambiguous.
|
// won't be ambiguous.
|
||||||
static const char hex_chars[] = "0123456789abcdef";
|
char hex[4] = {'\\', 'x', '0', '0'};
|
||||||
char hex[6] = "\\x00";
|
bytetohex(*data, hex + 2);
|
||||||
hex[2] = hex_chars[((*data) & 0xf0) >> 4];
|
|
||||||
hex[3] = hex_chars[(*data) & 0x0f];
|
|
||||||
desc->AddRaw(hex, 4);
|
desc->AddRaw(hex, 4);
|
||||||
|
|
||||||
++data;
|
++data;
|
||||||
|
|
|
@ -160,10 +160,11 @@ bool JSON::Describe(ODesc* desc, Value* val, const string& name) const
|
||||||
// 2byte Unicode escape special characters.
|
// 2byte Unicode escape special characters.
|
||||||
if ( c < 32 || c > 126 || c == '\n' || c == '"' || c == '\'' || c == '\\' || c == '&' )
|
if ( c < 32 || c > 126 || c == '\n' || c == '"' || c == '\'' || c == '\\' || c == '&' )
|
||||||
{
|
{
|
||||||
static const char hex_chars[] = "0123456789abcdef";
|
|
||||||
desc->AddRaw("\\u00", 4);
|
desc->AddRaw("\\u00", 4);
|
||||||
desc->AddRaw(&hex_chars[(c & 0xf0) >> 4], 1);
|
char hex[2] = {'0', '0'};
|
||||||
desc->AddRaw(&hex_chars[c & 0x0f], 1);
|
bytetohex(c, hex);
|
||||||
|
desc->AddRaw(hex, 1);
|
||||||
|
desc->AddRaw(hex + 1, 1);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
desc->AddRaw(&c, 1);
|
desc->AddRaw(&c, 1);
|
||||||
|
|
30
src/util.cc
30
src/util.cc
|
@ -120,31 +120,41 @@ std::string get_unescaped_string(const std::string& arg_str)
|
||||||
* Takes a string, escapes characters into equivalent hex codes (\x##), and
|
* Takes a string, escapes characters into equivalent hex codes (\x##), and
|
||||||
* returns a string containing all escaped values.
|
* returns a string containing all escaped values.
|
||||||
*
|
*
|
||||||
|
* @param d an ODesc object to store the escaped hex version of the string;
|
||||||
|
* if null, one will be allocated and returned from the function.
|
||||||
* @param str string to escape
|
* @param str string to escape
|
||||||
* @param escape_all If true, all characters are escaped. If false, only
|
* @param escape_all If true, all characters are escaped. If false, only
|
||||||
* characters are escaped that are either whitespace or not printable in
|
* characters are escaped that are either whitespace or not printable in
|
||||||
* ASCII.
|
* ASCII.
|
||||||
* @return A std::string containing a list of escaped hex values of the form
|
* @return An ODesc object containing a list of escaped hex values of the form
|
||||||
* \x## */
|
* \x##, which may be newly allocated if \a d was a null pointer. */
|
||||||
std::string get_escaped_string(const std::string& str, bool escape_all)
|
ODesc* get_escaped_string(ODesc* d, const char* str, size_t len,
|
||||||
|
bool escape_all)
|
||||||
{
|
{
|
||||||
char tbuf[16];
|
if ( ! d )
|
||||||
string esc = "";
|
d = new ODesc();
|
||||||
|
|
||||||
for ( size_t i = 0; i < str.length(); ++i )
|
for ( size_t i = 0; i < len; ++i )
|
||||||
{
|
{
|
||||||
char c = str[i];
|
char c = str[i];
|
||||||
|
|
||||||
if ( escape_all || isspace(c) || ! isascii(c) || ! isprint(c) )
|
if ( escape_all || isspace(c) || ! isascii(c) || ! isprint(c) )
|
||||||
{
|
{
|
||||||
snprintf(tbuf, sizeof(tbuf), "\\x%02x", str[i]);
|
char hex[4] = {'\\', 'x', '0', '0' };
|
||||||
esc += tbuf;
|
bytetohex(c, hex + 2);
|
||||||
|
d->AddRaw(hex, 4);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
esc += c;
|
d->AddRaw(&c, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
return esc;
|
return d;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string get_escaped_string(const char* str, size_t len, bool escape_all)
|
||||||
|
{
|
||||||
|
ODesc d;
|
||||||
|
return get_escaped_string(&d, str, len, escape_all)->Description();
|
||||||
}
|
}
|
||||||
|
|
||||||
char* copy_string(const char* s)
|
char* copy_string(const char* s)
|
||||||
|
|
19
src/util.h
19
src/util.h
|
@ -102,8 +102,25 @@ void delete_each(T* t)
|
||||||
std::string extract_ip(const std::string& i);
|
std::string extract_ip(const std::string& i);
|
||||||
std::string extract_ip_and_len(const std::string& i, int* len);
|
std::string extract_ip_and_len(const std::string& i, int* len);
|
||||||
|
|
||||||
|
inline void bytetohex(unsigned char byte, char* hex_out)
|
||||||
|
{
|
||||||
|
static const char hex_chars[] = "0123456789abcdef";
|
||||||
|
hex_out[0] = hex_chars[(byte & 0xf0) >> 4];
|
||||||
|
hex_out[1] = hex_chars[byte & 0x0f];
|
||||||
|
}
|
||||||
|
|
||||||
std::string get_unescaped_string(const std::string& str);
|
std::string get_unescaped_string(const std::string& str);
|
||||||
std::string get_escaped_string(const std::string& str, bool escape_all);
|
|
||||||
|
class ODesc;
|
||||||
|
|
||||||
|
ODesc* get_escaped_string(ODesc* d, const char* str, size_t len,
|
||||||
|
bool escape_all);
|
||||||
|
std::string get_escaped_string(const char* str, size_t len, bool escape_all);
|
||||||
|
|
||||||
|
inline std::string get_escaped_string(const std::string& str, bool escape_all)
|
||||||
|
{
|
||||||
|
return get_escaped_string(str.data(), str.length(), escape_all);
|
||||||
|
}
|
||||||
|
|
||||||
std::vector<std::string>* tokenize_string(std::string input,
|
std::vector<std::string>* tokenize_string(std::string input,
|
||||||
const std::string& delim,
|
const std::string& delim,
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue