Use json_escape_utf8 for all utf8 data in ODesc

This commit is contained in:
Tim Wojtulewicz 2021-08-30 16:14:49 -07:00
parent a4ceb98bf8
commit 404fed6923
6 changed files with 44 additions and 44 deletions

View file

@ -251,11 +251,8 @@ size_t ODesc::StartsWithEscapeSequence(const char* start, const char* end)
if ( escape_sequences.empty() )
return 0;
escape_set::const_iterator it;
for ( it = escape_sequences.begin(); it != escape_sequences.end(); ++it )
for ( const auto& esc_str : escape_sequences )
{
const std::string& esc_str = *it;
size_t esc_len = esc_str.length();
if ( start + esc_len > end )
@ -289,33 +286,9 @@ std::pair<const char*, size_t> ODesc::FirstEscapeLoc(const char* bytes, size_t n
if ( len )
return escape_pos(bytes + i, len);
if ( ! printable && utf8 )
{
size_t utf_found = getNumBytesForUTF8(bytes[i]);
if ( utf_found == 1 )
return escape_pos(bytes + i, 1);
if ( i + utf_found > n )
// Don't know if this is even meant to be a utf8 encoding,
// since there's not enough bytes left to check it's a valid
// sequence, so maybe safest to just move up by one instead
// of escaping the entire remainder.
return escape_pos(bytes + i, 1);
if ( isLegalUTF8Sequence(reinterpret_cast<const unsigned char *>(bytes + i),
reinterpret_cast<const unsigned char *>(bytes + i + utf_found)) )
{
i += utf_found - 1;
continue;
}
return escape_pos(bytes + i, 1);
}
}
return escape_pos(0, 0);
return escape_pos(nullptr, 0);
}
void ODesc::AddBytes(const void* bytes, unsigned int n)
@ -331,17 +304,31 @@ void ODesc::AddBytes(const void* bytes, unsigned int n)
while ( s < e )
{
std::pair<const char*, size_t> p = FirstEscapeLoc(s, e - s);
auto [ esc_start, esc_len ] = FirstEscapeLoc(s, e - s);
if ( p.first )
if ( esc_start != nullptr )
{
AddBytesRaw(s, p.first - s);
util::get_escaped_string(this, p.first, p.second, true);
s = p.first + p.second;
if ( utf8 )
{
std::string result = util::json_escape_utf8(s, esc_start - s, false);
AddBytesRaw(result.c_str(), result.size());
}
else
AddBytesRaw(s, esc_start - s);
util::get_escaped_string(this, esc_start, esc_len, true);
s = esc_start + esc_len;
}
else
{
if ( utf8 )
{
std::string result = util::json_escape_utf8(s, e - s, false);
AddBytesRaw(result.c_str(), result.size());
}
else
AddBytesRaw(s, e - s);
break;
}
}

View file

@ -2439,10 +2439,14 @@ static bool check_ok_utf8(const unsigned char* start, const unsigned char* end)
return true;
}
string json_escape_utf8(const string& val)
string json_escape_utf8(const string& val, bool escape_printable_controls)
{
auto val_data = reinterpret_cast<const unsigned char*>(val.c_str());
auto val_size = val.length();
return json_escape_utf8(val.c_str(), val.size(), escape_printable_controls);
}
string json_escape_utf8(const char* val, size_t val_size, bool escape_printable_controls)
{
auto val_data = reinterpret_cast<const unsigned char*>(val);
// Reserve at least the size of the existing string to avoid resizing the string in the best-case
// scenario where we don't have any multi-byte characters. We keep two versions of this string:
@ -2464,7 +2468,7 @@ string json_escape_utf8(const string& val)
// Normal ASCII characters plus a few of the control characters can be inserted directly. The
// rest of the control characters should be escaped as regular bytes.
if ( ( ch >= 32 && ch < 127 ) ||
ch == '\b' || ch == '\f' || ch == '\n' || ch == '\r' || ch == '\t' )
( escape_printable_controls && ( ch == '\b' || ch == '\f' || ch == '\n' || ch == '\r' || ch == '\t' ) ) )
{
if ( ! found_bad )
utf_result.push_back(ch);

View file

@ -550,7 +550,16 @@ char* zeekenv(const char* name);
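* @param val the input string to be escaped
* @param escape_printable_controls when true (the default), the control characters
* \b, \f, \n, \r and \t are appended to the result unchanged, like printable ASCII;
* when false they do not get that treatment (presumably they are escaped like other
* control bytes -- confirm against the implementation)
* @return the escaped string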
*/
std::string json_escape_utf8(const std::string& val);
std::string json_escape_utf8(const std::string& val, bool escape_printable_controls=true);
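/**
* Escapes bytes in a character buffer that are not valid UTF-8 characters using the
* \xYY format. Used by the JSON writer, BIF methods, and ODesc.
* @param val the character data to be escaped
* @param val_size the length of the character data, in bytes
* @param escape_printable_controls when true (the default), the control characters
* \b, \f, \n, \r and \t are appended to the result unchanged, like printable ASCII;
* when false they do not get that treatment (presumably they are escaped like other
* control bytes -- confirm against the implementation)
* @return the escaped string
*/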
std::string json_escape_utf8(const char* val, size_t val_size, bool escape_printable_controls=true);
} // namespace util
} // namespace zeek

View file

@ -7,6 +7,6 @@
#open XXXX-XX-XX-XX-XX-XX
#fields ts id.src_ip id.src_p id.dst_ip id.dst_p sensor_id signature_id signature generator_id generator signature_revision classification_id classification priority_id event_id packet
#types time addr port addr port count count string count string count count string count count string
XXXXXXXXXX.XXXXXX 192.168.1.72 50185 74.125.225.49 80 0 2003058 ET MALWARE 180solutions (Zango) Spyware Installer Download 1 snort general alert 5 21 trojan-activity 1 2 \xd80bHŵx\xca9\xb7\xe4r\x08\x00E\x10\x00\\\x1a\xce@\x00@\x062\x1f\xc0\xa8\x01HJ}\xe11\xc4\x09\x00P*\xa8bv]z/ހ\x18\x82+\x88,\x00\x00\x01\x01\x08\x0a\x17J\x83Q\xfe\xad\xac\x1aGET /Zango/ZangoInstaller.exe HTTP/1.0\x0d\x0a
XXXXXXXXXX.XXXXXX 192.168.1.72 49862 199.47.216.144 80 0 2012647 ET POLICY Dropbox.com Offsite File Backup in Use 1 snort general alert 3 33 policy-violation 1 3 \xd80bHŵx\xca9\xb7\xe4r\x08\x00E\x00\x00\xf8Q\xdf@\x00@\x06\x86p\xc0\xa8\x01H\xc7/ؐ\xc2\xc6\x00P\x9cm\x97U\xf07\x084\x80\x18\x82\x18%<\x00\x00\x01\x01\x08\x0a\x17J\xd7\xde\x00\x92\x81\xc5GET /subscribe?host_int=43112345&ns_map=123456_1234524412104916591&ts=1323827344 HTTP/1.1\x0d\x0aHost: notify1.dropbox.com\x0d\x0aAccept-Encoding: identity\x0d\x0aConnection: keep-alive\x0d\x0aX-Dropbox-Locale: en_US\x0d\x0a\x0d\x0a
XXXXXXXXXX.XXXXXX 192.168.1.72 50185 74.125.225.49 80 0 2003058 ET MALWARE 180solutions (Zango) Spyware Installer Download 1 snort general alert 5 21 trojan-activity 1 2 \xd80bH\xc5\xb5x\xca9\xb7\xe4r\x08\x00E\x10\x00\\\x1a\xce@\x00@\x062\x1f\xc0\xa8\x01HJ}\xe11\xc4\x09\x00P*\xa8bv]z/\xde\x80\x18\x82+\x88,\x00\x00\x01\x01\x08\x0a\x17J\x83Q\xfe\xad\xac\x1aGET /Zango/ZangoInstaller.exe HTTP/1.0\x0d\x0a
XXXXXXXXXX.XXXXXX 192.168.1.72 49862 199.47.216.144 80 0 2012647 ET POLICY Dropbox.com Offsite File Backup in Use 1 snort general alert 3 33 policy-violation 1 3 \xd80bH\xc5\xb5x\xca9\xb7\xe4r\x08\x00E\x00\x00\xf8Q\xdf@\x00@\x06\x86p\xc0\xa8\x01H\xc7/\xd8\x90\xc2\xc6\x00P\x9cm\x97U\xf07\x084\x80\x18\x82\x18%<\x00\x00\x01\x01\x08\x0a\x17J\xd7\xde\x00\x92\x81\xc5GET /subscribe?host_int=43112345&ns_map=123456_1234524412104916591&ts=1323827344 HTTP/1.1\x0d\x0aHost: notify1.dropbox.com\x0d\x0aAccept-Encoding: identity\x0d\x0aConnection: keep-alive\x0d\x0aX-Dropbox-Locale: en_US\x0d\x0a\x0d\x0a
#close XXXX-XX-XX-XX-XX-XX

View file

@ -1 +1 @@
465b0bb55451934b211dda72ad388496b3a2d1d7
fe19edc8e08e1f4cc41a5166ee7b7bf1b6e71bc5

View file

@ -1 +1 @@
4b88837c49ade5d9fd980d5e6cf02ec91d19a3bb
9e15189e3d4356e98bb1b155da282de2375ac80f