Optimize json_escape_utf8 a bit by removing repeated calls to string methods

This commit is contained in:
Tim Wojtulewicz 2020-01-08 12:08:32 -07:00
parent ee0619f999
commit 23f551876c
2 changed files with 26 additions and 22 deletions

View file

@ -2248,50 +2248,54 @@ TEST_CASE("util json_escape_utf8")
string json_escape_utf8(const string& val) string json_escape_utf8(const string& val)
{ {
string result;
result.reserve(val.length());
auto val_data = reinterpret_cast<const unsigned char*>(val.c_str()); auto val_data = reinterpret_cast<const unsigned char*>(val.c_str());
auto val_size = val.length();
// Reserve at least the size of the existing string to avoid resizing the string in the best-case
// scenario where we don't have any multi-byte characters.
string result;
result.reserve(val_size);
size_t idx; size_t idx;
for ( idx = 0; idx < val.length(); ) for ( idx = 0; idx < val_size; )
{ {
// Normal ASCII characters plus a few of the control characters can be inserted directly. The rest of char ch = val[idx];
// the control characters should be escaped as regular bytes.
if ( ( val[idx] >= 32 && val[idx] <= 127 ) || // Normal ASCII characters plus a few of the control characters can be inserted directly. The
val[idx] == '\b' || val[idx] == '\f' || val[idx] == '\n' || val[idx] == '\r' || val[idx] == '\t' ) // rest of the control characters should be escaped as regular bytes.
if ( ( ch >= 32 && ch <= 127 ) ||
ch == '\b' || ch == '\f' || ch == '\n' || ch == '\r' || ch == '\t' )
{ {
result.push_back(val[idx]); result.push_back(ch);
++idx; ++idx;
continue; continue;
} }
else if ( val[idx] >= 0 && val[idx] < 32 ) else if ( ch >= 0 && ch < 32 )
{ {
result.append(json_escape_byte(val[idx])); result.append(json_escape_byte(ch));
++idx; ++idx;
continue; continue;
} }
// Find out how long the next character should be. // Find out how long the next character should be.
unsigned int char_size = getNumBytesForUTF8(val[idx]); unsigned int char_size = getNumBytesForUTF8(ch);
// If it says that it's a single character or it's not a valid string UTF8 sequence, insert the one // If it says that it's a single character or it's not a valid string UTF8 sequence, insert
// escaped byte into the string, step forward one, and go to the next character. // the one escaped byte into the string, step forward one, and go to the next character.
if ( char_size == 0 || idx+char_size > val.length() || isLegalUTF8Sequence(val_data+idx, val_data+idx+char_size) == 0 ) if ( char_size == 0 || idx+char_size > val_size || isLegalUTF8Sequence(val_data+idx, val_data+idx+char_size) == 0 )
{ {
result.append(json_escape_byte(val[idx])); result.append(json_escape_byte(ch));
++idx; ++idx;
continue; continue;
} }
for ( size_t step = 0; step < char_size; step++, idx++ ) result.append(val, idx, char_size);
result.push_back(val[idx]); idx += char_size;
} }
// Insert any of the remaining bytes into the string as escaped bytes // Insert any of the remaining bytes into the string as escaped bytes
if ( idx != val.length() ) for ( ; idx < val_size; ++idx )
for ( ; idx < val.length(); ++idx ) result.append(json_escape_byte(val[idx]));
result.append(json_escape_byte(val[idx]));
return result; return result;
} }

View file

@ -118,7 +118,7 @@ std::string extract_ip_and_len(const std::string& i, int* len);
inline void bytetohex(unsigned char byte, char* hex_out) inline void bytetohex(unsigned char byte, char* hex_out)
{ {
static const char hex_chars[] = "0123456789abcdef"; static constexpr char hex_chars[] = "0123456789abcdef";
hex_out[0] = hex_chars[(byte & 0xf0) >> 4]; hex_out[0] = hex_chars[(byte & 0xf0) >> 4];
hex_out[1] = hex_chars[byte & 0x0f]; hex_out[1] = hex_chars[byte & 0x0f];
} }