mirror of
https://github.com/zeek/zeek.git
synced 2025-10-02 14:48:21 +00:00
Use getNumBytesForUTF8 method to determine number of bytes
This commit is contained in:
parent
6fcb23066d
commit
c59a7279f0
1 changed file with 2 additions and 24 deletions
26
src/Desc.cc
26
src/Desc.cc
|
@ -257,28 +257,6 @@ size_t ODesc::StartsWithEscapeSequence(const char* start, const char* end)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t check_utf8 (const char* bytes, size_t n, size_t i)
|
|
||||||
{
|
|
||||||
// Check if this is in fact a multibyte UTF-8 sequence,
|
|
||||||
// which requires a 1 to be the first bit of the first byte
|
|
||||||
if (!(bytes[i] >> 7 & 1))
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
// Checks two to four bytes from starting position i
|
|
||||||
// and returns the length of the valid utf-8 sequence
|
|
||||||
size_t num_to_check = ((n-i+1) < 4) ? (n-i+1) : 4;
|
|
||||||
|
|
||||||
for (size_t j = 1; j <= num_to_check; ++j)
|
|
||||||
{
|
|
||||||
if (isLegalUTF8Sequence(reinterpret_cast<const unsigned char *>(bytes+i), reinterpret_cast<const unsigned char *>(bytes+i+j) ))
|
|
||||||
{
|
|
||||||
return j;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
pair<const char*, size_t> ODesc::FirstEscapeLoc(const char* bytes, size_t n)
|
pair<const char*, size_t> ODesc::FirstEscapeLoc(const char* bytes, size_t n)
|
||||||
{
|
{
|
||||||
typedef pair<const char*, size_t> escape_pos;
|
typedef pair<const char*, size_t> escape_pos;
|
||||||
|
@ -300,8 +278,8 @@ pair<const char*, size_t> ODesc::FirstEscapeLoc(const char* bytes, size_t n)
|
||||||
{
|
{
|
||||||
if (utf8)
|
if (utf8)
|
||||||
{
|
{
|
||||||
size_t utf_found = check_utf8(bytes, n, i);
|
size_t utf_found = getNumBytesForUTF8(bytes[i]);
|
||||||
if (utf_found)
|
if (utf_found > 1 && utf_found < (n-i+1) && isLegalUTF8Sequence(reinterpret_cast<const unsigned char *>(bytes+i), reinterpret_cast<const unsigned char *>(bytes+i+utf_found) ))
|
||||||
{
|
{
|
||||||
i += utf_found - 1;
|
i += utf_found - 1;
|
||||||
continue;
|
continue;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue