mirror of
https://github.com/zeek/zeek.git
synced 2025-10-02 14:48:21 +00:00
Added optional script and redef bool to enable utf-8 in ASCII logs
This commit is contained in:
parent
6927dd1213
commit
66557d3178
10 changed files with 111 additions and 2 deletions
42
src/Desc.cc
42
src/Desc.cc
|
@ -10,6 +10,8 @@
|
|||
#include "File.h"
|
||||
#include "Reporter.h"
|
||||
|
||||
#include "ConvertUTF.h"
|
||||
|
||||
#define DEFAULT_SIZE 128
|
||||
#define SLOP 10
|
||||
|
||||
|
@ -39,6 +41,7 @@ ODesc::ODesc(desc_type t, BroFile* arg_f)
|
|||
include_stats = 0;
|
||||
indent_with_spaces = 0;
|
||||
escape = false;
|
||||
utf8 = false;
|
||||
}
|
||||
|
||||
ODesc::~ODesc()
|
||||
|
@ -57,6 +60,11 @@ void ODesc::EnableEscaping()
|
|||
escape = true;
|
||||
}
|
||||
|
||||
void ODesc::EnableUTF8 ()
|
||||
{
|
||||
utf8 = true;
|
||||
}
|
||||
|
||||
void ODesc::PushIndent()
|
||||
{
|
||||
++indent_level;
|
||||
|
@ -249,6 +257,23 @@ size_t ODesc::StartsWithEscapeSequence(const char* start, const char* end)
|
|||
return 0;
|
||||
}
|
||||
|
||||
size_t check_utf8 (const char* bytes, size_t n, size_t i)
|
||||
{
|
||||
// Checks two to four bytes from starting position i
|
||||
// and returns the length of the valid utf-8 sequence
|
||||
size_t num_to_check = ((n-i+1) < 4) ? (n-i+1) : 4;
|
||||
|
||||
for (size_t j = 1; j <= num_to_check; ++j)
|
||||
{
|
||||
if (isLegalUTF8Sequence(reinterpret_cast<const unsigned char *>(bytes+i), reinterpret_cast<const unsigned char *>(bytes+i+j) ))
|
||||
{
|
||||
return j;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
|
||||
}
|
||||
|
||||
pair<const char*, size_t> ODesc::FirstEscapeLoc(const char* bytes, size_t n)
|
||||
{
|
||||
typedef pair<const char*, size_t> escape_pos;
|
||||
|
@ -258,8 +283,21 @@ pair<const char*, size_t> ODesc::FirstEscapeLoc(const char* bytes, size_t n)
|
|||
|
||||
for ( size_t i = 0; i < n; ++i )
|
||||
{
|
||||
//if ( ! isprint(bytes[i]) || bytes[i] == '\\' )
|
||||
if ( bytes[i] == '\\' )
|
||||
if (!isprint(bytes[i]))
|
||||
{
|
||||
if (utf8)
|
||||
{
|
||||
size_t utf_found = check_utf8(bytes, n, i);
|
||||
if (utf_found)
|
||||
{
|
||||
i += utf_found - 1;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
return escape_pos(bytes + i, 1);
|
||||
}
|
||||
|
||||
else if (bytes[i] == '\\' )
|
||||
return escape_pos(bytes + i, 1);
|
||||
|
||||
size_t len = StartsWithEscapeSequence(bytes + i, bytes + n);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue