mirror of
https://github.com/zeek/zeek.git
synced 2025-10-02 14:48:21 +00:00
Added optional script and redef bool to enable utf-8 in ASCII logs
This commit is contained in:
parent
6927dd1213
commit
66557d3178
10 changed files with 111 additions and 2 deletions
|
@ -26,6 +26,12 @@ export {
|
||||||
## This option is also available as a per-filter ``$config`` option.
|
## This option is also available as a per-filter ``$config`` option.
|
||||||
const use_json = F &redef;
|
const use_json = F &redef;
|
||||||
|
|
||||||
|
## If true, valid non-ASCII UTF-8 characters will pass through and
|
||||||
|
## be written into logs unescaped.
|
||||||
|
##
|
||||||
|
## This option is also available as a per-filter ``$config`` option.
|
||||||
|
const enable_utf_8 = F &redef;
|
||||||
|
|
||||||
## Define the gzip level to compress the logs. If 0, then no gzip
|
## Define the gzip level to compress the logs. If 0, then no gzip
|
||||||
## compression is performed. Enabling compression also changes
|
## compression is performed. Enabling compression also changes
|
||||||
## the log file name extension to include ".gz".
|
## the log file name extension to include ".gz".
|
||||||
|
|
4
scripts/policy/tuning/enable-utf-8-logs.zeek
Normal file
4
scripts/policy/tuning/enable-utf-8-logs.zeek
Normal file
|
@ -0,0 +1,4 @@
|
||||||
|
##! Loading this script will write valid UTF-8 characters into ASCII logs as-is
|
||||||
|
##! instead of escaping them into the \xYY format
|
||||||
|
|
||||||
|
redef LogAscii::enable_utf_8=T;
|
|
@ -112,4 +112,5 @@
|
||||||
@load tuning/defaults/packet-fragments.zeek
|
@load tuning/defaults/packet-fragments.zeek
|
||||||
@load tuning/defaults/warnings.zeek
|
@load tuning/defaults/warnings.zeek
|
||||||
@load tuning/json-logs.zeek
|
@load tuning/json-logs.zeek
|
||||||
|
@load tuning/enable-utf-8-logs.zeek
|
||||||
@load tuning/track-all-assets.zeek
|
@load tuning/track-all-assets.zeek
|
||||||
|
|
42
src/Desc.cc
42
src/Desc.cc
|
@ -10,6 +10,8 @@
|
||||||
#include "File.h"
|
#include "File.h"
|
||||||
#include "Reporter.h"
|
#include "Reporter.h"
|
||||||
|
|
||||||
|
#include "ConvertUTF.h"
|
||||||
|
|
||||||
#define DEFAULT_SIZE 128
|
#define DEFAULT_SIZE 128
|
||||||
#define SLOP 10
|
#define SLOP 10
|
||||||
|
|
||||||
|
@ -39,6 +41,7 @@ ODesc::ODesc(desc_type t, BroFile* arg_f)
|
||||||
include_stats = 0;
|
include_stats = 0;
|
||||||
indent_with_spaces = 0;
|
indent_with_spaces = 0;
|
||||||
escape = false;
|
escape = false;
|
||||||
|
utf8 = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
ODesc::~ODesc()
|
ODesc::~ODesc()
|
||||||
|
@ -57,6 +60,11 @@ void ODesc::EnableEscaping()
|
||||||
escape = true;
|
escape = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void ODesc::EnableUTF8 ()
|
||||||
|
{
|
||||||
|
utf8 = true;
|
||||||
|
}
|
||||||
|
|
||||||
void ODesc::PushIndent()
|
void ODesc::PushIndent()
|
||||||
{
|
{
|
||||||
++indent_level;
|
++indent_level;
|
||||||
|
@ -249,6 +257,23 @@ size_t ODesc::StartsWithEscapeSequence(const char* start, const char* end)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
size_t check_utf8 (const char* bytes, size_t n, size_t i)
|
||||||
|
{
|
||||||
|
// Checks two to four bytes from starting position i
|
||||||
|
// and returns the length of the valid utf-8 sequence
|
||||||
|
size_t num_to_check = ((n-i+1) < 4) ? (n-i+1) : 4;
|
||||||
|
|
||||||
|
for (size_t j = 1; j <= num_to_check; ++j)
|
||||||
|
{
|
||||||
|
if (isLegalUTF8Sequence(reinterpret_cast<const unsigned char *>(bytes+i), reinterpret_cast<const unsigned char *>(bytes+i+j) ))
|
||||||
|
{
|
||||||
|
return j;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
pair<const char*, size_t> ODesc::FirstEscapeLoc(const char* bytes, size_t n)
|
pair<const char*, size_t> ODesc::FirstEscapeLoc(const char* bytes, size_t n)
|
||||||
{
|
{
|
||||||
typedef pair<const char*, size_t> escape_pos;
|
typedef pair<const char*, size_t> escape_pos;
|
||||||
|
@ -258,8 +283,21 @@ pair<const char*, size_t> ODesc::FirstEscapeLoc(const char* bytes, size_t n)
|
||||||
|
|
||||||
for ( size_t i = 0; i < n; ++i )
|
for ( size_t i = 0; i < n; ++i )
|
||||||
{
|
{
|
||||||
//if ( ! isprint(bytes[i]) || bytes[i] == '\\' )
|
if (!isprint(bytes[i]))
|
||||||
if ( bytes[i] == '\\' )
|
{
|
||||||
|
if (utf8)
|
||||||
|
{
|
||||||
|
size_t utf_found = check_utf8(bytes, n, i);
|
||||||
|
if (utf_found)
|
||||||
|
{
|
||||||
|
i += utf_found - 1;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return escape_pos(bytes + i, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
else if (bytes[i] == '\\' )
|
||||||
return escape_pos(bytes + i, 1);
|
return escape_pos(bytes + i, 1);
|
||||||
|
|
||||||
size_t len = StartsWithEscapeSequence(bytes + i, bytes + n);
|
size_t len = StartsWithEscapeSequence(bytes + i, bytes + n);
|
||||||
|
|
|
@ -54,6 +54,7 @@ public:
|
||||||
void SetFlush(int arg_do_flush) { do_flush = arg_do_flush; }
|
void SetFlush(int arg_do_flush) { do_flush = arg_do_flush; }
|
||||||
|
|
||||||
void EnableEscaping();
|
void EnableEscaping();
|
||||||
|
void EnableUTF8();
|
||||||
void AddEscapeSequence(const char* s) { escape_sequences.insert(s); }
|
void AddEscapeSequence(const char* s) { escape_sequences.insert(s); }
|
||||||
void AddEscapeSequence(const char* s, size_t n)
|
void AddEscapeSequence(const char* s, size_t n)
|
||||||
{ escape_sequences.insert(string(s, n)); }
|
{ escape_sequences.insert(string(s, n)); }
|
||||||
|
@ -185,6 +186,7 @@ protected:
|
||||||
unsigned int offset; // where we are in the buffer
|
unsigned int offset; // where we are in the buffer
|
||||||
unsigned int size; // size of buffer in bytes
|
unsigned int size; // size of buffer in bytes
|
||||||
|
|
||||||
|
bool utf8; //whether to allow non ascii utf-8 characters to pass through
|
||||||
bool escape; // escape unprintable characters in output?
|
bool escape; // escape unprintable characters in output?
|
||||||
typedef set<string> escape_set;
|
typedef set<string> escape_set;
|
||||||
escape_set escape_sequences; // additional sequences of chars to escape
|
escape_set escape_sequences; // additional sequences of chars to escape
|
||||||
|
|
|
@ -23,6 +23,7 @@ Ascii::Ascii(WriterFrontend* frontend) : WriterBackend(frontend)
|
||||||
include_meta = false;
|
include_meta = false;
|
||||||
tsv = false;
|
tsv = false;
|
||||||
use_json = false;
|
use_json = false;
|
||||||
|
enable_utf_8 = false;
|
||||||
formatter = 0;
|
formatter = 0;
|
||||||
gzip_level = 0;
|
gzip_level = 0;
|
||||||
gzfile = nullptr;
|
gzfile = nullptr;
|
||||||
|
@ -36,6 +37,7 @@ void Ascii::InitConfigOptions()
|
||||||
output_to_stdout = BifConst::LogAscii::output_to_stdout;
|
output_to_stdout = BifConst::LogAscii::output_to_stdout;
|
||||||
include_meta = BifConst::LogAscii::include_meta;
|
include_meta = BifConst::LogAscii::include_meta;
|
||||||
use_json = BifConst::LogAscii::use_json;
|
use_json = BifConst::LogAscii::use_json;
|
||||||
|
enable_utf_8 = BifConst::LogAscii::enable_utf_8;
|
||||||
gzip_level = BifConst::LogAscii::gzip_level;
|
gzip_level = BifConst::LogAscii::gzip_level;
|
||||||
|
|
||||||
separator.assign(
|
separator.assign(
|
||||||
|
@ -115,6 +117,19 @@ bool Ascii::InitFilterOptions()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
else if ( strcmp(i->first, "enable_utf_8") == 0 )
|
||||||
|
{
|
||||||
|
if ( strcmp(i->second, "T") == 0 )
|
||||||
|
enable_utf_8 = true;
|
||||||
|
else if ( strcmp(i->second, "F") == 0 )
|
||||||
|
enable_utf_8 = false;
|
||||||
|
else
|
||||||
|
{
|
||||||
|
Error("invalid value for 'enable_utf_8', must be a string and either \"T\" or \"F\"");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
else if ( strcmp(i->first, "output_to_stdout") == 0 )
|
else if ( strcmp(i->first, "output_to_stdout") == 0 )
|
||||||
{
|
{
|
||||||
if ( strcmp(i->second, "T") == 0 )
|
if ( strcmp(i->second, "T") == 0 )
|
||||||
|
@ -181,6 +196,9 @@ bool Ascii::InitFormatter()
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
// Enable utf-8 if needed
|
||||||
|
if (enable_utf_8)
|
||||||
|
desc.EnableUTF8();
|
||||||
// Use the default "Bro logs" format.
|
// Use the default "Bro logs" format.
|
||||||
desc.EnableEscaping();
|
desc.EnableEscaping();
|
||||||
desc.AddEscapeSequence(separator);
|
desc.AddEscapeSequence(separator);
|
||||||
|
|
|
@ -65,6 +65,7 @@ private:
|
||||||
|
|
||||||
int gzip_level; // level > 0 enables gzip compression
|
int gzip_level; // level > 0 enables gzip compression
|
||||||
bool use_json;
|
bool use_json;
|
||||||
|
bool enable_utf_8;
|
||||||
string json_timestamps;
|
string json_timestamps;
|
||||||
|
|
||||||
threading::formatter::Formatter* formatter;
|
threading::formatter::Formatter* formatter;
|
||||||
|
|
|
@ -11,5 +11,6 @@ const set_separator: string;
|
||||||
const empty_field: string;
|
const empty_field: string;
|
||||||
const unset_field: string;
|
const unset_field: string;
|
||||||
const use_json: bool;
|
const use_json: bool;
|
||||||
|
const enable_utf_8: bool;
|
||||||
const json_timestamps: JSON::TimestampFormat;
|
const json_timestamps: JSON::TimestampFormat;
|
||||||
const gzip_level: count;
|
const gzip_level: count;
|
||||||
|
|
|
@ -0,0 +1,12 @@
|
||||||
|
#separator \x09
|
||||||
|
#set_separator ,
|
||||||
|
#empty_field (empty)
|
||||||
|
#unset_field -
|
||||||
|
#path test
|
||||||
|
#open 2019-07-22-10-13-09
|
||||||
|
#fields s
|
||||||
|
#types string
|
||||||
|
foo ® bar
|
||||||
|
दुनिया को नमस्ते
|
||||||
|
hello 𠜎
|
||||||
|
#close 2019-07-22-10-13-09
|
|
@ -0,0 +1,26 @@
|
||||||
|
#
|
||||||
|
# @TEST-EXEC: zeek -b %INPUT
|
||||||
|
# @TEST-EXEC: btest-diff test.log
|
||||||
|
|
||||||
|
@load tuning/enable-utf-8-logs
|
||||||
|
|
||||||
|
module Test;
|
||||||
|
export {
|
||||||
|
redef enum Log::ID += { LOG };
|
||||||
|
|
||||||
|
type Log: record {
|
||||||
|
s: string;
|
||||||
|
} &log;
|
||||||
|
}
|
||||||
|
|
||||||
|
# Creates the Test::LOG stream and writes three strings to it that contain
# 2-, 3- and 4-byte UTF-8 sequences, respectively.
event zeek_init()
|
||||||
|
{
|
||||||
|
local a = "foo \xc2\xae bar"; # \xc2\xae is a 2-byte UTF-8 sequence
|
||||||
|
local b = "दुनिया को नमस्ते"; # Hindi text; each character is a 3-byte UTF-8 sequence
|
||||||
|
local c = "hello 𠜎"; # ends with a Chinese character that takes 4 bytes in UTF-8
|
||||||
|
|
||||||
|
Log::create_stream(Test::LOG, [$columns=Log]);
|
||||||
|
Log::write(Test::LOG, [$s=a]);
|
||||||
|
Log::write(Test::LOG, [$s=b]);
|
||||||
|
Log::write(Test::LOG, [$s=c]);
|
||||||
|
}
|
Loading…
Add table
Add a link
Reference in a new issue