zeek/src/util.cc
Tim Wojtulewicz a1d121e5aa Use std::string/string_view versions of starts_with/ends_with where appropriate
The util:: versions of these methods remain as a thin wrapper around them so
they can be used with const char* arguments. Otherwise callers have to manually
make string_view objects from the input.
s Please enter the commit message for your changes. Lines starting
2025-07-17 09:08:54 -07:00

2500 lines
69 KiB
C++

// See the file "COPYING" in the main distribution directory for copyright.
#include "zeek/util.h"
#include "zeek/zeek-config.h"
#include "zeek/zeek-config-paths.h"
#ifdef HAVE_DARWIN
#include <mach/mach_init.h>
#include <mach/task.h>
#endif
#include <fcntl.h>
#include <libgen.h>
#include <openssl/md5.h>
#include <openssl/sha.h>
#include <sys/resource.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <cctype>
#include <cerrno>
#include <csignal>
#include <cstdarg>
#include <cstdio>
#include <cstdlib>
#if defined(HAVE_MALLINFO) || defined(HAVE_MALLINFO2)
#include <malloc.h>
#endif
#ifdef __linux__
#if __has_include(<sys/random.h>)
#define HAVE_GETRANDOM
#include <sys/random.h>
#endif
#endif
#include <algorithm>
#include <array>
#include <filesystem>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>
#include "zeek/3rdparty/ConvertUTF.h"
#include "zeek/Desc.h"
#include "zeek/Hash.h"
#include "zeek/Obj.h"
#include "zeek/Reporter.h"
#include "zeek/RunState.h"
#include "zeek/Val.h"
#include "zeek/digest.h"
#include "zeek/input.h"
#include "zeek/iosource/Manager.h"
#include "zeek/iosource/PktSrc.h"
#ifdef _MSC_VER
#include "zeek/ScannedFile.h"
#endif
#include "zeek/3rdparty/doctest.h"
using namespace std;
// Path of the process status file written by set_processing_status(); only
// declared here, the definition is outside this part of the file.
extern const char* proc_status_file;
// Returns true if access(2) grants the calling process read permission on path.
static bool can_read(const string& path) { return access(path.c_str(), R_OK) == 0; }
// File-local search path string; add_to_zeek_path() appends directories to it.
static string zeek_path_value;
// path_list_separator copied into a std::string so it can be concatenated
// with directory names.
const string zeek_path_list_separator(path_list_separator.begin(), path_list_separator.end());
namespace zeek::util {
namespace detail {
// Unit test: extract_ip() drops enclosing brackets and a leading "0x".
TEST_CASE("util extract_ip") {
CHECK(extract_ip("[1.2.3.4]") == "1.2.3.4");
CHECK(extract_ip("0x1.2.3.4") == "1.2.3.4");
CHECK(extract_ip("[]") == "");
}
/**
 * Strips the decoration around a textual IP address: surrounding whitespace,
 * one leading '[', a leading "0x", and everything from the first ']' onward.
 *
 * @param i the raw address text.
 * @return the bare address, which may be empty.
 */
std::string extract_ip(const std::string& i) {
    std::string addr(strstrip(i));

    if ( ! addr.empty() && addr.front() == '[' )
        addr.erase(addr.begin());

    if ( addr.starts_with("0x") )
        addr.erase(0, 2);

    // Cut at the closing bracket, if there is one.
    const auto closing = addr.find(']');
    if ( closing != std::string::npos )
        addr.resize(closing);

    return addr;
}
// Unit test: extract_ip_and_len() splits "addr/len", yields 0 for a
// non-numeric length, and accepts a null length pointer.
TEST_CASE("util extract_ip_and_len") {
int len;
std::string out = extract_ip_and_len("[1.2.3.4/24]", &len);
CHECK(out == "1.2.3.4");
CHECK(len == 24);
out = extract_ip_and_len("0x1.2.3.4/32", &len);
CHECK(out == "1.2.3.4");
CHECK(len == 32);
out = extract_ip_and_len("[]/abcd", &len);
CHECK(out == "");
CHECK(len == 0);
out = extract_ip_and_len("[]/16", nullptr);
CHECK(out == "");
}
/**
 * Splits a subnet string of the form "address/length".
 *
 * @param i the subnet text.
 * @param len if non-null and a '/' is present, receives the length parsed
 * with atoi() (so non-numeric text yields 0).
 * @return the address part run through extract_ip(). When there is no '/',
 * the input is returned unchanged and *len is not written.
 */
std::string extract_ip_and_len(const std::string& i, int* len) {
    const auto slash = i.find('/');

    if ( slash == std::string::npos )
        return i;

    if ( len != nullptr )
        *len = atoi(i.c_str() + slash + 1);

    return extract_ip(i.substr(0, slash));
}
// Maps an octal digit character to its value (0-7); any other character yields -1.
static constexpr int parse_octal_digit(char ch) noexcept {
    return ('0' <= ch && ch <= '7') ? ch - '0' : -1;
}
// Maps a hexadecimal digit character (either case) to its value (0-15); any
// other character yields -1.
static constexpr int parse_hex_digit(char ch) noexcept {
    if ( '0' <= ch && ch <= '9' )
        return ch - '0';

    if ( 'a' <= ch && ch <= 'f' )
        return ch - 'a' + 10;

    if ( 'A' <= ch && ch <= 'F' )
        return ch - 'A' + 10;

    return -1;
}
int expand_escape(const char*& s) {
switch ( *(s++) ) {
case 'b': return '\b';
case 'f': return '\f';
case 'n': return '\n';
case 'r': return '\r';
case 't': return '\t';
case 'a': return '\a';
case 'v': return '\v';
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7': { // \<octal>{1,3}
--s; // put back the first octal digit
const char* start = s;
// require at least one octal digit and parse at most three
int result = parse_octal_digit(*s++);
if ( result < 0 ) {
reporter->Error("bad octal escape: %s", start);
return 0;
}
// second digit?
int digit = parse_octal_digit(*s);
if ( digit >= 0 ) {
result = (result << 3) | digit;
++s;
// third digit?
digit = parse_octal_digit(*s);
if ( digit >= 0 ) {
result = (result << 3) | digit;
++s;
}
}
return result;
}
case 'x': { /* \x<hex> */
const char* start = s;
// Look at most 2 characters, so that "\x0ddir" -> "^Mdir".
int result = parse_hex_digit(*s++);
if ( result < 0 ) {
reporter->Error("bad hexadecimal escape: %s", start);
return 0;
}
// second digit?
int digit = parse_hex_digit(*s);
if ( digit >= 0 ) {
result = (result << 4) | digit;
++s;
}
return result;
}
default: return s[-1];
}
}
/**
 * Formats a timestamp as local time using the pattern "%d/%m-%H:%M".
 *
 * @param t seconds since the epoch; the fractional part is discarded.
 * @return pointer to a function-static buffer, so the text is only valid
 * until the next call.
 */
const char* fmt_access_time(double t) {
    static char formatted[256];

    const time_t secs = (time_t)t;
    struct tm local;

    if ( localtime_r(&secs, &local) == nullptr )
        reporter->InternalError("unable to get time");

    strftime(formatted, sizeof(formatted), "%d/%m-%H:%M", &local);
    return formatted;
}
bool ensure_intermediate_dirs(const char* dirname) {
if ( ! dirname || strlen(dirname) == 0 )
return false;
bool absolute = dirname[0] == '/';
string path = normalize_path(dirname);
const auto path_components = tokenize_string(path, '/');
string current_dir;
for ( size_t i = 0; i < path_components.size(); ++i ) {
if ( i > 0 || absolute )
current_dir += "/";
current_dir += path_components[i];
if ( ! ensure_dir(current_dir.c_str()) )
return false;
}
return true;
}
/**
 * Makes sure a single directory exists, creating it with mode 0777 if needed.
 *
 * @param dirname path of the directory.
 * @return true if the directory was created or already exists; false (after
 * a reporter warning) if it cannot be created or the path is not a directory.
 */
bool ensure_dir(const char* dirname) {
    if ( mkdir(dirname, 0777) == 0 )
        return true;

    // Keep mkdir()'s failure reason; stat() below may overwrite errno.
    const auto mkdir_failure = errno;

    struct stat info;
    if ( stat(dirname, &info) == -1 ) {
        // Nothing is there, or we cannot tell what is: report why mkdir() failed.
        reporter->Warning("can't create directory %s: %s", dirname, strerror(mkdir_failure));
        return false;
    }

    if ( ! S_ISDIR(info.st_mode) ) {
        reporter->Warning("%s exists but is not a directory", dirname);
        return false;
    }

    return true;
}
void hmac_md5(size_t size, const unsigned char* bytes, unsigned char digest[16]) {
if ( ! zeek::detail::KeyedHash::seeds_initialized )
reporter->InternalError("HMAC-MD5 invoked before the HMAC key is set");
zeek::detail::internal_md5(bytes, size, digest);
for ( int i = 0; i < 16; ++i )
digest[i] ^= zeek::detail::KeyedHash::shared_hmac_md5_key[i];
zeek::detail::internal_md5(digest, 16, digest);
}
/**
 * Loads seeds from a text file of whitespace-separated unsigned decimals.
 *
 * @param read_file path of the seed file.
 * @param seed receives the first number (the seed for srandom()).
 * @param buf receives one number per element (hash seeds).
 * @return true only if every expected number was read; a reporter warning is
 * issued if the file cannot be opened.
 */
static bool read_random_seeds(const char* read_file, uint32_t* seed,
                              std::array<uint32_t, zeek::detail::KeyedHash::SEED_INIT_SIZE>& buf) {
    FILE* in = fopen(read_file, "r");

    if ( in == nullptr ) {
        reporter->Warning("Could not open seed file '%s': %s", read_file, strerror(errno));
        return false;
    }

    // Seed for srandom() comes first.
    bool complete = (fscanf(in, "%u", seed) == 1);

    // Then the seeds for hmac-md5/siphash/highwayhash.
    for ( auto slot = buf.begin(); complete && slot != buf.end(); ++slot ) {
        uint32_t value;

        if ( fscanf(in, "%u", &value) == 1 )
            *slot = value;
        else
            complete = false;
    }

    fclose(in);
    return complete;
}
/**
 * Writes seeds to a text file, one unsigned decimal per line, in the layout
 * read_random_seeds() expects.
 *
 * @param write_file path of the file to create or truncate.
 * @param seed the srandom() seed, written first.
 * @param buf the hash seeds, written in order.
 * @return true only if the file was opened and every value was written and
 * flushed successfully.
 */
static bool write_random_seeds(const char* write_file, uint32_t seed,
                               std::array<uint32_t, zeek::detail::KeyedHash::SEED_INIT_SIZE>& buf) {
    FILE* f = fopen(write_file, "w+");

    if ( ! f ) {
        reporter->Warning("Could not create seed file '%s': %s", write_file, strerror(errno));
        return false;
    }

    // Track write failures (e.g. a full disk) instead of silently producing
    // a truncated seed file.
    bool ok = fprintf(f, "%u\n", seed) >= 0;

    for ( const auto& v : buf )
        ok = ok && fprintf(f, "%u\n", v) >= 0;

    // fclose() flushes buffered output, so its result matters too.
    if ( fclose(f) != 0 )
        ok = false;

    return ok;
}
// Same as read_random_seeds() but takes seeds from a space separated string instead.
static bool fill_random_seeds(const std::string& seed_string, uint32_t* seed,
std::array<uint32_t, zeek::detail::KeyedHash::SEED_INIT_SIZE>& buf) {
stringstream ss{seed_string};
if ( ! (ss >> *seed) )
return false;
for ( auto& v : buf ) {
if ( ! (ss >> v) )
return false;
}
return true;
}
// True once init_random_seed() has selected the built-in deterministic PRNG
// instead of the system's random().
static bool zeek_rand_deterministic = false;
// Current state of the built-in PRNG; advanced by random_number().
static long int zeek_rand_state = 0;
// Whether first_seed has been recorded yet.
static bool first_seed_saved = false;
// Seed used by the first init_random_seed() call; returned by initial_seed().
static unsigned int first_seed = 0;
// Seeds both generators: the built-in PRNG state (where 0 is replaced by 1)
// and the system PRNG via srandom().
static void zeek_srandom(unsigned int seed) {
    if ( seed == 0 )
        zeek_rand_state = 1;
    else
        zeek_rand_state = seed;

    srandom(seed);
}
/**
 * Reseeds whichever generator random_number() currently draws from.
 *
 * @param seed the new seed; in deterministic mode a seed of 0 is stored as 1.
 */
void seed_random(unsigned int seed) {
    if ( ! zeek_rand_deterministic ) {
        srandom(seed);
        return;
    }

    zeek_rand_state = (seed != 0) ? seed : 1;
}
/**
 * Seeds the random number generators and the keyed-hash seeds.
 *
 * Seeds come from \a read_file if given, else from \a seed_string if
 * non-empty, else stay all-zero if \a use_empty_seeds is set; any of these,
 * or a non-null \a write_file, switches to deterministic mode. Otherwise
 * entropy is gathered from the operating system.
 *
 * @param read_file optional path to load seeds from; failure is fatal.
 * @param write_file optional path to store the seeds in use.
 * @param use_empty_seeds run deterministically with zeroed seeds.
 * @param seed_string optional whitespace-separated seeds; failure is fatal.
 */
void init_random_seed(const char* read_file, const char* write_file, bool use_empty_seeds,
                      const std::string& seed_string) {
    std::array<uint32_t, zeek::detail::KeyedHash::SEED_INIT_SIZE> buf = {};
    uint32_t seed = 0;

    if ( write_file )
        // run in deterministic mode when we write a file
        zeek_rand_deterministic = true;

    if ( read_file || use_empty_seeds || ! seed_string.empty() ) {
        // if a seed is provided - run Zeek in deterministic mode
        zeek_rand_deterministic = true;

        if ( read_file ) {
            if ( ! read_random_seeds(read_file, &seed, buf) )
                reporter->FatalError("Could not load seeds from file '%s'.", read_file);
        }
        else if ( ! seed_string.empty() ) {
            if ( ! fill_random_seeds(seed_string, &seed, buf) )
                reporter->FatalError("Could not load seeds from string");
        }
    }
    else {              // no seed provided
        size_t pos = 0; // number of uint32_t words of entropy gathered so far

#ifdef HAVE_GETRANDOM
        // Requests of up to 256 bytes are not cut short once the entropy pool
        // is initialized, so one call fills the whole buffer.
        static_assert(sizeof(buf) < 256, "seed buffer too large for a single getrandom() call");

        // The call can still fail outright (e.g. EINTR while waiting for the
        // pool). Retry on EINTR; on any other failure leave pos untouched so
        // the size check below reports it, rather than letting a negative
        // result turn into a huge word count.
        ssize_t nbytes;
        do {
            nbytes = getrandom(buf.data(), sizeof(buf), 0);
        } while ( nbytes < 0 && errno == EINTR );

        if ( nbytes > 0 )
            pos += static_cast<size_t>(nbytes) / sizeof(uint32_t);
#else
        // Gather up some entropy.
        gettimeofday((struct timeval*)(buf.data() + pos), nullptr);
        pos += sizeof(struct timeval) / sizeof(uint32_t);

        // use urandom. For reasons see e.g. http://www.2uo.de/myths-about-urandom/
#if defined(O_NONBLOCK)
        int fd = open("/dev/urandom", O_RDONLY | O_NONBLOCK);
#elif defined(O_NDELAY)
        int fd = open("/dev/urandom", O_RDONLY | O_NDELAY);
#else
        int fd = open("/dev/urandom", O_RDONLY);
#endif

        if ( fd >= 0 ) {
            int amt = read(fd, buf.data() + pos, sizeof(uint32_t) * (zeek::detail::KeyedHash::SEED_INIT_SIZE - pos));
            safe_close(fd);

            if ( amt > 0 )
                pos += amt / sizeof(uint32_t);
            else
                // Clear errno, which can be set on some
                // systems due to a lack of entropy.
                errno = 0;
        }
#endif

        if ( pos < zeek::detail::KeyedHash::SEED_INIT_SIZE )
            reporter->FatalError("Could not read enough random data. Wanted %d, got %zu",
                                 zeek::detail::KeyedHash::SEED_INIT_SIZE, pos);

        // Fold all gathered words into the 32-bit srandom() seed.
        for ( size_t i = 0; i < pos; ++i ) {
            seed ^= buf[i];
            seed = (seed << 1) | (seed >> 31);
        }
    }

    zeek_srandom(seed);

    // Only the very first seed is remembered for initial_seed().
    if ( ! first_seed_saved ) {
        first_seed = seed;
        first_seed_saved = true;
    }

    if ( ! zeek::detail::KeyedHash::IsInitialized() )
        zeek::detail::KeyedHash::InitializeSeeds(buf);

    if ( write_file && ! write_random_seeds(write_file, seed, buf) )
        reporter->Error("Could not write seeds to file '%s'.\n", write_file);
}
// Returns the seed recorded by the first init_random_seed() call (0 before that).
unsigned int initial_seed() { return first_seed; }
// Returns true if random_number() draws from the built-in deterministic PRNG.
bool have_random_seed() { return zeek_rand_deterministic; }
// Modulus of the built-in PRNG.
constexpr uint32_t zeek_prng_mod = 2147483647;
// Value reported as the built-in PRNG's maximum (modulus - 1).
constexpr uint32_t zeek_prng_max = zeek_prng_mod - 1;
// Upper bound for random_number(): zeek_prng_max in deterministic mode,
// RAND_MAX otherwise.
long int max_random() { return zeek_rand_deterministic ? zeek_prng_max : RAND_MAX; }
/**
 * Advances the built-in pseudo-random number generator by one step.
 *
 * This is our own simple linear congruential generator (Lehmer RNG,
 * evaluated with Schrage's method) so that results are predictable across
 * platforms. "long int" is used for the state mostly for parity with the
 * possible return values of random().
 *
 * @param state the current generator state.
 * @return the next state, which is also the generated number.
 */
long int prng(long int state) {
    constexpr uint32_t modulus = zeek_prng_mod;
    constexpr uint32_t multiplier = 16807;
    constexpr uint32_t quot = modulus / multiplier;
    constexpr uint32_t rest = modulus % multiplier;

    const uint32_t low = state % quot;
    const uint32_t high = state / quot;

    const int32_t lhs = multiplier * low;
    const int32_t rhs = rest * high;

    int32_t next = lhs - rhs;

    if ( next < 0 )
        next += modulus;

    return next;
}
/**
 * Returns the next pseudo-random number: from the built-in PRNG in
 * deterministic mode, from the system's random() otherwise.
 */
long int random_number() {
    if ( zeek_rand_deterministic ) {
        zeek_rand_state = detail::prng(zeek_rand_state);
        return zeek_rand_state;
    }

    return random(); // Use system PRNG.
}
// Unit test: only a file named "__load__.zeek" counts as a package loader.
TEST_CASE("util is_package_loader") {
CHECK(is_package_loader("/some/path/__load__.zeek") == true);
CHECK(is_package_loader("/some/path/notload.zeek") == false);
}
bool is_package_loader(const string& path) {
string filename(std::move(SafeBasename(path).result));
return (filename == "__load__.zeek");
}
/**
 * Appends a directory to the script search path.
 *
 * @param dir the directory to add, joined with the path list separator.
 */
void add_to_zeek_path(const string& dir) {
    // Called for its side effect: make sure path is initialized.
    zeek_path();

    zeek_path_value.append(zeek_path_list_separator).append(dir);
}
/**
 * Opens the loader script of a package directory.
 *
 * @param path the package directory on entry; on return it always has
 * "/__load__.zeek" appended, whether or not the file could be opened.
 * @param mode the mode passed on to open_file().
 * @return the result of open_file() if the loader is readable; otherwise
 * nullptr after reporting an error.
 */
FILE* open_package(string& path, const string& mode) {
    const string package_dir = path;

    path.append("/__load__");
    path.append(".zeek");

    if ( can_read(path) )
        return open_file(path, mode);

    const string loader_name = "__load__.zeek";
    reporter->Error("Failed to open package '%s': missing '%s' file", package_dir.c_str(), loader_name.c_str());
    return nullptr;
}
// Unit test: flatten_script_name() joins prefix and name with dots and
// replaces a package loader by its directory.
TEST_CASE("util flatten_script_name") {
CHECK(flatten_script_name("script", "some/path") == "some.path.script");
#ifndef _MSC_VER
// TODO: this test fails on Windows because the implementation of dirname() in libunistd
// returns a trailing slash on paths, even tho the POSIX implementation doesn't. Commenting
// this out until we can fix that.
CHECK(flatten_script_name("other/path/__load__.zeek", "some/path") == "some.path.other.path");
#endif
CHECK(flatten_script_name("path/to/script", "") == "path.to.script");
}
/**
 * Turns a script path into a flat, dot-separated name.
 *
 * @param name the script path; for a package loader ("__load__.zeek") its
 * directory is used instead.
 * @param prefix optional prefix, joined to the name with a '.'.
 * @return prefix and name combined, with every '/' (including those in the
 * prefix) replaced by '.'.
 */
string flatten_script_name(const string& name, const string& prefix) {
    string rval = prefix;

    if ( ! rval.empty() )
        rval.append(".");

    if ( is_package_loader(name) )
        rval.append(SafeDirname(name).result);
    else
        rval.append(name);

    // Single pass; searching from the start for each '/' would rescan the
    // already-converted part every time.
    std::replace(rval.begin(), rval.end(), '/', '.');

    return rval;
}
// Unit test: expected results of normalize_path() for ".", "..", repeated
// and trailing slashes, and "~"-prefixed paths (non-Windows only).
TEST_CASE("util normalize_path") {
#ifdef _MSC_VER
// TODO: adapt these tests to Windows
#else
CHECK(normalize_path("/1/2/3") == "/1/2/3");
CHECK(normalize_path("/1/./2/3") == "/1/2/3");
CHECK(normalize_path("/1/2/../3") == "/1/3");
CHECK(normalize_path("1/2/3/") == "1/2/3");
CHECK(normalize_path("1/2//3///") == "1/2/3");
CHECK(normalize_path("~/zeek/testing") == "~/zeek/testing");
CHECK(normalize_path("~jon/zeek/testing") == "~jon/zeek/testing");
CHECK(normalize_path("~jon/./zeek/testing") == "~jon/zeek/testing");
CHECK(normalize_path("~/zeek/testing/../././.") == "~/zeek");
CHECK(normalize_path("./zeek") == "./zeek");
CHECK(normalize_path("../zeek") == "../zeek");
CHECK(normalize_path("../zeek/testing/..") == "../zeek");
CHECK(normalize_path("./zeek/..") == ".");
CHECK(normalize_path("./zeek/../..") == "..");
CHECK(normalize_path("./zeek/../../..") == "../..");
CHECK(normalize_path("./..") == "..");
CHECK(normalize_path("../..") == "../..");
CHECK(normalize_path("/..") == "/..");
CHECK(normalize_path("~/..") == "~/..");
CHECK(normalize_path("/../..") == "/../..");
CHECK(normalize_path("~/../..") == "~/../..");
CHECK(normalize_path("zeek/..") == "");
CHECK(normalize_path("zeek/../..") == "..");
#endif
}
/**
 * Lexically simplifies a path without touching the filesystem: drops "."
 * components (except a leading one), collapses repeated and trailing
 * slashes, and resolves ".." against the preceding component where possible.
 *
 * @param path the path to normalize.
 * @return the normalized path; may be empty (e.g. for "zeek/..").
 */
string normalize_path(std::string_view path) {
#ifdef _MSC_VER
    if ( 0 == path.compare(zeek::detail::ScannedFile::canonical_stdin_path) ) {
        return string(path);
    }

    // "//" interferes with std::weakly_canonical
    string stringPath = string(path);
    if ( stringPath.starts_with("//") ) {
        stringPath.erase(0, 2);
    }

    return std::filesystem::path(stringPath).lexically_normal().string();
#else
    if ( path.find("/.") == std::string_view::npos && path.find("//") == std::string_view::npos ) {
        // no need to normalize anything beyond a single trailing slash
        if ( path.size() > 1 && path.back() == '/' )
            path.remove_suffix(1);

        return std::string(path);
    }

    vector<std::string_view> final_components;
    string new_path;
    new_path.reserve(path.size());

    // Remember that the path is absolute; tokenizing drops the leading '/'.
    if ( ! path.empty() && path[0] == '/' )
        new_path = "/";

    const auto components = tokenize_string(path, '/');
    final_components.reserve(components.size());

    for ( auto it = components.begin(); it != components.end(); ++it ) {
        // Empty components come from repeated or leading slashes.
        if ( *it == "" )
            continue;

        // "." is only meaningful as the very first component.
        if ( *it == "." && it != components.begin() )
            continue;

        final_components.push_back(*it);

        if ( *it == ".." ) {
            auto cur_idx = final_components.size() - 1;

            if ( cur_idx != 0 ) {
                auto last_idx = cur_idx - 1;
                auto& last_component = final_components[last_idx];

                // These cannot be cancelled out; keep the ".." as-is.
                if ( last_component == "/" || last_component == "~" || last_component == ".." )
                    continue;

                if ( last_component == "." ) {
                    // "./.." collapses to "..".
                    last_component = "..";
                    final_components.pop_back();
                }
                else {
                    // "<dir>/.." cancels out: drop both components.
                    final_components.pop_back();
                    final_components.pop_back();
                }
            }
        }
    }

    for ( const auto& comp : final_components ) {
        new_path.append(comp);
        new_path.append("/");
    }

    // Drop the trailing separator added above, but keep a lone "/".
    if ( new_path.size() > 1 && new_path[new_path.size() - 1] == '/' )
        new_path.erase(new_path.size() - 1);

    return new_path;
#endif
}
/**
 * Strips the leading search-path directory from a script path.
 *
 * @param path the path to shorten.
 * @return the normalized path with the first matching (normalized) search
 * path entry and any following slashes removed; the normalized path itself
 * if no entry matches.
 */
string without_zeekpath_component(std::string_view path) {
    string normalized = normalize_path(path);

    const auto search_dirs = tokenize_string(zeek_path(), path_list_separator[0]);

    for ( const auto& dir : search_dirs ) {
        const string root = normalize_path(dir);

        // NOTE(review): this is a plain string-prefix test, so an entry can
        // also match a sibling directory that merely starts with the same
        // characters - confirm whether that is intended.
        if ( normalized.starts_with(root) ) {
            // Found the containing directory; skip it and the path
            // separators that follow.
            const auto rest = normalized.find_first_not_of('/', root.size());

            if ( rest == string::npos )
                return string();

            return normalized.substr(rest);
        }
    }

    return normalized;
}
/**
 * Works out the path of an executable from the way it was invoked.
 *
 * @param invocation the command as given, e.g. argv[0].
 * @return the invocation itself if it is absolute; the current directory
 * joined with it if it is relative and has a directory part; otherwise the
 * result of looking it up in $PATH with find_file(). Empty if the invocation
 * is empty or $PATH is unset. Exits the process if the current directory
 * cannot be determined.
 */
std::string get_exe_path(const std::string& invocation) {
    if ( invocation.empty() )
        return "";

    const std::filesystem::path as_path(invocation);

    // Absolute path
    if ( as_path.is_absolute() || as_path.root_directory() == "~" )
        return invocation;

    // Relative path with a directory part
    if ( as_path.is_relative() && as_path.has_parent_path() ) {
        char cwd[PATH_MAX];

        if ( getcwd(cwd, sizeof(cwd)) == nullptr ) {
            fprintf(stderr, "failed to get current directory: %s\n", strerror(errno));
            exit(1);
        }

        return (std::filesystem::path(cwd) / as_path).string();
    }

    // Bare command name: search $PATH.
    auto search_path = getenv("PATH");

    if ( search_path == nullptr )
        return "";

    return find_file(invocation, search_path);
}
/**
 * Rotates a file: the current file is hard-linked to a timestamped name and
 * a freshly opened, empty file takes over the original name.
 *
 * @param name path of the file to rotate.
 * @param rotate_info optional record; if non-null, fields 0-3 are set to the
 * old name, the new name, and (twice) the current network time.
 * @return the new file opened for writing, or nullptr if the temporary file
 * cannot be opened or the old file cannot be linked to its new name. The
 * process exits if the final renaming steps fail.
 */
FILE* rotate_file(const char* name, RecordVal* rotate_info) {
// Build file names.
// newname is "<name>.<pid>.<network time>.tmp"; tmpname is newname + ".tmp".
const int buflen = strlen(name) + 128;
auto newname_buf = std::make_unique<char[]>(buflen);
// Four extra bytes for the additional ".tmp" suffix.
auto tmpname_buf = std::make_unique<char[]>(buflen + 4);
auto newname = newname_buf.get();
auto tmpname = tmpname_buf.get();
snprintf(newname, buflen, "%s.%d.%.06f.tmp", name, getpid(), run_state::network_time);
newname[buflen - 1] = '\0';
strcpy(tmpname, newname);
strcat(tmpname, ".tmp");
// First open the new file using a temporary name.
FILE* newf = fopen(tmpname, "w");
if ( ! newf ) {
reporter->Error("rotate_file: can't open %s: %s", tmpname, strerror(errno));
return nullptr;
}
// Then link the old file to newname and make sure
// it really gets created.
struct stat dummy;
if ( link(name, newname) < 0 || stat(newname, &dummy) < 0 ) {
reporter->Error("rotate_file: can't move %s to %s: %s", name, newname, strerror(errno));
fclose(newf);
// Undo whatever was created so far.
unlink(newname);
unlink(tmpname);
return nullptr;
}
// Close current file, and move the tmp to its place.
if ( unlink(name) < 0 || link(tmpname, name) < 0 || unlink(tmpname) < 0 ) {
reporter->Error("rotate_file: can't move %s to %s: %s", tmpname, name, strerror(errno));
exit(1); // hard to fix, but shouldn't happen anyway...
}
// Init rotate_info.
if ( rotate_info ) {
rotate_info->Assign(0, name);
rotate_info->Assign(1, newname);
rotate_info->AssignTime(2, run_state::network_time);
rotate_info->AssignTime(3, run_state::network_time);
}
return newf;
}
/**
 * Builds a log file name of the form "<tag>.<suffix>".
 *
 * @param tag the base name.
 * @return the name formatted by fmt(); the suffix is taken from the
 * ZEEK_LOG_SUFFIX environment variable and defaults to "log".
 */
const char* log_file_name(const char* tag) {
    const char* suffix = getenv("ZEEK_LOG_SUFFIX");

    if ( suffix == nullptr )
        suffix = "log";

    return fmt("%s.%s", tag, suffix);
}
/**
 * Parses a rotation base time given as "HH:MM".
 *
 * @param rotate_base_time the text to parse; may be null or empty.
 * @return the time as seconds since midnight, or -1 if no time was given or
 * it could not be parsed (the latter is also reported as an error).
 */
double parse_rotate_base_time(const char* rotate_base_time) {
    if ( rotate_base_time == nullptr || rotate_base_time[0] == '\0' )
        return -1;

    struct tm parsed;

    if ( strptime(rotate_base_time, "%H:%M", &parsed) == nullptr ) {
        reporter->Error("calc_next_rotate(): can't parse rotation base time");
        return -1;
    }

    return parsed.tm_min * 60 + parsed.tm_hour * 60 * 60;
}
/**
 * Computes how long to wait until the next rotation.
 *
 * @param current the current time in seconds since the epoch.
 * @param interval the rotation interval in seconds; 0 is reported as an
 * error and replaced by 24 hours.
 * @param base offset in seconds from local midnight that rotations are
 * aligned to, or negative for none.
 * @return seconds from \a current until the next rotation.
 */
double calc_next_rotate(double current, double interval, double base) {
    if ( interval == 0 ) {
        reporter->Error("calc_next_rotate(): interval is zero, falling back to 24hrs");
        interval = 86400;
    }

    // Break the current time down so the start of the day can be found.
    const time_t now = time_t(current);
    struct tm day;

    if ( localtime_r(&now, &day) == nullptr ) {
        reporter->Error("calc_next_rotate(): failure processing current time (%.6f)", current);
        // fall back to the method used if no base time is given
        base = -1;
    }

    if ( base < 0 ) {
        // No base time given. To get nice timestamps, we round
        // the time up to the next multiple of the rotation interval.
        return floor(current / interval) * interval + interval - current;
    }

    day.tm_hour = 0;
    day.tm_min = 0;
    day.tm_sec = 0;

    const double startofday = mktime(&day);

    // current < startofday + base + i * interval <= current + interval
    const double delta_t = startofday + base + ceil((current - startofday - base) / interval) * interval - current;

    if ( delta_t > 0.0 )
        return delta_t;

    return interval;
}
// Sends SIGTERM to our own process, unless termination is already underway.
void terminate_processing() {
    if ( run_state::terminating )
        return;

    kill(getpid(), SIGTERM);
}
/**
 * Writes "<status> [<reason>]" to the process status file, if one is
 * configured.
 *
 * This function can be called from a signal context, so it only uses
 * reentrant, async-signal-safe facilities (no allocation, no stdio) and
 * restores errno before returning.
 *
 * @param status short status text.
 * @param reason explanation, written in brackets after the status.
 */
void set_processing_status(const char* status, const char* reason) {
    if ( ! proc_status_file )
        return;

    // Writes all of s to fd, retrying interrupted or would-block writes.
    auto write_str = [](int fd, const char* s) {
        size_t len = strlen(s);

        while ( len > 0 ) {
            ssize_t n = write(fd, s, len);

            if ( n < 0 ) {
                if ( errno == EINTR || errno == EAGAIN )
                    // Nothing was written: retry, and do not let the -1
                    // move the cursor backwards.
                    continue;

                // Ignore errors, as they're too difficult to
                // safely report here.
                break;
            }

            s += n;
            len -= n;
        }
    };

    auto report_error_with_errno = [&](const char* msg) {
        // strerror_r() is not async-signal-safe, hence we don't do
        // the translation from errno to string. The number is formatted
        // by hand into a stack buffer for the same reason.
        const int err = errno;

        char digits[16];
        char* p = digits + sizeof(digits) - 1;
        *p = '\0';

        unsigned int magnitude = err < 0 ? 0u - static_cast<unsigned int>(err) : static_cast<unsigned int>(err);

        do {
            *--p = static_cast<char>('0' + magnitude % 10);
            magnitude /= 10;
        } while ( magnitude != 0 );

        if ( err < 0 )
            *--p = '-';

        write_str(2, msg);
        write_str(2, " '");
        write_str(2, proc_status_file);
        write_str(2, "': ");
        write_str(2, p);
        write_str(2, " [");
        write_str(2, status);
        write_str(2, "]\n");
    };

    int old_errno = errno;

    int fd = open(proc_status_file, O_CREAT | O_WRONLY | O_TRUNC, 0777);

    if ( fd < 0 ) {
        report_error_with_errno("Failed to open process status file");
        errno = old_errno;
        return;
    }

    write_str(fd, status);
    write_str(fd, " [");
    write_str(fd, reason);
    write_str(fd, "]\n");

    if ( close(fd) < 0 && errno != EINTR ) {
        report_error_with_errno("Failed to close process status file");
        abort(); // same as safe_close()
    }

    errno = old_errno;
}
/**
 * Sets a thread's name using whichever platform API is available; does
 * nothing on platforms other than those handled below.
 *
 * @param name the new thread name.
 * @param tid the thread to rename; only the FreeBSD call uses it.
 */
void set_thread_name(const char* name, pthread_t tid) {
#if defined(HAVE_LINUX)
    prctl(PR_SET_NAME, name, 0, 0, 0);
#endif

#if defined(__APPLE__)
    pthread_setname_np(name);
#endif

#if defined(__FreeBSD__)
    pthread_set_name_np(tid, name);
#endif
}
/**
 * Portable wrapper around ::setvbuf().
 *
 * @param stream the stream to configure.
 * @param buf buffer to use (ignored on Windows).
 * @param type buffering mode (ignored on Windows).
 * @param size size of \a buf (ignored on Windows).
 * @return the result of ::setvbuf().
 */
int setvbuf(FILE* stream, char* buf, int type, size_t size) {
#ifdef _MSC_VER
    // TODO: this turns off buffering altogether because Windows wants us to pass a valid
    // buffer and length if we're going to pass one of the other modes. We need to
    // investigate the performance ramifications of this.
    return ::setvbuf(stream, NULL, _IONBF, 0);
#else
    return ::setvbuf(stream, buf, type, size);
#endif
}
} // namespace detail
// Unit test: "\x##" sequences are decoded, other text passes through unchanged.
TEST_CASE("util get_unescaped_string") {
CHECK(get_unescaped_string("abcde") == "abcde");
CHECK(get_unescaped_string("\\x41BCD\\x45") == "ABCDE");
}
/**
 * Takes a string, unescapes all characters that are escaped as hex codes
 * (\x##) and turns them into the equivalent ascii-codes. Returns a string
 * containing no escaped values.
 *
 * Sequences that are not a backslash, an 'x' and exactly two hex digits are
 * copied through unchanged.
 *
 * @param arg_str string to unescape
 * @return A std::string without escaped characters.
 */
std::string get_unescaped_string(const std::string& arg_str) {
    const size_t len = arg_str.length();

    // The result is never longer than the input.
    std::string out;
    out.reserve(len);

    // <cctype> classifiers are only defined for unsigned char values (and
    // EOF), so convert before the call.
    const auto is_hex = [](char ch) { return isxdigit(static_cast<unsigned char>(ch)) != 0; };

    size_t pos = 0;

    while ( pos < len ) {
        const bool escaped = len - pos >= 4 && arg_str[pos] == '\\' && arg_str[pos + 1] == 'x' &&
                             is_hex(arg_str[pos + 2]) && is_hex(arg_str[pos + 3]);

        if ( escaped ) {
            out.push_back(static_cast<char>((decode_hex(arg_str[pos + 2]) << 4) + decode_hex(arg_str[pos + 3])));
            pos += 4;
        }
        else
            out.push_back(arg_str[pos++]);
    }

    return out;
}
// Unit test: covers the ODesc-returning form (with and without a
// caller-provided ODesc) and the std::string-returning overloads.
TEST_CASE("util get_escaped_string") {
SUBCASE("returned ODesc") {
ODesc* d = get_escaped_string(nullptr, "a bcd\n", 6, false);
CHECK(strcmp(d->Description(), "a\\x20bcd\\x0a") == 0);
delete d;
}
SUBCASE("provided ODesc") {
ODesc d2;
get_escaped_string(&d2, "ab\\e", 4, true);
CHECK(strcmp(d2.Description(), "\\x61\\x62\\\\\\x65") == 0);
}
SUBCASE("std::string versions") {
std::string s = get_escaped_string("a b c", 5, false);
CHECK(s == "a\\x20b\\x20c");
s = get_escaped_string("d e", false);
CHECK(s == "d\\x20e");
}
}
/**
 * Takes a string, escapes characters into equivalent hex codes (\x##), and
 * returns a string containing all escaped values.
 *
 * @param d an ODesc object to store the escaped hex version of the string,
 * if null one will be allocated and returned from the function.
 * @param str string to escape
 * @param len number of bytes of \a str to process
 * @param escape_all If true, all characters are escaped. If false, only
 * characters are escaped that are either whitespace or not printable in
 * ASCII.
 * @return A ODesc object containing a list of escaped hex values of the form
 * \x##, which may be newly allocated if \a d was a null pointer. A backslash
 * that gets escaped is written as two backslashes rather than as a hex code.
 */
ODesc* get_escaped_string(ODesc* d, const char* str, size_t len, bool escape_all) {
    if ( ! d )
        d = new ODesc();

    for ( size_t i = 0; i < len; ++i ) {
        char c = str[i];

        // <cctype> classifiers are only defined for unsigned char values
        // (and EOF); a negative plain char must be converted first.
        const auto uc = static_cast<unsigned char>(c);

        if ( escape_all || isspace(uc) || ! isascii(uc) || ! isprint(uc) ) {
            if ( c == '\\' )
                d->AddRaw("\\\\", 2);
            else {
                char hex[4] = {'\\', 'x', '0', '0'};
                bytetohex(c, hex + 2);
                d->AddRaw(hex, 4);
            }
        }
        else
            d->AddRaw(&c, 1);
    }

    return d;
}
std::string get_escaped_string(const char* str, size_t len, bool escape_all) {
ODesc d;
return get_escaped_string(&d, str, len, escape_all)->Description();
}
/**
 * Copies the first \a len bytes of a buffer into a new NUL-terminated array.
 *
 * @param str the bytes to copy; may be null.
 * @param len number of bytes to copy.
 * @return an array allocated with new[] that the caller must delete[], or
 * nullptr if \a str is null.
 */
char* copy_string(const char* str, size_t len) {
    if ( str == nullptr )
        return nullptr;

    char* duplicate = new char[len + 1];
    duplicate[len] = '\0';
    memcpy(duplicate, str, len);

    return duplicate;
}
/**
 * Duplicates a NUL-terminated string.
 *
 * @param s the string to copy; may be null.
 * @return an array allocated with new[] that the caller must delete[], or
 * nullptr if \a s is null.
 */
char* copy_string(const char* s) {
    if ( s == nullptr )
        return nullptr;

    // Copy the terminator along with the characters.
    const size_t bytes = strlen(s) + 1;
    char* duplicate = new char[bytes];
    memcpy(duplicate, s, bytes);

    return duplicate;
}
// Unit test: streq() reports equality and inequality of C strings.
TEST_CASE("util streq") {
CHECK(streq("abcd", "abcd") == true);
CHECK(streq("abcd", "efgh") == false);
}
// Returns true if the two NUL-terminated strings have identical contents.
bool streq(const char* s1, const char* s2) {
    return ! strcmp(s1, s2);
}
// Thin wrapper around std::string_view::starts_with(), kept so callers can
// pass const char* arguments without building string_views themselves.
bool starts_with(std::string_view s, std::string_view beginning) { return s.starts_with(beginning); }
// Thin wrapper around std::string_view::ends_with(); see starts_with().
bool ends_with(std::string_view s, std::string_view ending) { return s.ends_with(ending); }
// Returns a pointer to the first character of s that is neither a space nor
// a tab. The string must be NUL-terminated.
const char* skip_whitespace(const char* s) {
    for ( ; *s == ' ' || *s == '\t'; ++s )
        ;

    return s;
}

// Same, but never looks at or beyond end_of_s.
const char* skip_whitespace(const char* s, const char* end_of_s) {
    for ( ; s < end_of_s; ++s ) {
        if ( *s != ' ' && *s != '\t' )
            break;
    }

    return s;
}

// Mutable-pointer variant of the NUL-terminated form.
char* skip_whitespace(char* s) {
    return const_cast<char*>(skip_whitespace(static_cast<const char*>(s)));
}

// Mutable-pointer variant of the bounded form.
char* skip_whitespace(char* s, char* end_of_s) {
    return const_cast<char*>(skip_whitespace(static_cast<const char*>(s), static_cast<const char*>(end_of_s)));
}
/**
 * Skips leading decimal digits.
 *
 * @param s a NUL-terminated string.
 * @return pointer to the first character that is not a digit (possibly the
 * terminating NUL).
 */
char* skip_digits(char* s) {
    // isdigit() is only defined for unsigned char values (and EOF), so a
    // negative plain char must be converted first. The terminating NUL is
    // not a digit, so no separate end check is needed.
    while ( isdigit(static_cast<unsigned char>(*s)) )
        ++s;

    return s;
}
// Unit test: exercises both get_word() overloads on "two words".
TEST_CASE("util get_word") {
char orig[10];
strcpy(orig, "two words");
SUBCASE("get first word") {
char* a = (char*)orig;
char* b = get_word(a);
CHECK(strcmp(a, "words") == 0);
CHECK(strcmp(b, "two") == 0);
}
SUBCASE("get length of first word") {
int len = strlen(orig);
int len2;
const char* b = nullptr;
get_word(len, orig, len2, b);
CHECK(len2 == 3);
}
}
/**
 * Splits the first word off a NUL-terminated string, modifying it in place.
 *
 * @param s on entry, the start of the text; on return, the start of the
 * remainder after the word and any spaces/tabs following it. If the word
 * runs to the end of the string, s is left at the terminating NUL.
 * @return the word, NUL-terminated inside the original buffer.
 */
char* get_word(char*& s) {
    char* w = s;

    // isspace() is only defined for unsigned char values (and EOF), so a
    // negative plain char must be converted first.
    while ( *s && ! isspace(static_cast<unsigned char>(*s)) )
        ++s;

    if ( *s ) {
        *s = '\0'; // terminate the word
        s = skip_whitespace(s + 1);
    }

    return w;
}
// Unit test: the length-based get_word() reports the first word's length and
// points at the start of the input.
TEST_CASE("util get_word 2") {
char orig[10];
strcpy(orig, "two words");
char* a = (char*)orig;
const char* b;
int blen;
get_word(9, a, blen, b);
CHECK(blen == 3);
CHECK(a == b);
}
/**
 * Finds the first word of a buffer without modifying it.
 *
 * @param length maximum number of bytes of \a s to look at.
 * @param s the text; scanning also stops at a NUL byte.
 * @param pwlen receives the number of bytes before the first whitespace
 * character, NUL, or the length limit.
 * @param pw receives \a s, i.e. the start of the word.
 */
void get_word(int length, const char* s, int& pwlen, const char*& pw) {
    pw = s;

    int len = 0;

    // isspace() is only defined for unsigned char values (and EOF), so a
    // negative plain char must be converted first.
    while ( len < length && *s && ! isspace(static_cast<unsigned char>(*s)) ) {
        ++s;
        ++len;
    }

    pwlen = len;
}
// Unit test: both to_upper() overloads upper-case a mixed-case string.
TEST_CASE("util to_upper") {
char a[10];
strcpy(a, "aBcD");
to_upper(a);
CHECK(strcmp(a, "ABCD") == 0);
std::string b = "aBcD";
CHECK(to_upper(b) == "ABCD");
}
/**
 * Upper-cases a NUL-terminated string in place.
 *
 * @param s the string to modify.
 */
void to_upper(char* s) {
    for ( ; *s; ++s ) {
        // <cctype> functions are only defined for unsigned char values (and
        // EOF), so a negative plain char must be converted first. toupper()
        // returns characters that are not lower-case letters unchanged.
        *s = static_cast<char>(toupper(static_cast<unsigned char>(*s)));
    }
}
/**
 * Returns an upper-cased copy of a string.
 *
 * @param s the source string.
 * @return a copy of \a s with every character passed through toupper().
 */
std::string to_upper(const std::string& s) {
    std::string t = s;

    // Take each character as unsigned char: toupper() is only defined for
    // unsigned char values (and EOF).
    std::transform(t.begin(), t.end(), t.begin(),
                   [](unsigned char c) { return static_cast<char>(::toupper(c)); });

    return t;
}
/**
 * Converts one hexadecimal digit character to its value.
 *
 * @param ch the character, in either case.
 * @return 0-15 for a hex digit, -1 for anything else.
 */
int decode_hex(char ch) {
    if ( '0' <= ch && ch <= '9' )
        return ch - '0';

    // Setting bit 0x20 folds 'A'-'F' onto 'a'-'f'; no other character ends
    // up in that range.
    const int folded = ch | 0x20;

    if ( 'a' <= folded && folded <= 'f' )
        return folded - 'a' + 10;

    return -1;
}
/**
 * Converts a value in 0-15 to its upper-case hexadecimal digit.
 *
 * @param h the value to encode.
 * @return the digit, or 'X' (after an internal warning) if \a h is out of range.
 */
unsigned char encode_hex(int h) {
    static const char hex[16] = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'};

    if ( 0 <= h && h <= 15 )
        return hex[h];

    reporter->InternalWarning("illegal value for encode_hex: %d", h);
    return 'X';
}
// Unit test: strpbrk_n() finds the first charset member within the length
// limit and returns nullptr when none is present.
TEST_CASE("util strpbrk_n") {
const char* s = "abcdef";
const char* o = strpbrk_n(5, s, "gc");
CHECK(strcmp(o, "cdef") == 0);
const char* f = strpbrk_n(5, s, "xyz");
CHECK(f == nullptr);
}
/**
 * Same as strpbrk(), except that \a s is not NUL-terminated but limited by
 * \a len. Note that '\0' is always implicitly contained in \a charset,
 * because strchr() also matches the terminator.
 *
 * @param len number of bytes of \a s to search.
 * @param s the buffer to search.
 * @param charset NUL-terminated set of characters to look for.
 * @return pointer to the first matching byte, or nullptr if there is none.
 */
const char* strpbrk_n(size_t len, const char* s, const char* charset) {
    const char* const stop = s + len;

    while ( s < stop ) {
        if ( strchr(charset, *s) != nullptr )
            return s;

        ++s;
    }

    return nullptr;
}
// Fallback implementation, compiled only when HAVE_STRCASESTR is not defined.
#if ! defined(HAVE_STRCASESTR)
// This code is derived from software contributed to BSD by Chris Torek.
//
// Case-insensitive substring search: returns a pointer to the first
// occurrence of find in s, ignoring case, or a null pointer if there is
// none. An empty find matches at the start of s.
char* strcasestr(const char* s, const char* find) {
// c is the first character of the needle; find now points at the rest.
char c = *find++;
if ( c ) {
c = tolower((unsigned char)c);
size_t len = strlen(find);
do {
char sc;
// Advance to the next position whose character matches c.
do {
sc = *s++;
if ( sc == 0 )
return 0;
} while ( char(tolower((unsigned char)sc)) != c );
// Then compare the remainder of the needle.
} while ( strncasecmp(s, find, len) != 0 );
// s is one past the matched first character; step back onto it.
--s;
}
return (char*)s;
}
// Unit test: matches regardless of case and reports a missing needle as nullptr.
TEST_CASE("util strcasestr") {
const char* s = "this is a string";
const char* out = strcasestr(s, "is a");
CHECK(strcmp(out, "is a string") == 0);
const char* out2 = strcasestr(s, "Is A");
CHECK(strcmp(out2, "is a string") == 0);
const char* out3 = strcasestr(s, "not there");
CHECK(out3 == nullptr);
}
#endif
// Unit test: atoi_n() parses decimal and hexadecimal text and returns 0 when
// the input does not start with a digit.
TEST_CASE("util atoi_n") {
const char* dec = "12345";
int val;
CHECK(atoi_n(strlen(dec), dec, nullptr, 10, val) == 1);
CHECK(val == 12345);
const char* hex = "12AB";
CHECK(atoi_n(strlen(hex), hex, nullptr, 16, val) == 1);
CHECK(val == 0x12AB);
const char* fail = "XYZ";
CHECK(atoi_n(strlen(fail), fail, nullptr, 10, val) == 0);
}
/**
 * Parses an integer from a length-limited buffer.
 *
 * An optional leading '-' is accepted. Parsing stops at the first character
 * that is not a digit of the given base; letters are accepted in either
 * case. No overflow checking is done.
 *
 * @param len number of bytes of \a s to look at.
 * @param s the text to parse; need not be NUL-terminated.
 * @param end if non-null, receives the position after the last consumed digit.
 * @param base the numeric base.
 * @param result receives the parsed value.
 * @return 1 on success, 0 if the first character (after any sign) is not a
 * valid digit; \a result and \a end are untouched in that case.
 */
template<class T>
int atoi_n(int len, const char* s, const char** end, int base, T& result) {
    T n = 0;
    int neg = 0;

    if ( len > 0 && *s == '-' ) {
        neg = 1;
        --len;
        ++s;
    }

    int i;
    for ( i = 0; i < len; ++i ) {
        unsigned int d;

        // isdigit() is only defined for unsigned char values (and EOF), so a
        // negative plain char must be converted first.
        if ( isdigit(static_cast<unsigned char>(s[i])) )
            d = s[i] - '0';
        else if ( s[i] >= 'a' && s[i] < 'a' - 10 + base )
            d = s[i] - 'a' + 10;
        else if ( s[i] >= 'A' && s[i] < 'A' - 10 + base )
            d = s[i] - 'A' + 10;
        else if ( i > 0 )
            break; // end of the number
        else
            return 0; // not a number at all

        n = n * base + d;
    }

    if ( neg )
        result = -n;
    else
        result = n;

    if ( end )
        *end = s + i;

    return 1;
}

// Instantiate the ones we need.
template int atoi_n<int>(int len, const char* s, const char** end, int base, int& result);
template int atoi_n<uint16_t>(int len, const char* s, const char** end, int base, uint16_t& result);
template int atoi_n<uint32_t>(int len, const char* s, const char** end, int base, uint32_t& result);
template int atoi_n<int64_t>(int len, const char* s, const char** end, int base, int64_t& result);
template int atoi_n<uint64_t>(int len, const char* s, const char** end, int base, uint64_t& result);
// Unit test: pins uitoa_n()'s current output, in which the digits follow the
// prefix least-significant first.
TEST_CASE("util uitoa_n") {
int val = 12345;
char str[20];
const char* result = uitoa_n(val, str, 20, 10, "pref: ");
// TODO: i'm not sure this is the correct output. was it supposed to reverse the digits?
CHECK(strcmp(str, "pref: 54321") == 0);
}
/**
 * Writes an unsigned integer into a buffer, after an optional prefix.
 *
 * The digits are emitted least-significant first, i.e. in reverse of the
 * usual reading order (12345 in base 10 becomes "54321"). Output is
 * truncated to fit the buffer and is always NUL-terminated.
 *
 * @param value the number to convert.
 * @param str destination buffer.
 * @param n size of \a str in bytes; must be non-zero.
 * @param base numeric base, from 2 up to the 62 available digit characters.
 * @param prefix optional text copied in front of the digits; may be null.
 * @return \a str.
 */
char* uitoa_n(uint64_t value, char* str, int n, int base, const char* prefix) {
    static constexpr char dig[] = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";

    assert(n);
    // A base outside this range would divide by zero, never terminate, or
    // index past the digit table.
    assert(base >= 2 && base <= static_cast<int>(sizeof(dig) - 1));

    int i = 0;

    if ( prefix ) {
        strncpy(str, prefix, n - 1);
        str[n - 1] = '\0';
        i += strlen(prefix);
    }

    // No room left for digits: return the (possibly truncated) prefix.
    if ( i >= n - 1 )
        return str;

    uint64_t v = value;

    do {
        str[i++] = dig[v % base];
        v /= base;
    } while ( v && i < n - 1 );

    str[i] = '\0';

    return str;
}
// Unit test: strstr_n() returns the offset of a match and -1 for an
// over-long, differently-cased, or absent needle.
TEST_CASE("util strstr_n") {
const u_char* s = reinterpret_cast<const u_char*>("this is a string");
int out = strstr_n(16, s, 3, reinterpret_cast<const u_char*>("str"));
CHECK(out == 10);
out = strstr_n(16, s, 17, reinterpret_cast<const u_char*>("is"));
CHECK(out == -1);
out = strstr_n(16, s, 2, reinterpret_cast<const u_char*>("IS"));
CHECK(out == -1);
out = strstr_n(16, s, 9, reinterpret_cast<const u_char*>("not there"));
CHECK(out == -1);
}
/**
 * Searches a byte buffer for a byte sequence; neither needs to be
 * NUL-terminated.
 *
 * @param big_len number of bytes in \a big.
 * @param big the buffer to search.
 * @param little_len number of bytes in \a little.
 * @param little the sequence to look for.
 * @return offset of the first occurrence in \a big, or -1 if there is none.
 */
int strstr_n(const int big_len, const u_char* big, const int little_len, const u_char* little) {
    if ( big_len < little_len )
        return -1;

    // Last offset at which the needle still fits into the haystack.
    const int last_start = big_len - little_len;

    int offset = 0;
    while ( offset <= last_start ) {
        if ( memcmp(big + offset, little, little_len) == 0 )
            return offset;

        ++offset;
    }

    return -1;
}
/**
 * Writes exactly \a len bytes of a buffer to a stream, including any NUL
 * bytes it contains.
 *
 * @param len number of bytes to write.
 * @param s the bytes to write.
 * @param fp the destination stream.
 * @return 0 on success, EOF as soon as a byte cannot be written.
 */
int fputs(int len, const char* s, FILE* fp) {
    for ( int remaining = len; remaining > 0; --remaining, ++s ) {
        if ( fputc(*s, fp) == EOF )
            return EOF;
    }

    return 0;
}
// Unit test: an embedded NUL byte makes a buffer non-printable.
TEST_CASE("util is_printable") {
CHECK(is_printable("abcd", 4) == true);
CHECK(is_printable("ab\0d", 4) == false);
}
/**
 * Tells whether every byte of a buffer is printable according to isprint().
 *
 * @param s   Buffer to inspect.
 * @param len Number of bytes to inspect; non-positive lengths yield true.
 * @return true if all @p len bytes are printable, false otherwise.
 */
bool is_printable(const char* s, int len) {
    while ( --len >= 0 )
        // isprint() is only defined for unsigned char values (and EOF), so
        // bytes >= 0x80 must not be passed as a possibly negative char.
        if ( ! isprint(static_cast<unsigned char>(*s++)) )
            return false;

    return true;
}
// Checks strtolower() with both a C string and a std::string argument.
TEST_CASE("util strtolower") {
const char* a = "aBcD";
CHECK(strtolower(a) == "abcd");
std::string b = "aBcD";
CHECK(strtolower(b) == "abcd");
}
/**
 * Returns a copy of @p s with every character converted via tolower().
 */
std::string strtolower(const std::string& s) {
    std::string t = s;
    // tolower() is undefined for negative char values, so go through unsigned char.
    std::transform(t.begin(), t.end(), t.begin(),
                   [](unsigned char c) { return static_cast<char>(::tolower(c)); });
    return t;
}
// Checks strtoupper() with both a C string and a std::string argument.
TEST_CASE("util strtoupper") {
const char* a = "aBcD";
CHECK(strtoupper(a) == "ABCD");
std::string b = "aBcD";
CHECK(strtoupper(b) == "ABCD");
}
/**
 * Returns a copy of @p s with every character converted via toupper().
 */
std::string strtoupper(const std::string& s) {
    std::string t = s;
    // toupper() is undefined for negative char values, so go through unsigned char.
    std::transform(t.begin(), t.end(), t.begin(),
                   [](unsigned char c) { return static_cast<char>(::toupper(c)); });
    return t;
}
// Checks fmt_bytes() on printable input, on an embedded NUL byte and on a shortened length.
TEST_CASE("util fmt_bytes") {
const char* a = "abcd";
const char* af = fmt_bytes(a, 4);
CHECK(strcmp(a, af) == 0);
const char* b = "abc\0abc";
const char* bf = fmt_bytes(b, 7);
// The NUL byte is expected to come out as the four characters \x00.
CHECK(strcmp(bf, "abc\\x00abc") == 0);
const char* cf = fmt_bytes(a, 3);
CHECK(strcmp(cf, "abc") == 0);
}
/**
 * Renders a byte buffer as a printable C string.
 *
 * Printable bytes are copied as-is; all other bytes are written as a
 * four-character "\xNN" escape. Output that does not fit is cut off at a
 * character boundary, so an escape is either written completely or not at all.
 *
 * @param data Bytes to format.
 * @param len  Number of bytes in @p data.
 * @return Pointer to a function-static buffer that is overwritten by the
 *         next call; always NUL-terminated.
 */
const char* fmt_bytes(const char* data, int len) {
    static char buf[1024];

    char* p = buf;
    // Keep the final slot free for the terminator.
    char* const limit = buf + sizeof(buf) - 1;

    for ( int i = 0; i < len; ++i ) {
        const auto byte = static_cast<unsigned char>(data[i]);
        const size_t room = limit - p;

        if ( isprint(byte) ) {
            if ( room < 1 )
                break;

            *p++ = data[i];
        }
        else {
            // An escape takes four characters; never emit a partial one and
            // never move the write pointer beyond the buffer.
            if ( room < 4 )
                break;

            p += snprintf(p, room + 1, "\\x%02x", byte);
        }
    }

    *p = '\0';

    return buf;
}
/**
 * printf-style formatting into a function-static, growable buffer.
 *
 * @param format printf-style format string.
 * @param al     Arguments matching @p format.
 * @return Pointer to the static buffer holding the formatted text; the
 *         contents are replaced by the next call.
 */
const char* vfmt(const char* format, va_list al) {
    static int buf_len = 1024;
    static char* buf = nullptr;

    if ( buf == nullptr )
        buf = static_cast<char*>(safe_malloc(buf_len));

    // The first vsnprintf() consumes `al`, so keep a copy around for a retry.
    va_list retry_args;
    va_copy(retry_args, al);

    int written = vsnprintf(buf, buf_len, format, al);

    if ( written > 0 && written >= buf_len ) {
        // Output was truncated: enlarge the buffer and format once more.
        buf_len = written + 32;
        buf = static_cast<char*>(safe_realloc(buf, buf_len));
        written = vsnprintf(buf, buf_len, format, retry_args);
    }

    if ( written < 0 )
        reporter->InternalError("confusion reformatting in fmt()");

    va_end(retry_args);

    return buf;
}
/**
 * Variadic front end for vfmt(): formats the arguments per @p format and
 * returns whatever vfmt() returns.
 */
const char* fmt(const char* format, ...) {
    va_list args;
    va_start(args, format);
    const char* formatted = vfmt(format, args);
    va_end(args);

    return formatted;
}
/**
 * Tells whether @p path refers to a directory.
 *
 * @return true if stat() succeeds and reports a directory. A stat() failure
 *         yields false; failures other than ENOENT are also reported as a
 *         warning.
 */
bool is_dir(const std::string& path) {
    struct stat info;

    if ( stat(path.c_str(), &info) >= 0 )
        return S_ISDIR(info.st_mode);

    // A missing path is not worth a warning; anything else is.
    if ( errno != ENOENT )
        reporter->Warning("can't stat %s: %s", path.c_str(), strerror(errno));

    return false;
}
/**
 * Tells whether @p path refers to a regular file.
 *
 * @return true if stat() succeeds and reports a regular file. A stat()
 *         failure yields false; failures other than ENOENT are also reported
 *         as a warning.
 */
bool is_file(const std::string& path) {
    struct stat info;

    if ( stat(path.c_str(), &info) >= 0 )
        return S_ISREG(info.st_mode);

    // A missing path is not worth a warning; anything else is.
    if ( errno != ENOENT )
        reporter->Warning("can't stat %s: %s", path.c_str(), strerror(errno));

    return false;
}
// Checks strreplace() for substitution, deletion, and adding/removing backslash escapes.
TEST_CASE("util strreplace") {
string s = "this is not a string";
CHECK(strreplace(s, "not", "really") == "this is really a string");
CHECK(strreplace(s, "not ", "") == "this is a string");
CHECK(strreplace("\"abc\"", "\"", "\\\"") == "\\\"abc\\\"");
CHECK(strreplace("\\\"abc\\\"", "\\\"", "\"") == "\"abc\"");
}
/**
 * Replaces every occurrence of a substring.
 *
 * Matches are searched left to right and never overlap; text inserted by a
 * replacement is not searched again.
 *
 * @param s Input string.
 * @param o Substring to look for. If empty, @p s is returned unchanged.
 * @param n Replacement text.
 * @return Copy of @p s with all occurrences of @p o replaced by @p n.
 */
std::string strreplace(const std::string& s, const std::string& o, const std::string& n) {
    std::string r = s;

    // An empty pattern matches at every position, so the loop below would
    // never terminate.
    if ( o.empty() )
        return r;

    size_t i = 0;

    while ( (i = r.find(o, i)) != std::string::npos ) {
        r.replace(i, o.size(), n);
        // Continue after the inserted text.
        i += n.size();
    }

    return r;
}
// Checks strstrip() with leading, trailing and two-sided whitespace.
TEST_CASE("util strstrip") {
string s = " abcd";
CHECK(strstrip(s) == "abcd");
s = "abcd ";
CHECK(strstrip(s) == "abcd");
s = " abcd ";
CHECK(strstrip(s) == "abcd");
}
/**
 * Returns @p s without leading and trailing whitespace, as classified by
 * std::isspace().
 */
std::string strstrip(std::string s) {
    const auto is_ws = [](unsigned char ch) { return std::isspace(ch) != 0; };

    // Drop the leading run first ...
    const auto first_kept = std::find_if_not(s.begin(), s.end(), is_ws);
    s.erase(s.begin(), first_kept);

    // ... then the trailing run of what remains.
    const auto past_last_kept = std::find_if_not(s.rbegin(), s.rend(), is_ws).base();
    s.erase(past_last_kept, s.end());

    return s;
}
/**
 * qsort()-style comparator for two std::intptr_t values.
 *
 * @param v1 Pointer to the first std::intptr_t.
 * @param v2 Pointer to the second std::intptr_t.
 * @return -1, 0 or 1 if the first value is less than, equal to or greater
 *         than the second.
 */
int int_list_cmp(const void* v1, const void* v2) {
    const std::intptr_t lhs = *(std::intptr_t*)v1;
    const std::intptr_t rhs = *(std::intptr_t*)v2;

    if ( lhs == rhs )
        return 0;

    return lhs < rhs ? -1 : 1;
}
/**
 * Returns the script search path.
 *
 * The value is cached in zeek_path_value; while that is empty it is filled
 * from the ZEEKPATH environment variable, or from DEFAULT_ZEEKPATH if the
 * variable is not set.
 */
const std::string& zeek_path() {
    if ( ! zeek_path_value.empty() )
        return zeek_path_value;

    const char* from_env = getenv("ZEEKPATH");

    if ( from_env )
        zeek_path_value = from_env;
    else
        zeek_path_value = DEFAULT_ZEEKPATH;

    return zeek_path_value;
}
/**
 * Returns the value of the ZEEK_PLUGIN_PATH environment variable, or
 * ZEEK_PLUGIN_INSTALL_PATH if the variable is not set.
 */
const char* zeek_plugin_path() {
    if ( const char* from_env = getenv("ZEEK_PLUGIN_PATH") )
        return from_env;

    return ZEEK_PLUGIN_INSTALL_PATH;
}
/**
 * Returns the value of the ZEEK_PLUGIN_ACTIVATE environment variable, or an
 * empty string if the variable is not set.
 */
const char* zeek_plugin_activate() {
    if ( const char* from_env = getenv("ZEEK_PLUGIN_ACTIVATE") )
        return from_env;

    return "";
}
string zeek_prefixes() {
string rval;
for ( const auto& prefix : zeek::detail::zeek_script_prefixes ) {
if ( ! rval.empty() )
rval.append(path_list_separator);
rval.append(prefix);
}
return rval;
}
FILE* open_file(const string& path, const string& mode) {
if ( path.empty() )
return nullptr;
FILE* rval = fopen(path.c_str(), mode.c_str());
if ( ! rval ) {
char buf[256];
zeek_strerror_r(errno, buf, sizeof(buf));
reporter->Error("Failed to open file %s: %s", filename, buf);
}
return rval;
}
// Checks implode_string_vector() with a delimiter, an empty delimiter and an empty vector.
TEST_CASE("util implode_string_vector") {
std::vector<std::string> v = {"a", "b", "c"};
CHECK(implode_string_vector(v, ",") == "a,b,c");
CHECK(implode_string_vector(v, "") == "abc");
v.clear();
CHECK(implode_string_vector(v, ",") == "");
}
/**
 * Concatenates the elements of @p v, placing @p delim between neighbours.
 *
 * @return The joined string; empty if @p v is empty.
 */
std::string implode_string_vector(const std::vector<std::string>& v, const std::string& delim) {
    std::string joined;

    auto it = v.begin();

    if ( it == v.end() )
        return joined;

    // First element goes in bare, every later one is preceded by the delimiter.
    joined += *it;

    for ( ++it; it != v.end(); ++it ) {
        joined += delim;
        joined += *it;
    }

    return joined;
}
// Checks both tokenize_string() overloads.
TEST_CASE("util tokenize_string") {
// With a null result pointer the function allocates the vector; the caller deletes it.
auto v = tokenize_string("/this/is/a/path", "/", nullptr);
CHECK(v->size() == 5);
CHECK(*v == vector<string>({"", "this", "is", "a", "path"}));
delete v;
// Tokens are appended to a caller-provided vector.
std::vector<std::string> v2;
tokenize_string("/this/is/path/2", "/", &v2);
CHECK(v2.size() == 5);
CHECK(v2 == vector<string>({"", "this", "is", "path", "2"}));
v2.clear();
// A delimiter that never occurs yields the whole input as a single token.
tokenize_string("/wrong/delim", ",", &v2);
CHECK(v2.size() == 1);
// Single-character overload returning string views; a trailing delimiter gives an empty last token.
auto svs = tokenize_string("one,two,three,four,", ',');
std::vector<std::string_view> expect{"one", "two", "three", "four", ""};
CHECK(svs == expect);
// Multi-character delimiter.
auto letters = tokenize_string("a--b--c--d", "--");
CHECK(*letters == vector<string>({"a", "b", "c", "d"}));
delete letters;
}
/**
 * Splits a string at every occurrence of a delimiter string.
 *
 * @param input String to split.
 * @param delim Delimiter. If empty, no splitting takes place and the whole
 *              input becomes a single token.
 * @param rval  Vector to append the tokens to. If null, a new vector is
 *              allocated with `new` and ownership passes to the caller.
 * @param limit Maximum number of splits to perform; 0 means unlimited. The
 *              remainder of the input becomes the final token.
 * @return @p rval, or the newly allocated vector.
 */
std::vector<std::string>* tokenize_string(std::string_view input, std::string_view delim,
                                          std::vector<std::string>* rval, int limit) {
    if ( ! rval )
        rval = new std::vector<std::string>();

    size_t pos = 0;

    // An empty delimiter matches everywhere without advancing `pos`, which
    // would loop forever when there's no limit. Skip splitting in that case.
    if ( ! delim.empty() ) {
        int found = 0;
        size_t n;

        while ( (n = input.find(delim, pos)) != std::string_view::npos ) {
            ++found;
            rval->emplace_back(input.substr(pos, n - pos));
            pos = n + delim.size();

            if ( limit && found == limit )
                break;
        }
    }

    // Whatever follows the last delimiter (possibly nothing) is the final token.
    rval->emplace_back(input.substr(pos));

    return rval;
}
/**
 * Splits a string at every occurrence of a delimiter character.
 *
 * The returned views point into @p input and are only valid while the
 * underlying character data is.
 *
 * @param input String to split.
 * @param delim Delimiter character.
 * @return The tokens in order; always at least one element.
 */
std::vector<std::string_view> tokenize_string(std::string_view input, const char delim) {
    std::vector<std::string_view> tokens;
    size_t start = 0;

    for ( size_t hit = input.find(delim, start); hit != std::string_view::npos; hit = input.find(delim, start) ) {
        tokens.emplace_back(input.substr(start, hit - start));
        start = hit + 1;
    }

    // Remainder after the last delimiter, which may be empty.
    tokens.emplace_back(input.substr(start));

    return tokens;
}
static string find_file_in_path(const string& filename, const string& path, const vector<string>& opt_ext) {
if ( filename.empty() )
return {};
std::filesystem::path filepath(filename);
// If file name is an absolute path, searching within *path* is pointless.
if ( filepath.is_absolute() ) {
if ( can_read(filename) )
return detail::normalize_path(filename);
else
return {};
}
auto abs_path = (std::filesystem::path(path) / filepath).string();
if ( ! opt_ext.empty() ) {
for ( const string& ext : opt_ext ) {
string with_ext = abs_path + ext;
if ( can_read(with_ext) )
return detail::normalize_path(with_ext);
}
}
if ( can_read(abs_path) )
return detail::normalize_path(abs_path);
return {};
}
string find_file(const string& filename, const string& path_set, const string& opt_ext) {
vector<string> paths;
tokenize_string(path_set, path_list_separator, &paths);
vector<string> ext;
if ( ! opt_ext.empty() )
ext.push_back(opt_ext);
for ( const auto& p : paths ) {
string f = find_file_in_path(filename, p, ext);
if ( ! f.empty() )
return f;
}
return {};
}
string find_script_file(const string& filename, const string& path_set) {
vector<string> paths;
tokenize_string(path_set, path_list_separator, &paths);
vector<string> ext = {".zeek"};
for ( const auto& p : paths ) {
string f = find_file_in_path(filename, p, ext);
if ( ! f.empty() )
return f;
}
return {};
}
// Forward declaration of the signal handler; its definition is not part of this section.
RETSIGTYPE sig_handler(int signo);
/**
 * Returns the current time in seconds as a double.
 *
 * @param real If true, always return wall-clock time.
 * @return Wall-clock time unless pseudo-realtime mode is active with a packet
 *         source present and @p real is false. In that case the current
 *         pseudo time is returned, advanced by the wall-clock time elapsed
 *         since the current packet unless processing is suspended.
 */
double current_time(bool real) {
    struct timeval tv;
#ifdef _MSC_VER
    auto now = std::chrono::system_clock::now();
    auto ms = std::chrono::duration_cast<std::chrono::milliseconds>(now.time_since_epoch());
    tv.tv_sec = ms.count() / 1000;
    tv.tv_usec = (ms.count() % 1000) * 1000;
#else
    if ( gettimeofday(&tv, nullptr) < 0 )
        reporter->InternalError("gettimeofday failed in current_time()");
#endif

    double t = double(tv.tv_sec) + double(tv.tv_usec) / 1e6;

    if ( ! run_state::pseudo_realtime || real || ! iosource_mgr || ! iosource_mgr->GetPktSrc() )
        return t;

    // This obviously only works for a single source ...
    if ( run_state::is_processing_suspended() )
        return run_state::detail::current_pseudo;

    // We don't scale with pseudo_realtime here as that would give us a
    // jumping real-time.
    return run_state::detail::current_pseudo + (t - run_state::current_packet_wallclock());
}
/**
 * Converts a time given in (fractional) seconds into a timeval.
 *
 * The fraction is rounded to the nearest microsecond. tv_usec is kept within
 * [0, 999999]; a fraction that rounds up to a full second is carried over
 * into tv_sec.
 *
 * @param t Time in seconds.
 * @return The equivalent timeval.
 */
struct timeval double_to_timeval(double t) {
    struct timeval tv;

    const double whole = std::floor(t);
    tv.tv_sec = static_cast<time_t>(whole);
    tv.tv_usec = std::lround((t - whole) * 1e6);

    // Fractions >= 0.9999995 round to 1000000 microseconds, which is not a
    // valid tv_usec value.
    if ( tv.tv_usec >= 1000000 ) {
        tv.tv_usec -= 1000000;
        ++tv.tv_sec;
    }

    return tv;
}
/**
 * Compares two timevals.
 *
 * @param tv_a First time value.
 * @param tv_b Second time value.
 * @return Negative, zero or positive if @p tv_a is earlier than, equal to or
 *         later than @p tv_b. The value is the difference in seconds, or in
 *         microseconds when the seconds are equal, saturated to the range of
 *         int.
 */
int time_compare(struct timeval* tv_a, struct timeval* tv_b) {
    // Compute in a wide type: converting a large time_t difference straight
    // to int can flip its sign or turn it into zero.
    const long long diff = tv_a->tv_sec == tv_b->tv_sec ?
                               static_cast<long long>(tv_a->tv_usec) - static_cast<long long>(tv_b->tv_usec) :
                               static_cast<long long>(tv_a->tv_sec) - static_cast<long long>(tv_b->tv_sec);

    constexpr long long lowest = std::numeric_limits<int>::min();
    constexpr long long highest = std::numeric_limits<int>::max();

    return static_cast<int>(std::clamp(diff, lowest, highest));
}
/**
 * Returns the reading of CLOCK_PROCESS_CPUTIME_ID, in seconds.
 */
double curr_CPU_time() {
    struct timespec cpu;
    clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &cpu);

    const double whole_seconds = double(cpu.tv_sec);
    const double fraction = double(cpu.tv_nsec) / 1e9;

    return whole_seconds + fraction;
}
/**
 * State of one UID pool: the (instance, counter) key plus a flag telling
 * whether the instance ID still has to be determined.
 */
struct UIDEntry {
    /** The pair of values a pool's key consists of. */
    struct UIDKey {
        UIDKey(uint64_t i, uint64_t c) : instance(i), counter(c) {}

        uint64_t instance;
        uint64_t counter;
    };

    /** Creates an entry with a zeroed key that is marked as uninitialized. */
    UIDEntry() : key(0, 0), needs_init(true) {}

    /** Creates an initialized entry for instance @p i with a zero counter. */
    UIDEntry(const uint64_t i) : key(i, 0), needs_init(false) {}

    UIDKey key;
    bool needs_init;
};
// Per-pool UID state, indexed by pool number; calculate_unique_id() grows it on demand.
static std::vector<UIDEntry> uid_pool;
/**
 * Returns a unique ID drawn from the UID_POOL_DEFAULT_INTERNAL pool.
 */
uint64_t calculate_unique_id() {
    return calculate_unique_id(UID_POOL_DEFAULT_INTERNAL);
}
/**
 * Returns a unique ID drawn from the given pool.
 *
 * Every pool has an instance ID and a counter. The counter is incremented on
 * each call and the hash over (instance, counter) is returned. On first use
 * of a pool the instance ID is set up: without a fixed random seed it is a
 * hash over hostname, pool number, current time, process ID and a random
 * number; with a fixed seed it is the pool number itself.
 *
 * @param pool Pool number. Values of 10000 and above that don't refer to an
 *             existing pool trigger a warning and fall back to
 *             UID_POOL_DEFAULT_INTERNAL.
 * @return The unique ID.
 */
uint64_t calculate_unique_id(size_t pool) {
    uint64_t uid_instance = 0;

    if ( pool >= uid_pool.size() ) {
        if ( pool >= 10000 ) {
            reporter->Warning("pool passed to calculate_unique_id() too large, using default");
            pool = UID_POOL_DEFAULT_INTERNAL;
        }

        // This also covers the fallback: the default pool may not have been
        // created yet, and indexing it without growing the vector would be
        // out of bounds.
        if ( pool >= uid_pool.size() )
            uid_pool.resize(pool + 1);
    }

    if ( uid_pool[pool].needs_init ) {
        // This is the first time we need a UID for this pool.
        if ( ! detail::have_random_seed() ) {
            // If we don't need deterministic output (as
            // indicated by a set seed), we calculate the
            // instance ID by hashing something likely to be
            // globally unique.
            struct {
                char hostname[120];
                uint64_t pool;
                struct timeval time;
                pid_t pid;
                int rnd;
            } unique;

            memset(&unique, 0, sizeof(unique)); // Make valgrind happy.
            gethostname(unique.hostname, sizeof(unique.hostname));
            unique.hostname[sizeof(unique.hostname) - 1] = '\0';
            gettimeofday(&unique.time, nullptr);
            unique.pool = (uint64_t)pool;
            unique.pid = getpid();
            unique.rnd = static_cast<int>(detail::random_number());

            uid_instance = zeek::detail::HashKey::HashBytes(&unique, sizeof(unique));
            ++uid_instance; // Now it's larger than zero.
        }
        else
            // Generate deterministic UIDs for each individual pool.
            uid_instance = pool;

        // Our instance is unique. Huzzah.
        uid_pool[pool] = UIDEntry(uid_instance);
    }

    assert(! uid_pool[pool].needs_init);
    assert(uid_pool[pool].key.instance != 0);

    ++uid_pool[pool].key.counter;
    return zeek::detail::HashKey::HashBytes(&(uid_pool[pool].key), sizeof(uid_pool[pool].key));
}
/**
 * Writes a complete buffer to a file descriptor, continuing after partial
 * writes and retrying when interrupted by a signal.
 *
 * Any other write error is printed to stderr and the process aborts.
 *
 * @param fd   Descriptor to write to.
 * @param data Bytes to write.
 * @param len  Number of bytes in @p data.
 * @return true once everything has been written.
 */
bool safe_write(int fd, const char* data, int len) {
    for ( int remaining = len; remaining > 0; ) {
        int n = write(fd, data, remaining);

        if ( n >= 0 ) {
            // Possibly a partial write; carry on behind what got written.
            data += n;
            remaining -= n;
            continue;
        }

        if ( errno == EINTR )
            continue;

        char buf[128];
        zeek_strerror_r(errno, buf, sizeof(buf));
        fprintf(stderr, "safe_write error: %d (%s)\n", errno, buf);
        abort();

        return false;
    }

    return true;
}
/**
 * Writes a complete buffer to a file descriptor at a given offset,
 * continuing after partial writes and retrying when interrupted by a signal.
 *
 * Any other write error is printed to stderr and the process aborts.
 *
 * @param fd     Descriptor to write to.
 * @param data   Bytes to write.
 * @param len    Number of bytes in @p data.
 * @param offset File offset at which to start writing.
 * @return true once everything has been written.
 */
bool safe_pwrite(int fd, const unsigned char* data, size_t len, size_t offset) {
    while ( len != 0 ) {
        ssize_t n = pwrite(fd, data, len, offset);

        if ( n < 0 ) {
            if ( errno == EINTR )
                continue;

            char buf[128];
            zeek_strerror_r(errno, buf, sizeof(buf));
            // Name this function so the failure can be told apart from safe_write().
            fprintf(stderr, "safe_pwrite error: %d (%s)\n", errno, buf);
            abort();

            return false;
        }

        // Possibly a partial write; carry on behind what got written.
        data += n;
        offset += n;
        len -= n;
    }

    return true;
}
/**
 * Calls fsync() on a descriptor, retrying for as long as the call is
 * interrupted by a signal.
 *
 * Any other failure is printed to stderr and the process aborts. The
 * reporter is deliberately not used so that this can be called from
 * different threads.
 *
 * @param fd Descriptor to synchronize.
 * @return true on success.
 */
bool safe_fsync(int fd) {
    int rc;

    // EINTR just means "try again"; every other error (EBADF, ENOSPC, EROFS,
    // EINVAL, EDQUOT) is treated as fatal below.
    while ( (rc = fsync(fd)) < 0 && errno == EINTR )
        ;

    if ( rc >= 0 )
        return true;

    char buf[128];
    zeek_strerror_r(errno, buf, sizeof(buf));
    fprintf(stderr, "safe_fsync error %d: %s\n", errno, buf);
    abort();

    return false;
}
/**
 * Closes a descriptor; any close() failure other than EINTR is printed to
 * stderr and aborts the process.
 *
 * Rationale for the individual close(2) failure cases:
 *  - EBADF: points at a logic error that needs fixing; only valid
 *    descriptors should ever be closed.
 *  - EINTR: ignored. Most implementations reclaim the descriptor anyway and
 *    POSIX leaves its state "unspecified", so inspecting that state or
 *    retrying close() would not be thread safe.
 *  - EIO: the descriptor state is "unspecified" as well, but there is no
 *    recovery from an I/O error here, just as in safe_write().
 *
 * The reporter is deliberately not used so that this can be called from
 * different threads.
 *
 * @param fd Descriptor to close.
 */
void safe_close(int fd) {
    if ( close(fd) >= 0 )
        return;

    if ( errno == EINTR )
        return;

    char buf[128];
    zeek_strerror_r(errno, buf, sizeof(buf));
    fprintf(stderr, "safe_close error %d: %s\n", errno, buf);
    abort();
}
/**
 * Rounds a pointer up to the next multiple of @p size.
 *
 * @param ptr  Pointer to align.
 * @param size Alignment; must be a power of two. Zero returns @p ptr as is.
 * @return @p ptr if it is already aligned, otherwise the next aligned
 *         address above it.
 */
const void* memory_align(const void* ptr, size_t size) {
    if ( size == 0 )
        return ptr;

    ASSERT(is_power_of_2(size));

    // For a power of two, the low bits give the distance past the previous boundary.
    const intptr_t addr = reinterpret_cast<intptr_t>(ptr);
    const ptrdiff_t misalign = addr & (size - 1);

    if ( misalign <= 0 )
        return ptr;

    const char* bytes = reinterpret_cast<const char*>(ptr);
    return reinterpret_cast<const void*>(bytes + (size - misalign));
}
// Checks memory_align() for aligned and unaligned addresses with alignments 0, 1, 2 and 4.
TEST_CASE("util memory_align") {
// Fixed addresses used as inputs only.
void* p1000 = (void*)0x1000;
void* p1001 = (void*)0x1001;
void* p1002 = (void*)0x1002;
void* p1003 = (void*)0x1003;
void* p1004 = (void*)0x1004;
// An aligned address comes back unchanged.
CHECK(memory_align(p1000, 0) == p1000);
CHECK(memory_align(p1000, 1) == p1000);
CHECK(memory_align(p1000, 2) == p1000);
CHECK(memory_align(p1000, 4) == p1000);
// Unaligned addresses are rounded up; alignments 0 and 1 change nothing.
CHECK(memory_align(p1001, 0) == p1001);
CHECK(memory_align(p1001, 1) == p1001);
CHECK(memory_align(p1001, 2) == p1002);
CHECK(memory_align(p1001, 4) == p1004);
CHECK(memory_align(p1002, 4) == p1004);
CHECK(memory_align(p1003, 4) == p1004);
}
/**
 * Rounds a pointer up to the next multiple of @p size and writes zero bytes
 * into the memory that gets skipped.
 *
 * @param ptr  Pointer to align; the bytes up to the next boundary must be
 *             writable.
 * @param size Alignment; must be a power of two. Zero returns @p ptr as is.
 * @return The aligned pointer.
 */
void* memory_align_and_pad(void* ptr, size_t size) {
    if ( size == 0 )
        return ptr;

    ASSERT(is_power_of_2(size));

    const size_t low_bits = size - 1;
    char* cursor = reinterpret_cast<char*>(ptr);

    // Zero every byte in front of the next boundary.
    for ( ; (reinterpret_cast<intptr_t>(cursor) & low_bits) != 0; ++cursor )
        *cursor = '\0';

    return reinterpret_cast<void*>(cursor);
}
// Checks that memory_align_and_pad() returns the aligned address and zeroes skipped bytes.
TEST_CASE("util memory_align_and_pad") {
unsigned char mem[16];
// 0xff marks bytes the function has not touched.
memset(mem, 0xff, 16);
CHECK((mem[0] == 0xff && mem[1] == 0xff));
CHECK(memory_align_and_pad(mem, 0) == mem);
CHECK((mem[0] == 0xff && mem[1] == 0xff));
CHECK(memory_align_and_pad(mem, 2) == mem);
CHECK((mem[0] == 0xff && mem[1] == 0xff));
CHECK(memory_align_and_pad(mem + 1, 2) == mem + 2);
for ( int i = 1; i < 2; i++ )
CHECK(mem[i] == 0x00);
CHECK((mem[0] == 0xff && mem[2] == 0xff));
memset(mem, 0xff, 16);
CHECK(memory_align_and_pad(mem + 1, 4) == mem + 4);
for ( int i = 1; i < 3; i++ )
CHECK(mem[i] == 0x00);
CHECK((mem[0] == 0xff && mem[4] == 0xff));
memset(mem, 0xff, 16);
CHECK(memory_align_and_pad(mem + 1, 8) == mem + 8);
for ( int i = 1; i < 7; i++ )
CHECK(mem[i] == 0x00);
CHECK((mem[0] == 0xff && mem[8] == 0xff));
}
/**
 * Rounds an offset up to the next multiple of @p size.
 *
 * @param offset Offset to align.
 * @param size   Alignment; must be a power of two. If zero, @p offset is
 *               returned as is.
 * @return The aligned offset, converted to int.
 */
int memory_size_align(size_t offset, size_t size) {
    if ( offset == 0 || size == 0 )
        return offset;

    ASSERT(is_power_of_2(size));

    // For a power of two, the low bits give the distance past the previous multiple.
    const size_t remainder = offset & (size - 1);

    if ( remainder == 0 )
        return offset;

    // Step back to the previous multiple, then one full step forward.
    return (offset - remainder) + size;
}
// Checks memory_size_align() for aligned and unaligned offsets with alignments 0, 1, 2 and 4.
TEST_CASE("util memory_size_align") {
// An aligned offset comes back unchanged.
CHECK(memory_size_align(0x1000, 0) == 0x1000);
CHECK(memory_size_align(0x1000, 1) == 0x1000);
CHECK(memory_size_align(0x1000, 2) == 0x1000);
CHECK(memory_size_align(0x1000, 4) == 0x1000);
// Unaligned offsets are rounded up; alignments 0 and 1 change nothing.
CHECK(memory_size_align(0x1001, 0) == 0x1001);
CHECK(memory_size_align(0x1001, 1) == 0x1001);
CHECK(memory_size_align(0x1001, 2) == 0x1002);
CHECK(memory_size_align(0x1001, 4) == 0x1004);
CHECK(memory_size_align(0x1002, 4) == 0x1004);
CHECK(memory_size_align(0x1003, 4) == 0x1004);
}
/**
 * Reports memory usage figures for the current process.
 *
 * @param total    If non-null, receives the total figure: on Darwin the
 *                 resident size from task_info() (0 if that call fails),
 *                 elsewhere ru_maxrss from getrusage() multiplied by 1024.
 * @param malloced If non-null, receives mallinfo()/mallinfo2()'s uordblks
 *                 where available. On non-Darwin builds it is then
 *                 overwritten with ru_ixrss multiplied by 1024.
 */
void get_memory_usage(uint64_t* total, uint64_t* malloced) {
uint64_t ret_total;
#if defined(HAVE_MALLINFO2) || defined(HAVE_MALLINFO)
if ( malloced ) {
// Prefer mallinfo2() when the build has it.
#ifdef HAVE_MALLINFO2
struct mallinfo2 mi = mallinfo2();
#else
struct mallinfo mi = mallinfo();
#endif
*malloced = mi.uordblks;
}
#endif
#ifdef HAVE_DARWIN
struct mach_task_basic_info t_info;
mach_msg_type_number_t t_info_count = MACH_TASK_BASIC_INFO;
if ( KERN_SUCCESS != task_info(mach_task_self(), MACH_TASK_BASIC_INFO, (task_info_t)&t_info, &t_info_count) )
ret_total = 0;
else
ret_total = t_info.resident_size;
#else
struct rusage r;
// NOTE(review): the return value of getrusage() is not checked.
getrusage(RUSAGE_SELF, &r);
// In KB.
ret_total = r.ru_maxrss * 1024;
if ( malloced )
// This will overwrite any mallinfo[2] value from above, should
// be restructured to avoid unnecessary work.
*malloced = r.ru_ixrss * 1024;
#endif
if ( total )
*total = ret_total;
}
// Allocation tracing helpers, only compiled when `malloc` is defined as a macro.
// The macros are removed first so that the real allocator functions can be
// declared and called below.
#ifdef malloc
#undef malloc
#undef realloc
#undef free
extern "C" {
void* malloc(size_t);
void* realloc(void*, size_t);
void free(void*);
}
// Tracing is off by default; nothing in this block changes the flag.
static int malloc_debug = 0;
// NOTE(review): the trace lines below print pointers with %x and size_t with %d;
// these conversions don't match the argument types on LP64 platforms.
// Allocates via malloc() and optionally logs the call with the network time.
void* debug_malloc(size_t t) {
void* v = malloc(t);
if ( malloc_debug )
printf("%.6f malloc %x %d\n", run_state::network_time, v, t);
return v;
}
// Reallocates via realloc() and optionally logs the call.
void* debug_realloc(void* v, size_t t) {
v = realloc(v, t);
if ( malloc_debug )
printf("%.6f realloc %x %d\n", run_state::network_time, v, t);
return v;
}
// Optionally logs the call, then releases via free().
void debug_free(void* v) {
if ( malloc_debug )
printf("%.6f free %x\n", run_state::network_time, v);
free(v);
}
// Replacement global operator new backed by malloc().
// NOTE(review): returns the malloc() result unchecked, so it can return null.
void* operator new(size_t t) {
void* v = malloc(t);
if ( malloc_debug )
printf("%.6f new %x %d\n", run_state::network_time, v, t);
return v;
}
// Replacement global operator new[] backed by malloc().
void* operator new[](size_t t) {
void* v = malloc(t);
if ( malloc_debug )
printf("%.6f new[] %x %d\n", run_state::network_time, v, t);
return v;
}
// Replacement global operator delete backed by free().
void operator delete(void* v) {
if ( malloc_debug )
printf("%.6f delete %x\n", run_state::network_time, v);
free(v);
}
// Replacement global operator delete[] backed by free().
// NOTE(review): logs "delete" rather than "delete[]", unlike operator new[] above.
void operator delete[](void* v) {
if ( malloc_debug )
printf("%.6f delete %x\n", run_state::network_time, v);
free(v);
}
#endif
// Checks that canonify_name() upper-cases letters and turns a space into an underscore.
TEST_CASE("util canonify_name") { CHECK(canonify_name("file name") == "FILE_NAME"); }
/**
 * Canonifies a name: alphanumeric characters are converted to upper case and
 * every other character is replaced with an underscore.
 *
 * @param name Name to convert.
 * @return The canonified name; same length as @p name.
 */
std::string canonify_name(const std::string& name) {
    std::string nname;
    nname.reserve(name.size());

    for ( const char ch : name ) {
        // The <cctype> functions are undefined for negative char values, so
        // classify and convert via unsigned char.
        const auto byte = static_cast<unsigned char>(ch);
        nname += isalnum(byte) ? static_cast<char>(toupper(byte)) : '_';
    }

    return nname;
}
/**
 * Post-processing for the GNU flavor of strerror_r(), which returns the
 * message as a pointer that may refer to a static string instead of the
 * supplied buffer. Copies as much of the message as fits into @p buf and
 * terminates it.
 *
 * @param result Message pointer returned by strerror_r().
 * @param buf    Caller's buffer.
 * @param buflen Size of @p buf in bytes.
 */
static void strerror_r_helper(char* result, char* buf, size_t buflen) {
    // Nothing can be stored in an empty buffer, and a message that already
    // lives in `buf` must not be copied onto itself.
    if ( buflen == 0 || result == buf )
        return;

    // strncpy() zero-fills short messages; long ones are cut off and
    // terminated by hand.
    strncpy(buf, result, buflen);
    buf[buflen - 1] = 0;
}
/**
 * Post-processing for the XSI flavor of strerror_r(), which returns an int.
 * Does nothing.
 */
static void strerror_r_helper(int result, char* buf, size_t buflen) {
    /* XSI flavor of strerror_r, no-op. */
}
/**
 * Stores a description of an error number in a caller-provided buffer.
 *
 * @param zeek_errno Error number to describe.
 * @param buf        Buffer receiving the message.
 * @param buflen     Size of @p buf in bytes.
 */
void zeek_strerror_r(int zeek_errno, char* buf, size_t buflen) {
#ifdef _MSC_VER
    auto fallback = "Error number: " + std::to_string(zeek_errno);
    auto outcome = fallback.data();
#else
    auto outcome = strerror_r(zeek_errno, buf, buflen);
#endif

    // strerror_r() returns char* (GNU) or int (XSI); overload resolution on
    // the result type selects the matching post-processing.
    strerror_r_helper(outcome, buf, buflen);
}
static string json_escape_byte(char c) {
char hex[2] = {'0', '0'};
bytetohex(c, hex);
string result = "\\x";
result.append(hex, 2);
return result;
}
// Checks json_escape_utf8() on ASCII, valid UTF-8 and various malformed UTF-8 sequences.
// Expected strings containing "\\x" are escaped output; the others are the input unchanged.
TEST_CASE("util json_escape_utf8") {
CHECK(json_escape_utf8("string") == "string");
CHECK(json_escape_utf8("string\n") == "string\n");
CHECK(json_escape_utf8("string\x82") == "string\\x82");
CHECK(json_escape_utf8("\x07\xd4\xb7o") == "\\x07\\xd4\\xb7o");
// These strings are duplicated from the scripts.base.frameworks.logging.ascii-json-utf8 btest
// Valid ASCII and valid ASCII control characters
CHECK(json_escape_utf8("a") == "a");
// NOLINTNEXTLINE(bugprone-string-literal-with-embedded-nul)
CHECK(json_escape_utf8("\b\f\n\r\t\x00\x15") == "\b\f\n\r\t\x00\x15");
// Table 3-7 in https://www.unicode.org/versions/Unicode12.0.0/ch03.pdf describes what is
// valid and invalid for the tests below
// Valid 2 Octet Sequence
CHECK(json_escape_utf8("\xc3\xb1") == "\xc3\xb1");
// Invalid 2 Octet Sequence
CHECK(json_escape_utf8("\xc3\x28") == "\\xc3(");
CHECK(json_escape_utf8("\xc0\x81") == "\\xc0\\x81");
CHECK(json_escape_utf8("\xc1\x81") == "\\xc1\\x81");
CHECK(json_escape_utf8("\xc2\xcf") == "\\xc2\\xcf");
// Invalid Sequence Identifier
CHECK(json_escape_utf8("\xa0\xa1") == "\\xa0\\xa1");
// Valid 3 Octet Sequence
CHECK(json_escape_utf8("\xe2\x82\xa1") == "\xe2\x82\xa1");
CHECK(json_escape_utf8("\xe0\xa3\xa1") == "\xe0\xa3\xa1");
// Invalid 3 Octet Sequence (in 2nd Octet)
CHECK(json_escape_utf8("\xe0\x80\xa1") == "\\xe0\\x80\\xa1");
CHECK(json_escape_utf8("\xe2\x28\xa1") == "\\xe2(\\xa1");
CHECK(json_escape_utf8("\xed\xa0\xa1") == "\\xed\\xa0\\xa1");
// Invalid 3 Octet Sequence (in 3rd Octet)
CHECK(json_escape_utf8("\xe2\x82\x28") == "\\xe2\\x82(");
// Valid 4 Octet Sequence
CHECK(json_escape_utf8("\xf0\x90\x8c\xbc") == "\xf0\x90\x8c\xbc");
CHECK(json_escape_utf8("\xf1\x80\x8c\xbc") == "\xf1\x80\x8c\xbc");
CHECK(json_escape_utf8("\xf4\x80\x8c\xbc") == "\xf4\x80\x8c\xbc");
// Invalid 4 Octet Sequence (in 2nd Octet)
CHECK(json_escape_utf8("\xf0\x80\x8c\xbc") == "\\xf0\\x80\\x8c\\xbc");
CHECK(json_escape_utf8("\xf2\x28\x8c\xbc") == "\\xf2(\\x8c\\xbc");
CHECK(json_escape_utf8("\xf4\x90\x8c\xbc") == "\\xf4\\x90\\x8c\\xbc");
// Invalid 4 Octet Sequence (in 3rd Octet)
CHECK(json_escape_utf8("\xf0\x90\x28\xbc") == "\\xf0\\x90(\\xbc");
// Invalid 4 Octet Sequence (in 4th Octet)
CHECK(json_escape_utf8("\xf0\x28\x8c\x28") == "\\xf0(\\x8c(");
// Invalid 4 Octet Sequence (too short)
CHECK(json_escape_utf8("\xf4\x80\x8c") == "\\xf4\\x80\\x8c");
CHECK(json_escape_utf8("\xf0") == "\\xf0");
// Private Use Area (E000-F8FF) are always invalid
CHECK(json_escape_utf8("\xee\x8b\xa0") == "\\xee\\x8b\\xa0");
// Valid UTF-8 character followed by an invalid one
// (the valid character comes out escaped as well once anything invalid is found)
CHECK(json_escape_utf8("\xc3\xb1\xc0\x81") == "\\xc3\\xb1\\xc0\\x81");
}
static bool check_ok_utf8(const unsigned char* start, const unsigned char* end) {
// There's certain blocks of UTF-8 that we don't want, but the easiest way to find
// them is to convert to UTF-32 and then compare. This is annoying, but it also calls
// isLegalUTF8Sequence along the way so go with it.
std::array<UTF32, 2> output;
UTF32* output2 = output.data();
auto result = ConvertUTF8toUTF32(&start, end, &output2, output2 + 1, strictConversion);
if ( result != conversionOK )
return false;
if ( ((output[0] <= 0x001F) || (output[0] == 0x007F) ||
(output[0] >= 0x0080 && output[0] <= 0x009F)) || // Control characters
(output[0] >= 0xE000 && output[0] <= 0xF8FF) || // Private Use Area
(output[0] >= 0xFFF0 && output[0] <= 0xFFFF) ) // Special characters
return false;
return true;
}
string json_escape_utf8(const string& val, bool escape_printable_controls) {
return json_escape_utf8(val.c_str(), val.size(), escape_printable_controls);
}
/**
 * Produces a string in which bytes that aren't part of an accepted UTF-8
 * character are replaced by "\xNN" escapes.
 *
 * If every non-ASCII sequence in the input passes check_ok_utf8(), the input
 * is returned unmodified. As soon as one sequence fails, the fully escaped
 * variant is returned instead, in which every byte outside the directly
 * inserted set is escaped, including bytes of sequences that were valid.
 *
 * @param val       Bytes to process.
 * @param val_size  Number of bytes in @p val.
 * @param escape_printable_controls When true, \b, \f, \n, \r and \t are
 *                  inserted directly instead of being handled like other
 *                  control characters.
 *                  NOTE(review): the name suggests the opposite; confirm the
 *                  intended meaning with the declaration.
 * @return The unmodified or the escaped string, as described above.
 */
string json_escape_utf8(const char* val, size_t val_size, bool escape_printable_controls) {
auto val_data = reinterpret_cast<const unsigned char*>(val);
// Reserve at least the size of the existing string to avoid resizing the string in the
// best-case scenario where we don't have any multi-byte characters. We keep two versions of
// this string: one that has a valid utf8 string and one that has a fully-escaped version. The
// utf8 string gets returned if all of the characters were valid utf8 sequences, but it will
// fall back to the escaped version otherwise. This uses slightly more memory but it avoids
// looping through all of the characters a second time in the case of a bad utf8 sequence.
string utf_result;
utf_result.reserve(val_size);
string escaped_result;
escaped_result.reserve(val_size);
// Set once the first unacceptable sequence is seen; utf_result is not updated afterwards.
bool found_bad = false;
size_t idx = 0;
while ( idx < val_size ) {
const char ch = val[idx];
// Normal ASCII characters plus a few of the control characters can be inserted directly.
// The rest of the control characters should be escaped as regular bytes.
if ( (ch >= 32 && ch < 127) ||
(escape_printable_controls && (ch == '\b' || ch == '\f' || ch == '\n' || ch == '\r' || ch == '\t')) ) {
if ( ! found_bad )
utf_result.push_back(ch);
escaped_result.push_back(ch);
++idx;
continue;
}
else if ( found_bad ) {
// If we already found a bad UTF8 character (see check_ok_utf8) just insert the bytes
// as escaped characters into the escaped result and move on.
escaped_result.append(json_escape_byte(ch));
++idx;
continue;
}
// If we haven't found a bad UTF-8 character yet, check to see if the next one starts a
// UTF-8 character. If not, we'll mark that we're on a bad result. Otherwise we'll go
// ahead and insert this character and continue.
// (found_bad is always false here since both branches above end with `continue`.)
if ( ! found_bad ) {
// Find out how long the next character should be.
unsigned int char_size = getNumBytesForUTF8(ch);
// If we don't have enough data for this character or it's an invalid sequence,
// insert the one escaped byte into the string and go to the next character.
if ( idx + char_size > val_size || ! check_ok_utf8(val_data + idx, val_data + idx + char_size) ) {
found_bad = true;
escaped_result.append(json_escape_byte(ch));
++idx;
continue;
}
else {
// Valid character: keep the raw bytes for the UTF-8 result and the escaped
// bytes for the fallback, then skip over the whole sequence.
for ( unsigned int i = 0; i < char_size; i++ )
escaped_result.append(json_escape_byte(val[idx + i]));
utf_result.append(val + idx, char_size);
idx += char_size;
}
}
}
if ( found_bad )
return escaped_result;
else
return utf_result;
}
// Sanity checks of the std::filesystem behavior used in this file, using POSIX-style paths.
TEST_CASE("util filesystem") {
#ifdef _MSC_VER
// TODO: adapt these tests to Windows paths
#else
std::filesystem::path path1("/a/b");
CHECK(path1.is_absolute());
CHECK(! path1.is_relative());
CHECK(path1.filename() == "b");
CHECK(path1.parent_path() == "/a");
// Repeated separators collapse during lexical normalization.
std::filesystem::path path2("/a//b//conn.log");
CHECK(path2.lexically_normal() == "/a/b/conn.log");
// A trailing separator is kept.
std::filesystem::path path3("a//b//");
CHECK(path3.lexically_normal() == "a/b/");
// Querying the current directory's file system must report a non-zero capacity.
auto info = std::filesystem::space(".");
CHECK(info.capacity > 0);
#endif
}
// Checks split() with narrow and wide strings: empty delimiters, empty input, adjacent,
// leading and trailing delimiters, multi-character delimiters and delimiters that don't occur.
TEST_CASE("util split") {
using str_vec = std::vector<std::string_view>;
using wstr_vec = std::vector<std::wstring_view>;
SUBCASE("w/ delim") {
// An empty delimiter yields the input as the only element.
CHECK_EQ(split("a:b:c", ""), str_vec({"a:b:c"}));
CHECK_EQ(split("", ""), str_vec({""}));
CHECK_EQ(split("a:b:c", ":"), str_vec({"a", "b", "c"}));
// Adjacent, leading and trailing delimiters produce empty elements.
CHECK_EQ(split("a:b::c", ":"), str_vec({"a", "b", "", "c"}));
CHECK_EQ(split("a:b:::c", ":"), str_vec({"a", "b", "", "", "c"}));
CHECK_EQ(split(":a:b:c", ":"), str_vec({"", "a", "b", "c"}));
CHECK_EQ(split("::a:b:c", ":"), str_vec({"", "", "a", "b", "c"}));
CHECK_EQ(split("a:b:c:", ":"), str_vec({"a", "b", "c", ""}));
CHECK_EQ(split("a:b:c::", ":"), str_vec({"a", "b", "c", "", ""}));
CHECK_EQ(split("", ":"), str_vec({""}));
CHECK_EQ(split("12345", "1"), str_vec({"", "2345"}));
CHECK_EQ(split("12345", "23"), str_vec{"1", "45"});
CHECK_EQ(split("12345", "a"), str_vec{"12345"});
CHECK_EQ(split("12345", ""), str_vec{"12345"});
}
// Same cases as above with wide-character strings.
SUBCASE("wchar_t w/ delim") {
CHECK_EQ(split(L"a:b:c", L""), wstr_vec({L"a:b:c"}));
CHECK_EQ(split(L"", L""), wstr_vec({L""}));
CHECK_EQ(split(L"a:b:c", L":"), wstr_vec({L"a", L"b", L"c"}));
CHECK_EQ(split(L"a:b::c", L":"), wstr_vec({L"a", L"b", L"", L"c"}));
CHECK_EQ(split(L"a:b:::c", L":"), wstr_vec({L"a", L"b", L"", L"", L"c"}));
CHECK_EQ(split(L":a:b:c", L":"), wstr_vec({L"", L"a", L"b", L"c"}));
CHECK_EQ(split(L"::a:b:c", L":"), wstr_vec({L"", L"", L"a", L"b", L"c"}));
CHECK_EQ(split(L"a:b:c:", L":"), wstr_vec({L"a", L"b", L"c", L""}));
CHECK_EQ(split(L"a:b:c::", L":"), wstr_vec({L"a", L"b", L"c", L"", L""}));
CHECK_EQ(split(L"", L":"), wstr_vec({L""}));
CHECK_EQ(split(L"12345", L"1"), wstr_vec({L"", L"2345"}));
CHECK_EQ(split(L"12345", L"23"), wstr_vec{L"1", L"45"});
CHECK_EQ(split(L"12345", L"a"), wstr_vec{L"12345"});
CHECK_EQ(split(L"12345", L""), wstr_vec{L"12345"});
}
}
// Checks approx_equal() with default and explicit tolerances, infinities and NaNs.
TEST_CASE("util approx_equal") {
CHECK(approx_equal(47.0, 47.0) == true);
CHECK(approx_equal(47.0, -47.0) == false);
CHECK(approx_equal(47.00001, 47.00002) == false);
CHECK(approx_equal(47.00001, 47.00002, 1e-5) == true);
CHECK(approx_equal(47.0, -47.0, 1e2) == true);
// The comparison is strict: a difference equal to the tolerance is not "approximately equal".
CHECK(approx_equal(47.0, -47.0, 94 + 1e-10) == true);
CHECK(approx_equal(47.0, -47.0, 94) == false);
// Infinities never compare as approximately equal, not even to themselves.
constexpr auto inf = std::numeric_limits<double>::infinity();
CHECK_FALSE(approx_equal(inf, inf));
CHECK_FALSE(approx_equal(-inf, inf));
CHECK_FALSE(approx_equal(inf, -inf));
CHECK_FALSE(approx_equal(inf, inf, inf));
// Neither do NaNs.
constexpr auto qnan = std::numeric_limits<double>::quiet_NaN(); // There's also `signaling_NaN`.
CHECK_FALSE(approx_equal(qnan, qnan));
CHECK_FALSE(approx_equal(-qnan, qnan));
CHECK_FALSE(approx_equal(qnan, -qnan));
}
/**
 * Checks whether two doubles are closer to each other than a given tolerance.
 *
 * The comparison is strict, and only the magnitude of @p tolerance matters.
 *
 * @return true if |a - b| < |tolerance|.
 */
bool approx_equal(double a, double b, double tolerance) {
    const double distance = std::abs(a - b);
    const double limit = std::abs(tolerance);

    return distance < limit;
}
} // namespace zeek::util
/**
 * Handler for allocation failures: prints a message to stderr, raises a
 * fatal error through the reporter if one exists, and aborts.
 *
 * @param where Description of where the allocation failed.
 */
extern "C" void out_of_memory(const char* where) {
    fprintf(stderr, "out of memory in %s.\n", where);

    if ( zeek::reporter ) {
        // Guess that might fail here if memory is really tight ...
        zeek::reporter->FatalError("out of memory in %s.\n", where);
    }

    abort();
}