// See the file "COPYING" in the main distribution directory for copyright. #pragma once #include "zeek/zeek-config.h" // Expose C99 functionality from inttypes.h, which would otherwise not be // available in C++. #ifndef __STDC_FORMAT_MACROS #define __STDC_FORMAT_MACROS #endif #ifndef __STDC_LIMIT_MACROS #define __STDC_LIMIT_MACROS #endif #include #include #include #include #include #include #include #include #include #include #include #ifdef TIME_WITH_SYS_TIME #include #include #elif defined(HAVE_SYS_TIME_H) #include #else #include #endif #ifdef DEBUG #include #ifdef ASSERT #undef ASSERT #endif #define ASSERT(x) assert(x) #define DEBUG_MSG(...) fprintf(stderr, __VA_ARGS__) #define DEBUG_fputs fputs #else #ifdef MSTCPIP_ASSERT_UNDEFINED #undef ASSERT #endif #define ASSERT(x) #define DEBUG_MSG(...) #define DEBUG_fputs(...) #endif #ifdef USE_PERFTOOLS_DEBUG #include #include extern HeapLeakChecker* heap_checker; #endif #include extern "C" { #include "zeek/3rdparty/modp_numtoa.h" } #ifdef HAVE_LINUX #include #include #endif #ifdef __FreeBSD__ #include #endif #ifdef _MSC_VER #include #include namespace zeek { namespace filesystem = std::filesystem; } inline constexpr std::string_view path_list_separator = ";"; #else // Expose ghc::filesystem as zeek::filesystem until we can // switch to std::filesystem on all platforms. #include "zeek/3rdparty/ghc/filesystem.hpp" namespace zeek { namespace filesystem = ghc::filesystem; } inline constexpr std::string_view path_list_separator = ":"; #endif #include "zeek/3rdparty/nonstd/expected.hpp" namespace zeek { template using expected = nonstd::expected; template using unexpected = nonstd::unexpected; } // namespace zeek #include "zeek/Span.h" using zeek_int_t = int64_t; using zeek_uint_t = uint64_t; #ifndef HAVE_STRCASESTR extern char* strcasestr(const char* s, const char* find); #endif // This is used by the patricia code and so it remains outside of the namespace. extern "C" void out_of_memory(const char* where); namespace zeek { class ODesc; class RecordVal; // Byte buffer types used by serialization code in storage and cluster. using byte_buffer = std::vector; using byte_buffer_span = Span; namespace util { namespace detail { std::string extract_ip(const std::string& i); std::string extract_ip_and_len(const std::string& i, int* len); // Returns the character corresponding to the given escape sequence (s points // just past the '\'), and updates s to point just beyond the last character // of the sequence. extern int expand_escape(const char*& s); extern const char* fmt_access_time(double time); extern bool ensure_intermediate_dirs(const char* dirname); extern bool ensure_dir(const char* dirname); extern void hmac_md5(size_t size, const unsigned char* bytes, unsigned char digest[16]); // Initializes RNGs for zeek::random_number() and hmac-md5/siphash/highwayhash usage. // If load_file is given, the seeds (both random & hashes) are loaded from that file. This // takes precedence over the "seed_string and "use_empty_seeds" arguments. The content of // "seed_string" is used as seeds if not empty next. Otherwise, when "use_empty_seeds" is // set it zero-initializes all seed values. If neither of these provides initial seed values, // platform specific random data is used as seeds. If write_file is given, the seeds are // written to that file. extern void init_random_seed(const char* load_file, const char* write_file, bool use_empty_seeds, const std::string& seed_string = {}); // Retrieves the initial seed computed after the very first call to // init_random_seed(). Repeated calls to init_random_seed() will not affect // the return value of this function. unsigned int initial_seed(); // Returns true if the user explicitly set a seed via init_random_seed(); extern bool have_random_seed(); /** * A platform-independent PRNG implementation. Note that this is not * necessarily a "statistically sound" implementation as the main purpose is * not for production use, but rather for regression testing. * @param state The value used to generate the next random number. * @return A new random value generated from *state* and that can passed * back into subsequent calls to generate further random numbers. */ long int prng(long int state); /** * Wrapper for system random() in the default case, but when running in * deterministic mode, uses the platform-independent zeek::prng() * to obtain consistent results since implementations of rand() may vary. * @return A value in the range [0, zeek::max_random()]. */ long int random_number(); /** * @return The maximum value that can be returned from zeek::random_number(). * When not using deterministic-mode, this is always equivalent to RAND_MAX. */ long int max_random(); /** * Wrapper for system srandom() in the default case, but when running in * deterministic mode, updates the state used for calling zeek::prng() * inside of zeek::random_number(). * @param seed Value to use for initializing the PRNG. */ void seed_random(unsigned int seed); /** * Set the process/thread name. May not be supported on all OSs. * @param name new name for the process/thread. OS limitations typically * truncate the name to 15 bytes maximum. * @param tid handle of thread whose name shall change */ void set_thread_name(const char* name, pthread_t tid = pthread_self()); // Each event source that may generate events gets an internally unique ID. // This is always LOCAL for a local Zeek. For remote event sources, it gets // assigned by the RemoteSerializer. // // FIXME: Find a nicer place for this type definition. // Unfortunately, it introduces circular dependencies when defined in one of // the obvious places (like Event.h or RemoteSerializer.h) using SourceID = std::uintptr_t; constexpr SourceID SOURCE_LOCAL = 0; // TODO: This is a temporary marker to flag events coming in via Broker. // Those are remote events but we don't have any further peer information // available for them (as the old communication code would have). Once we // remove RemoteSerializer, we can turn the SourceID into a simple boolean // indicating whether it's a local or remote event. constexpr SourceID SOURCE_BROKER = 0xffffffff; bool is_package_loader(const std::string& path); extern void add_to_zeek_path(const std::string& dir); /** * Wrapper class for functions like dirname(3) or basename(3) that won't * modify the path argument and may optionally abort execution on error. */ class SafePathOp { public: std::string result; bool error; protected: SafePathOp() : result(), error() {} void CheckValid(const char* result, const char* path, bool error_aborts); }; /** * Flatten a script name by replacing '/' path separators with '.'. * @param file A path to a Zeek script. If it is a __load__.zeek, that part * is discarded when constructing the flattened the name. * @param prefix A string to prepend to the flattened script name. * @return The flattened script name. */ std::string flatten_script_name(const std::string& name, const std::string& prefix = ""); /** * Return a canonical/shortened path string by removing superfluous elements * (path delimiters, dots referring to CWD or parent dir). * @param path A filesystem path. * @return A canonical/shortened version of \a path. */ std::string normalize_path(std::string_view path); /** * Strip the ZEEKPATH component from a path. * @param path A file/directory path that may be within a ZEEKPATH component. * @return *path* minus the common ZEEKPATH component (if any) removed. */ std::string without_zeekpath_component(std::string_view path); /** * Gets the full path used to invoke some executable. * @param invocation any possible string that may be seen in argv[0], such as * absolute path, relative path, or name to lookup in PATH. * @return the absolute path to the executable file */ std::string get_exe_path(const std::string& invocation); /** Opens a Zeek script package. * @param path Location of a Zeek script package (a directory). Will be changed * to the path of the package's loader script. * @param mode An fopen(3) mode. * @return The return value of fopen(3) on the loader script or null if one * doesn't exist. */ FILE* open_package(std::string& path, const std::string& mode = "r"); // This mimics the script-level function with the same name. const char* log_file_name(const char* tag); // Terminates processing gracefully, similar to pressing CTRL-C. void terminate_processing(); // Sets the current status of the Zeek process to the given string. // If the option --status-file has been set, this is written into // the corresponding file. Otherwise, the function is a no-op. void set_processing_status(const char* status, const char* reason); // Renames the given file to a new temporary name, and opens a new file with // the original name. Returns new file or NULL on error. Inits rotate_info if // given (open time is set network time). extern FILE* rotate_file(const char* name, RecordVal* rotate_info); // Parse a time string of the form "HH:MM" (as used for the rotation base // time) into a double representing the number of seconds. Returns -1 if the // string cannot be parsed. The function's result is intended to be used with // calc_next_rotate(). // // This function is not thread-safe. double parse_rotate_base_time(const char* rotate_base_time); // Calculate the duration until the next time a file is to be rotated, based // on the given rotate_interval and rotate_base_time. 'current' the the // current time to be used as base, 'rotate_interval' the rotation interval, // and 'base' the value returned by parse_rotate_base_time(). For the latter, // if the function returned -1, that's fine, calc_next_rotate() handles that. // // This function is thread-safe. double calc_next_rotate(double current, double rotate_interval, double base); int setvbuf(FILE* stream, char* buf, int type, size_t size); } // namespace detail template void delete_each(T* t) { using iterator = typename T::iterator; for ( iterator it = t->begin(); it != t->end(); ++it ) delete *it; } inline void bytetohex(unsigned char byte, char* hex_out) { static constexpr char hex_chars[] = "0123456789abcdef"; hex_out[0] = hex_chars[(byte & 0xf0) >> 4]; hex_out[1] = hex_chars[byte & 0x0f]; } std::string get_unescaped_string(const std::string& str); ODesc* get_escaped_string(ODesc* d, const char* str, size_t len, bool escape_all); std::string get_escaped_string(const char* str, size_t len, bool escape_all); inline std::string get_escaped_string(const std::string& str, bool escape_all) { return get_escaped_string(str.data(), str.length(), escape_all); } std::vector* tokenize_string(std::string_view input, std::string_view delim, std::vector* rval = nullptr, int limit = 0); std::vector tokenize_string(std::string_view input, const char delim) noexcept; extern char* copy_string(const char* str, size_t len); extern char* copy_string(const char* s); extern bool streq(const char* s1, const char* s2); extern bool starts_with(std::string_view s, std::string_view beginning); extern bool ends_with(std::string_view s, std::string_view ending); extern char* skip_whitespace(char* s); extern const char* skip_whitespace(const char* s); extern char* skip_whitespace(char* s, char* end_of_s); extern const char* skip_whitespace(const char* s, const char* end_of_s); extern char* skip_digits(char* s); extern char* get_word(char*& s); extern void get_word(int length, const char* s, int& pwlen, const char*& pw); extern void to_upper(char* s); extern std::string to_upper(const std::string& s); extern int decode_hex(char ch); extern unsigned char encode_hex(int h); template int atoi_n(int len, const char* s, const char** end, int base, T& result); extern char* uitoa_n(uint64_t value, char* str, int n, int base, const char* prefix = nullptr); extern const char* strpbrk_n(size_t len, const char* s, const char* charset); int strstr_n(const int big_len, const unsigned char* big, const int little_len, const unsigned char* little); // Replaces all occurrences of *o* in *s* with *n*. extern std::string strreplace(const std::string& s, const std::string& o, const std::string& n); // Remove all leading and trailing white space from string. extern std::string strstrip(std::string s); // Return a lower-cased version of the string. extern std::string strtolower(const std::string& s); // Return a upper-cased version of the string. extern std::string strtoupper(const std::string& s); extern int fputs(int len, const char* s, FILE* fp); extern bool is_printable(const char* s, int len); extern const char* fmt_bytes(const char* data, int len); // Note: returns a pointer into a shared buffer. extern const char* vfmt(const char* format, va_list args); // Note: returns a pointer into a shared buffer. extern const char* fmt(const char* format, ...) __attribute__((format(printf, 1, 2))); // Returns true if path exists and is a directory. bool is_dir(const std::string& path); // Returns true if path exists and is a file. bool is_file(const std::string& path); extern int int_list_cmp(const void* v1, const void* v2); extern const std::string& zeek_path(); extern const char* zeek_plugin_path(); extern const char* zeek_plugin_activate(); extern std::string zeek_prefixes(); class SafeDirname : public detail::SafePathOp { public: explicit SafeDirname(const char* path, bool error_aborts = true); explicit SafeDirname(const std::string& path, bool error_aborts = true); private: void DoFunc(const std::string& path, bool error_aborts = true); }; class SafeBasename : public detail::SafePathOp { public: explicit SafeBasename(const char* path, bool error_aborts = true); explicit SafeBasename(const std::string& path, bool error_aborts = true); private: void DoFunc(const std::string& path, bool error_aborts = true); }; std::string implode_string_vector(const std::vector& v, const std::string& delim = "\n"); /** * Locate a file within a given search path. * @param filename Name of a file to find. * @param path_set Colon-delimited set of paths to search for the file. * @param opt_ext A filename extension/suffix to allow. * @return Path to the found file, or an empty string if not found. */ std::string find_file(const std::string& filename, const std::string& path_set, const std::string& opt_ext = ""); /** * Locate a script file within a given search path. * @param filename Name of a file to find. * @param path_set Colon-delimited set of paths to search for the file. * @return Path to the found file, or an empty string if not found. */ std::string find_script_file(const std::string& filename, const std::string& path_set); // Wrapper around fopen(3). Emits an error when failing to open. FILE* open_file(const std::string& path, const std::string& mode = "r"); // Returns the current time. // (In pseudo-realtime mode this is faked to be the start time of the // trace plus the time interval Zeek has been running. To avoid this, // call with real=true). extern double current_time(bool real = false); // Convert a time represented as a double to a timeval struct. extern struct timeval double_to_timeval(double t); // Return > 0 if tv_a > tv_b, 0 if equal, < 0 if tv_a < tv_b. extern int time_compare(struct timeval* tv_a, struct timeval* tv_b); // Returns the CPU time consumed to date. extern double curr_CPU_time(); // Returns an integer that's very likely to be unique, even across Zeek // instances. The integer can be drawn from different pools, which is helpful // when the random number generator is seeded to be deterministic. In that // case, the same sequence of integers is generated per pool. #define UID_POOL_DEFAULT_INTERNAL 1 #define UID_POOL_DEFAULT_SCRIPT 2 #define UID_POOL_CUSTOM_SCRIPT 10 // First available custom script level pool. extern uint64_t calculate_unique_id(); extern uint64_t calculate_unique_id(const size_t pool); // Use for map's string keys. struct ltstr { bool operator()(const char* s1, const char* s2) const { return strcmp(s1, s2) < 0; } }; constexpr size_t pad_size(size_t size) { // We emulate glibc here (values measured on Linux i386). // FIXME: We should better copy the portable value definitions from glibc. if ( size == 0 ) return 0; // glibc allocated 16 bytes anyway. const int pad = 8; if ( size < 12 ) return 2 * pad; return ((size + 3) / pad + 1) * pad; } #define padded_sizeof(x) (zeek::util::pad_size(sizeof(x))) // Like write() but handles interrupted system calls by restarting. Returns // true if the write was successful, otherwise sets errno. This function is // thread-safe as long as no two threads write to the same descriptor. extern bool safe_write(int fd, const char* data, int len); // Same as safe_write(), but for pwrite(). extern bool safe_pwrite(int fd, const unsigned char* data, size_t len, size_t offset); // Like fsync() but handles interrupted system calls by retrying and // aborts on unrecoverable errors. extern bool safe_fsync(int fd); // Wraps close(2) to emit error messages and abort on unrecoverable errors. extern void safe_close(int fd); // Versions of realloc/malloc which abort() on out of memory // Versions of realloc/malloc which abort() on out of memory inline void* safe_realloc(void* ptr, size_t size) { ptr = realloc(ptr, size); if ( size && ! ptr ) out_of_memory("realloc"); return ptr; } inline void* safe_malloc(size_t size) { void* ptr = malloc(size); if ( ! ptr ) out_of_memory("malloc"); return ptr; } inline char* safe_strncpy(char* dest, const char* src, size_t n) { char* result = strncpy(dest, src, n - 1); dest[n - 1] = '\0'; return result; } // Memory alignment helpers. inline bool is_power_of_2(zeek_uint_t x) { return ((x - 1) & x) == 0; } // Rounds the given pointer up to the nearest multiple of the // given size, if not already a multiple. const void* memory_align(const void* ptr, size_t size); // Rounds the given pointer up to the nearest multiple of the // given size, padding the skipped region with 0 bytes. void* memory_align_and_pad(void* ptr, size_t size); // Returns offset rounded up so it can correctly align data of the given size. int memory_size_align(size_t offset, size_t size); // Returns total memory allocations and (if available) amount actually // handed out by malloc. extern void get_memory_usage(uint64_t* total, uint64_t* malloced); // Class to be used as a third argument for STL maps to be able to use // char*'s as keys. Otherwise the pointer values will be compared instead of // the actual string values. struct CompareString { bool operator()(char const* a, char const* b) const { return strcmp(a, b) < 0; } }; /** * Canonicalizes a name by converting it to uppercase letters and replacing * all non-alphanumeric characters with an underscore. * @param name The string to canonicalize. * @return The canonicalized version of \a name which caller may later delete[]. */ std::string canonify_name(const std::string& name); /** * Reentrant version of strerror(). Takes care of the difference between the * XSI-compliant and the GNU-specific version of strerror_r(). */ void zeek_strerror_r(int zeek_errno, char* buf, size_t buflen); /** * Escapes bytes in a string that are not valid UTF8 characters with \xYY format. Used * by the JSON writer and BIF methods. * @param val the input string to be escaped * @return the escaped string */ std::string json_escape_utf8(const std::string& val, bool escape_printable_controls = true); /** * Escapes bytes in a string that are not valid UTF8 characters with \xYY format. Used * by the JSON writer and BIF methods. * @param val the character data to be escaped * @param val_size the length of the character data * @return the escaped string */ std::string json_escape_utf8(const char* val, size_t val_size, bool escape_printable_controls = true); /** * Checks for values that are approximately equal. * @param a first value to compare * @param b second value to compare * @param tolerance how close they need to be to deem them "approximately equal" * @return true if `a` is within the given tolerance of `b`, false otherwise */ bool approx_equal(double a, double b, double tolerance = std::numeric_limits::epsilon()); /** * Splits a string at all occurrences of a delimiter. Successive occurrences * of the delimiter will be split into multiple pieces. * * \note This function is not UTF8-aware. */ template std::vector split(T s, const T& delim) { // If there's no delimiter, return a copy of the existing string. if ( delim.empty() ) return {std::move(s)}; // If the delimiter won't fit in the string, just return a copy as well. if ( s.size() < delim.size() ) return {std::move(s)}; std::vector l; const bool ends_in_delim = (s.substr(s.size() - delim.size()) == delim); do { size_t p = s.find(delim); l.push_back(s.substr(0, p)); if ( p == std::string::npos ) break; s = s.substr(p + delim.size()); } while ( ! s.empty() ); if ( ends_in_delim ) l.emplace_back(T{}); return l; } /** * Specialized version of util::split that allows for differing string and delimiter types, * with the requirement that the delimiter must be of the same type as what is stored in the * string type. For example, this allows passing a std::string as the string to split with * a const char* delimiter. * * @param s the string to split * @param delim the delimiter to split the string on * @return a vector of containing the separate parts of the string. */ template std::vector split(T s, U delim) { return split(s, T{delim}); } /** * Specialized version of util::split that takes a const char* string and delimiter. * * @param s the string to split * @param delim the delimiter to split the string on * @return a vector of string_view objects containing the separate parts of the string. */ inline std::vector split(const char* s, const char* delim) { return split(std::string_view(s), std::string_view(delim)); } /** * Specialized version of util::split that takes a const wchar_t* string and delimiter. * * @param s the string to split * @param delim the delimiter to split the string on * @return a vector of wstring_view objects containing the separate parts of the string. */ inline std::vector split(const wchar_t* s, const wchar_t* delim) { return split(std::wstring_view(s), std::wstring_view(delim)); } } // namespace util } // namespace zeek