diff --git a/CHANGES b/CHANGES
index ce9f1852b2..1e1f0969a4 100644
--- a/CHANGES
+++ b/CHANGES
@@ -1,4 +1,11 @@
 
+3.1.0-dev.521 | 2020-01-31 14:18:17 -0800
+
+ * util: optimize expand_escape() by avoiding sscanf() (Max Kellermann)
+
+ sscanf() is notoriously slow, and the default scripts have lots of hex
+ escapes. This patch can reduce Zeek's startup time by 3-9%.
+
 3.1.0-dev.519 | 2020-01-31 13:19:09 -0800
 
  * util: optimize tokenize_string() and normalize_path() (Max Kellermann)
diff --git a/VERSION b/VERSION
index 19a258a590..433ae06f33 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-3.1.0-dev.519
+3.1.0-dev.521
diff --git a/src/util.cc b/src/util.cc
index 28f95a002e..51bfc8639c 100644
--- a/src/util.cc
+++ b/src/util.cc
@@ -293,6 +293,26 @@ int streq(const char* s1, const char* s2)
  return ! strcmp(s1, s2);
  }
 
+static constexpr int parse_octal_digit(char ch) noexcept
+ {
+ if ( ch >= '0' && ch <= '7' )
+ return ch - '0';
+ else
+ return -1;
+ }
+
+static constexpr int parse_hex_digit(char ch) noexcept
+ {
+ if ( ch >= '0' && ch <= '9' )
+ return ch - '0';
+ else if ( ch >= 'a' && ch <= 'f' )
+ return 10 + ch - 'a';
+ else if ( ch >= 'A' && ch <= 'F' )
+ return 10 + ch - 'A';
+ else
+ return -1;
+ }
+
 int expand_escape(const char*& s)
  {
  switch ( *(s++) ) {
@@ -310,23 +330,32 @@ int expand_escape(const char*& s)
  --s; // put back the first octal digit
  const char* start = s;
 
- // Don't increment inside loop control
- // because if isdigit() is a macro it might
- // expand into multiple increments ...
+ // require at least one octal digit and parse at most three
 
- // Here we define a maximum length for escape sequence
- // to allow easy handling of string like: "^H0" as
- // "\0100".
+ int result = parse_octal_digit(*s++);
 
- for ( int len = 0; len < 3 && isascii(*s) && isdigit(*s);
- ++s, ++len)
- ;
-
- int result;
- if ( sscanf(start, "%3o", &result) != 1 )
+ if ( result < 0 )
  {
- reporter->Warning("bad octal escape: %s ", start);
- result = 0;
+ reporter->Error("bad octal escape: %s", start);
+ return 0;
+ }
+
+ // second digit?
+ int digit = parse_octal_digit(*s);
+
+ if ( digit >= 0 )
+ {
+ result = (result << 3) | digit;
+ ++s;
+
+ // third digit?
+ digit = parse_octal_digit(*s);
+
+ if ( digit >= 0 )
+ {
+ result = (result << 3) | digit;
+ ++s;
+ }
  }
 
  return result;
@@ -337,15 +366,22 @@ int expand_escape(const char*& s)
  const char* start = s;
 
  // Look at most 2 characters, so that "\x0ddir" -> "^Mdir".
- for ( int len = 0; len < 2 && isascii(*s) && isxdigit(*s);
- ++s, ++len)
- ;
 
- int result;
- if ( sscanf(start, "%2x", &result) != 1 )
+ int result = parse_hex_digit(*s++);
+
+ if ( result < 0 )
  {
- reporter->Warning("bad hexadecimal escape: %s", start);
- result = 0;
+ reporter->Error("bad hexadecimal escape: %s", start);
+ return 0;
+ }
+
+ // second digit?
+ int digit = parse_hex_digit(*s);
+
+ if ( digit >= 0 )
+ {
+ result = (result << 4) | digit;
+ ++s;
  }
 
  return result;