mirror of
https://github.com/zeek/zeek.git
synced 2025-10-09 10:08:20 +00:00
Merge branch 'optimize_normalize_path' of https://github.com/MaxKellermann/zeek
- Minor changes in merge: extended unit test, prefer emplace_back(), remove unused "found" count in new function
* 'optimize_normalize_path' of https://github.com/MaxKellermann/zeek:
  - util: add a tokenize_string() overload which returns string_views
  - util: store std::string_view in "final_components" vector
  - util: use "auto" in normalize_path()
  - util: reserve space in normalize_path()
  - util: skip "." completely in normalize_path()
  - util: pass std::string_view to normalize_path()
  - util: pass std::string_view to tokenize_string()
  - util: don't modify the input string in tokenize_string()
This commit is contained in:
commit
d39bb42b14
4 changed files with 57 additions and 23 deletions
10
CHANGES
10
CHANGES
|
@ -1,4 +1,14 @@
|
|||
|
||||
3.1.0-dev.519 | 2020-01-31 13:19:09 -0800
|
||||
|
||||
* util: optimize tokenize_string() and normalize_path() (Max Kellermann)
|
||||
|
||||
This patch can speed up Zeek startup by 10-25%.
|
||||
|
||||
Adds a new tokenize_string() overload which returns string_views and
|
||||
changes existing tokenize_string() and normalize_path() to use string_view
|
||||
arguments.
|
||||
|
||||
3.1.0-dev.510 | 2020-01-31 11:20:28 -0800
|
||||
|
||||
* Remove extra fmt() in a reporter->Error() call (Jon Siwek, Corelight)
|
||||
|
|
2
VERSION
2
VERSION
|
@ -1 +1 @@
|
|||
3.1.0-dev.510
|
||||
3.1.0-dev.519
|
||||
|
|
59
src/util.cc
59
src/util.cc
|
@ -838,8 +838,7 @@ bool ensure_intermediate_dirs(const char* dirname)
|
|||
bool absolute = dirname[0] == '/';
|
||||
string path = normalize_path(dirname);
|
||||
|
||||
vector<string> path_components;
|
||||
tokenize_string(path, "/", &path_components);
|
||||
const auto path_components = tokenize_string(path, '/');
|
||||
|
||||
string current_dir;
|
||||
|
||||
|
@ -1500,28 +1499,50 @@ TEST_CASE("util tokenize_string")
|
|||
v2.clear();
|
||||
tokenize_string("/wrong/delim", ",", &v2);
|
||||
CHECK(v2.size() == 1);
|
||||
|
||||
auto svs = tokenize_string("one,two,three,four,", ',');
|
||||
std::vector<std::string_view> expect{"one", "two", "three", "four", ""};
|
||||
CHECK(svs == expect);
|
||||
}
|
||||
|
||||
vector<string>* tokenize_string(string input, const string& delim,
|
||||
vector<string>* tokenize_string(const std::string_view input, const std::string_view delim,
|
||||
vector<string>* rval, int limit)
|
||||
{
|
||||
if ( ! rval )
|
||||
rval = new vector<string>();
|
||||
|
||||
size_t pos = 0;
|
||||
size_t n;
|
||||
auto found = 0;
|
||||
|
||||
while ( (n = input.find(delim)) != string::npos )
|
||||
while ( (n = input.find(delim, pos)) != string::npos )
|
||||
{
|
||||
++found;
|
||||
rval->push_back(input.substr(0, n));
|
||||
input.erase(0, n + 1);
|
||||
rval->emplace_back(input.substr(pos, n - pos));
|
||||
pos = n + 1;
|
||||
|
||||
if ( limit && found == limit )
|
||||
break;
|
||||
}
|
||||
|
||||
rval->push_back(input);
|
||||
rval->emplace_back(input.substr(pos));
|
||||
return rval;
|
||||
}
|
||||
|
||||
/**
 * Splits \a input at every occurrence of the character \a delim.
 *
 * Every piece between two delimiters becomes one element of the result,
 * empty pieces included. The text following the last delimiter is always
 * appended as the final element, so the result is never empty: an input
 * without any delimiter yields a single element, and an input ending in
 * a delimiter yields a trailing empty element.
 *
 * @param input The text to split.
 * @param delim The single-character delimiter.
 * @return The pieces in order of appearance. Each element is a view
 * produced by \a input.substr(), i.e. it refers to the characters of
 * \a input instead of owning a copy.
 */
vector<std::string_view> tokenize_string(const std::string_view input, const char delim) noexcept
|
||||
{
|
||||
vector<std::string_view> rval;
|
||||
|
||||
size_t pos = 0;
|
||||
size_t n;
|
||||
|
||||
while ( (n = input.find(delim, pos)) != string::npos )
|
||||
{
|
||||
rval.emplace_back(input.substr(pos, n - pos));
|
||||
// Continue searching just past the delimiter that was found.
pos = n + 1;
|
||||
}
|
||||
|
||||
// Whatever follows the last delimiter (the whole input if none was found).
rval.emplace_back(input.substr(pos));
|
||||
return rval;
|
||||
}
|
||||
|
||||
|
@ -1552,26 +1573,27 @@ TEST_CASE("util normalize_path")
|
|||
CHECK(normalize_path("zeek/../..") == "..");
|
||||
}
|
||||
|
||||
string normalize_path(const string& path)
|
||||
string normalize_path(const std::string_view path)
|
||||
{
|
||||
size_t n;
|
||||
vector<string> components, final_components;
|
||||
vector<std::string_view> final_components;
|
||||
string new_path;
|
||||
new_path.reserve(path.size());
|
||||
|
||||
if ( path[0] == '/' )
|
||||
if ( ! path.empty() && path[0] == '/' )
|
||||
new_path = "/";
|
||||
|
||||
tokenize_string(path, "/", &components);
|
||||
const auto components = tokenize_string(path, '/');
|
||||
final_components.reserve(components.size());
|
||||
|
||||
vector<string>::const_iterator it;
|
||||
for ( it = components.begin(); it != components.end(); ++it )
|
||||
for ( auto it = components.begin(); it != components.end(); ++it )
|
||||
{
|
||||
if ( *it == "" ) continue;
|
||||
if ( *it == "." && it != components.begin() ) continue;
|
||||
|
||||
final_components.push_back(*it);
|
||||
|
||||
if ( *it == "." && it != components.begin() )
|
||||
final_components.pop_back();
|
||||
else if ( *it == ".." )
|
||||
if ( *it == ".." )
|
||||
{
|
||||
auto cur_idx = final_components.size() - 1;
|
||||
|
||||
|
@ -1598,7 +1620,7 @@ string normalize_path(const string& path)
|
|||
}
|
||||
}
|
||||
|
||||
for ( it = final_components.begin(); it != final_components.end(); ++it )
|
||||
for ( auto it = final_components.begin(); it != final_components.end(); ++it )
|
||||
{
|
||||
new_path.append(*it);
|
||||
new_path.append("/");
|
||||
|
@ -1614,8 +1636,7 @@ string without_bropath_component(const string& path)
|
|||
{
|
||||
string rval = normalize_path(path);
|
||||
|
||||
vector<string> paths;
|
||||
tokenize_string(bro_path(), ":", &paths);
|
||||
const auto paths = tokenize_string(bro_path(), ':');
|
||||
|
||||
for ( size_t i = 0; i < paths.size(); ++i )
|
||||
{
|
||||
|
|
|
@ -25,6 +25,7 @@
|
|||
#include <cstdint>
|
||||
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <array>
|
||||
#include <vector>
|
||||
#include <stdio.h>
|
||||
|
@ -145,10 +146,12 @@ inline std::string get_escaped_string(const std::string& str, bool escape_all)
|
|||
return get_escaped_string(str.data(), str.length(), escape_all);
|
||||
}
|
||||
|
||||
std::vector<std::string>* tokenize_string(std::string input,
|
||||
const std::string& delim,
|
||||
std::vector<std::string>* tokenize_string(std::string_view input,
|
||||
std::string_view delim,
|
||||
std::vector<std::string>* rval = 0, int limit = 0);
|
||||
|
||||
std::vector<std::string_view> tokenize_string(const std::string_view input, const char delim) noexcept;
|
||||
|
||||
extern char* copy_string(const char* s);
|
||||
extern int streq(const char* s1, const char* s2);
|
||||
|
||||
|
@ -343,7 +346,7 @@ std::string flatten_script_name(const std::string& name,
|
|||
* @param path A filesystem path.
|
||||
* @return A canonical/shortened version of \a path.
|
||||
*/
|
||||
std::string normalize_path(const std::string& path);
|
||||
std::string normalize_path(std::string_view path);
|
||||
|
||||
/**
|
||||
* Strip the ZEEKPATH component from a path.
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue