mirror of
https://github.com/zeek/zeek.git
synced 2025-10-09 10:08:20 +00:00
Merge branch 'optimize_normalize_path' of https://github.com/MaxKellermann/zeek
- Minor changes in merge: extended unit test, prefer emplace_back(), remove unused "found" count in new function

* 'optimize_normalize_path' of https://github.com/MaxKellermann/zeek:
  - util: add a tokenize_string() overload which returns string_views
  - util: store std::string_view in "final_components" vector
  - util: use "auto" in normalize_path()
  - util: reserve space in normalize_path()
  - util: skip "." completely in normalize_path()
  - util: pass std::string_view to normalize_path()
  - util: pass std::string_view to tokenize_string()
  - util: don't modify the input string in tokenize_string()
This commit is contained in:
commit
d39bb42b14
4 changed files with 57 additions and 23 deletions
10
CHANGES
10
CHANGES
|
@ -1,4 +1,14 @@
|
||||||
|
|
||||||
|
3.1.0-dev.519 | 2020-01-31 13:19:09 -0800
|
||||||
|
|
||||||
|
* util: optimize tokenize_string() and normalize_path() (Max Kellermann)
|
||||||
|
|
||||||
|
This patch can speed up Zeek startup by 10-25%.
|
||||||
|
|
||||||
|
Adds a new tokenize_string() overload which returns string_views and
|
||||||
|
changes existing tokenize_string() and normalize_path() to use string_view
|
||||||
|
arguments.
|
||||||
|
|
||||||
3.1.0-dev.510 | 2020-01-31 11:20:28 -0800
|
3.1.0-dev.510 | 2020-01-31 11:20:28 -0800
|
||||||
|
|
||||||
* Remove extra fmt() in a reporter->Error() call (Jon Siwek, Corelight)
|
* Remove extra fmt() in a reporter->Error() call (Jon Siwek, Corelight)
|
||||||
|
|
2
VERSION
2
VERSION
|
@ -1 +1 @@
|
||||||
3.1.0-dev.510
|
3.1.0-dev.519
|
||||||
|
|
59
src/util.cc
59
src/util.cc
|
@ -838,8 +838,7 @@ bool ensure_intermediate_dirs(const char* dirname)
|
||||||
bool absolute = dirname[0] == '/';
|
bool absolute = dirname[0] == '/';
|
||||||
string path = normalize_path(dirname);
|
string path = normalize_path(dirname);
|
||||||
|
|
||||||
vector<string> path_components;
|
const auto path_components = tokenize_string(path, '/');
|
||||||
tokenize_string(path, "/", &path_components);
|
|
||||||
|
|
||||||
string current_dir;
|
string current_dir;
|
||||||
|
|
||||||
|
@ -1500,28 +1499,50 @@ TEST_CASE("util tokenize_string")
|
||||||
v2.clear();
|
v2.clear();
|
||||||
tokenize_string("/wrong/delim", ",", &v2);
|
tokenize_string("/wrong/delim", ",", &v2);
|
||||||
CHECK(v2.size() == 1);
|
CHECK(v2.size() == 1);
|
||||||
|
|
||||||
|
auto svs = tokenize_string("one,two,three,four,", ',');
|
||||||
|
std::vector<std::string_view> expect{"one", "two", "three", "four", ""};
|
||||||
|
CHECK(svs == expect);
|
||||||
}
|
}
|
||||||
|
|
||||||
vector<string>* tokenize_string(string input, const string& delim,
|
vector<string>* tokenize_string(const std::string_view input, const std::string_view delim,
|
||||||
vector<string>* rval, int limit)
|
vector<string>* rval, int limit)
|
||||||
{
|
{
|
||||||
if ( ! rval )
|
if ( ! rval )
|
||||||
rval = new vector<string>();
|
rval = new vector<string>();
|
||||||
|
|
||||||
|
size_t pos = 0;
|
||||||
size_t n;
|
size_t n;
|
||||||
auto found = 0;
|
auto found = 0;
|
||||||
|
|
||||||
while ( (n = input.find(delim)) != string::npos )
|
while ( (n = input.find(delim, pos)) != string::npos )
|
||||||
{
|
{
|
||||||
++found;
|
++found;
|
||||||
rval->push_back(input.substr(0, n));
|
rval->emplace_back(input.substr(pos, n - pos));
|
||||||
input.erase(0, n + 1);
|
pos = n + 1;
|
||||||
|
|
||||||
if ( limit && found == limit )
|
if ( limit && found == limit )
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
rval->push_back(input);
|
rval->emplace_back(input.substr(pos));
|
||||||
|
return rval;
|
||||||
|
}
|
||||||
|
|
||||||
|
vector<std::string_view> tokenize_string(const std::string_view input, const char delim) noexcept
|
||||||
|
{
|
||||||
|
vector<std::string_view> rval;
|
||||||
|
|
||||||
|
size_t pos = 0;
|
||||||
|
size_t n;
|
||||||
|
|
||||||
|
while ( (n = input.find(delim, pos)) != string::npos )
|
||||||
|
{
|
||||||
|
rval.emplace_back(input.substr(pos, n - pos));
|
||||||
|
pos = n + 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
rval.emplace_back(input.substr(pos));
|
||||||
return rval;
|
return rval;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1552,26 +1573,27 @@ TEST_CASE("util normalize_path")
|
||||||
CHECK(normalize_path("zeek/../..") == "..");
|
CHECK(normalize_path("zeek/../..") == "..");
|
||||||
}
|
}
|
||||||
|
|
||||||
string normalize_path(const string& path)
|
string normalize_path(const std::string_view path)
|
||||||
{
|
{
|
||||||
size_t n;
|
size_t n;
|
||||||
vector<string> components, final_components;
|
vector<std::string_view> final_components;
|
||||||
string new_path;
|
string new_path;
|
||||||
|
new_path.reserve(path.size());
|
||||||
|
|
||||||
if ( path[0] == '/' )
|
if ( ! path.empty() && path[0] == '/' )
|
||||||
new_path = "/";
|
new_path = "/";
|
||||||
|
|
||||||
tokenize_string(path, "/", &components);
|
const auto components = tokenize_string(path, '/');
|
||||||
|
final_components.reserve(components.size());
|
||||||
|
|
||||||
vector<string>::const_iterator it;
|
for ( auto it = components.begin(); it != components.end(); ++it )
|
||||||
for ( it = components.begin(); it != components.end(); ++it )
|
|
||||||
{
|
{
|
||||||
if ( *it == "" ) continue;
|
if ( *it == "" ) continue;
|
||||||
|
if ( *it == "." && it != components.begin() ) continue;
|
||||||
|
|
||||||
final_components.push_back(*it);
|
final_components.push_back(*it);
|
||||||
|
|
||||||
if ( *it == "." && it != components.begin() )
|
if ( *it == ".." )
|
||||||
final_components.pop_back();
|
|
||||||
else if ( *it == ".." )
|
|
||||||
{
|
{
|
||||||
auto cur_idx = final_components.size() - 1;
|
auto cur_idx = final_components.size() - 1;
|
||||||
|
|
||||||
|
@ -1598,7 +1620,7 @@ string normalize_path(const string& path)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for ( it = final_components.begin(); it != final_components.end(); ++it )
|
for ( auto it = final_components.begin(); it != final_components.end(); ++it )
|
||||||
{
|
{
|
||||||
new_path.append(*it);
|
new_path.append(*it);
|
||||||
new_path.append("/");
|
new_path.append("/");
|
||||||
|
@ -1614,8 +1636,7 @@ string without_bropath_component(const string& path)
|
||||||
{
|
{
|
||||||
string rval = normalize_path(path);
|
string rval = normalize_path(path);
|
||||||
|
|
||||||
vector<string> paths;
|
const auto paths = tokenize_string(bro_path(), ':');
|
||||||
tokenize_string(bro_path(), ":", &paths);
|
|
||||||
|
|
||||||
for ( size_t i = 0; i < paths.size(); ++i )
|
for ( size_t i = 0; i < paths.size(); ++i )
|
||||||
{
|
{
|
||||||
|
|
|
@ -25,6 +25,7 @@
|
||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
|
#include <string_view>
|
||||||
#include <array>
|
#include <array>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
@ -145,10 +146,12 @@ inline std::string get_escaped_string(const std::string& str, bool escape_all)
|
||||||
return get_escaped_string(str.data(), str.length(), escape_all);
|
return get_escaped_string(str.data(), str.length(), escape_all);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<std::string>* tokenize_string(std::string input,
|
std::vector<std::string>* tokenize_string(std::string_view input,
|
||||||
const std::string& delim,
|
std::string_view delim,
|
||||||
std::vector<std::string>* rval = 0, int limit = 0);
|
std::vector<std::string>* rval = 0, int limit = 0);
|
||||||
|
|
||||||
|
std::vector<std::string_view> tokenize_string(const std::string_view input, const char delim) noexcept;
|
||||||
|
|
||||||
extern char* copy_string(const char* s);
|
extern char* copy_string(const char* s);
|
||||||
extern int streq(const char* s1, const char* s2);
|
extern int streq(const char* s1, const char* s2);
|
||||||
|
|
||||||
|
@ -343,7 +346,7 @@ std::string flatten_script_name(const std::string& name,
|
||||||
* @param path A filesystem path.
|
* @param path A filesystem path.
|
||||||
* @return A canonical/shortened version of \a path.
|
* @return A canonical/shortened version of \a path.
|
||||||
*/
|
*/
|
||||||
std::string normalize_path(const std::string& path);
|
std::string normalize_path(std::string_view path);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Strip the ZEEKPATH component from a path.
|
* Strip the ZEEKPATH component from a path.
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue