From cebb85b1e86cd9b03ed5b7e128a99bb1e85cbaf3 Mon Sep 17 00:00:00 2001 From: Dominik Charousset Date: Thu, 2 Nov 2023 17:37:11 +0100 Subject: [PATCH] Fix unsafe and inefficient uses of copy_string Add a new overload to `copy_string` that takes the input characters plus size. The new overload avoids inefficient scanning of the input for the null terminator in cases where we know the size beforehand. Furthermore, this overload *must* be used when dealing with input character sequences that may have no null terminator, e.g., when the input is from a `std::string_view` object. --- src/Debug.cc | 6 +++-- src/Expr.cc | 2 +- src/Stmt.cc | 2 +- src/Val.cc | 2 ++ src/Val.h | 1 + src/ZeekString.cc | 11 +++++--- src/ZeekString.h | 5 ++++ src/input/Manager.cc | 7 ++--- src/input/readers/ascii/Ascii.cc | 2 +- src/input/readers/benchmark/Benchmark.cc | 2 +- src/input/readers/config/Config.cc | 16 ++++++----- src/input/readers/raw/Raw.cc | 10 ++++--- src/logging/Manager.cc | 27 +++++++++++-------- src/logging/WriterBackend.cc | 8 +++--- src/script_opt/CPP/RuntimeInits.cc | 2 +- src/spicy/runtime-support.cc | 2 +- src/threading/MsgThread.cc | 4 +-- src/threading/SerialTypes.cc | 4 +-- src/threading/formatters/Ascii.cc | 2 +- src/util.cc | 10 +++++++ src/util.h | 1 + src/zeek-setup.cc | 5 ++-- src/zeek.bif | 3 ++- src/zeekygen/Target.cc | 5 +--- .../btest/plugins/reader-plugin/src/Foo.cc | 2 +- 25 files changed, 85 insertions(+), 56 deletions(-) diff --git a/src/Debug.cc b/src/Debug.cc index 3d2fd26273..4f136f5293 100644 --- a/src/Debug.cc +++ b/src/Debug.cc @@ -157,7 +157,8 @@ int TraceState::LogTrace(const char* fmt, ...) { } if ( ! loc.filename ) { - loc.filename = util::copy_string(""); + static constexpr const char str[] = ""; + loc.filename = util::copy_string(str, std::size(str) - 1); loc.last_line = 0; } @@ -675,7 +676,8 @@ string get_context_description(const Stmt* stmt, const Frame* frame) { if ( stmt ) loc = *stmt->GetLocationInfo(); else { - loc.filename = util::copy_string(""); + static constexpr const char str[] = ""; + loc.filename = util::copy_string(str, std::size(str) - 1); loc.last_line = 0; } diff --git a/src/Expr.cc b/src/Expr.cc index 80f48dd17c..82ef6d3217 100644 --- a/src/Expr.cc +++ b/src/Expr.cc @@ -3362,7 +3362,7 @@ bool FieldAssignExpr::PromoteTo(TypePtr t) { bool FieldAssignExpr::IsRecordElement(TypeDecl* td) const { if ( td ) { td->type = op->GetType(); - td->id = util::copy_string(field_name.c_str()); + td->id = util::copy_string(field_name.c_str(), field_name.size()); } return true; diff --git a/src/Stmt.cc b/src/Stmt.cc index 5bab9618f8..e2ff8567e3 100644 --- a/src/Stmt.cc +++ b/src/Stmt.cc @@ -1752,7 +1752,7 @@ WhenInfo::WhenInfo(ExprPtr arg_cond, FuncType::CaptureList* arg_cl, bool arg_is_ auto param_list = new type_decl_list(); auto count_t = base_type(TYPE_COUNT); - param_list->push_back(new TypeDecl(util::copy_string(lambda_param_id.c_str()), count_t)); + param_list->push_back(new TypeDecl(util::copy_string(lambda_param_id.c_str(), lambda_param_id.size()), count_t)); auto params = make_intrusive(param_list); lambda_ft = make_intrusive(params, base_type(TYPE_ANY), FUNC_FLAVOR_FUNCTION); diff --git a/src/Val.cc b/src/Val.cc index 885c6a8f59..2550d1d5c4 100644 --- a/src/Val.cc +++ b/src/Val.cc @@ -752,6 +752,8 @@ const u_char* StringVal::Bytes() const { return AsString()->Bytes(); } const char* StringVal::CheckString() const { return AsString()->CheckString(); } +std::pair StringVal::CheckStringWithSize() const { return AsString()->CheckStringWithSize(); } + string StringVal::ToStdString() const { auto* bs = AsString(); return {(char*)bs->Bytes(), static_cast(bs->Len())}; diff --git a/src/Val.h b/src/Val.h index c8c892091a..9ddeec842f 100644 --- a/src/Val.h +++ b/src/Val.h @@ -546,6 +546,7 @@ public: int Len() const; const u_char* Bytes() const; const char* CheckString() const; + std::pair CheckStringWithSize() const; // Note that one needs to de-allocate the return value of // ExpandedString() to avoid a memory leak. diff --git a/src/ZeekString.cc b/src/ZeekString.cc index 672149b57e..6d2e39ac31 100644 --- a/src/ZeekString.cc +++ b/src/ZeekString.cc @@ -135,10 +135,10 @@ void String::Set(std::string_view str) { void String::Set(const String& str) { *this = str; } -const char* String::CheckString() const { +std::pair String::CheckStringWithSize() const { void* nulTerm; if ( n == 0 ) - return ""; + return {"", 0}; nulTerm = memchr(b, '\0', n + final_NUL); if ( nulTerm != &b[n] ) { @@ -151,12 +151,15 @@ const char* String::CheckString() const { reporter->Error("string without NUL terminator: \"%s\"", exp_s); delete[] exp_s; - return ""; + static constexpr const char result[] = ""; + return {result, std::size(result) - 1}; } - return (const char*)b; + return {(const char*)b, n}; } +const char* String::CheckString() const { return CheckStringWithSize().first; } + char* String::Render(int format, int* len) const { // Maximum character expansion is as \xHH, so a factor of 4. char* s = new char[n * 4 + 1]; // +1 is for final '\0' diff --git a/src/ZeekString.h b/src/ZeekString.h index dea4c602e0..44fee2d8cd 100644 --- a/src/ZeekString.h +++ b/src/ZeekString.h @@ -84,6 +84,11 @@ public: */ const char* CheckString() const; + /** + * Like @c CheckString(), but also returns the length of the string. + */ + std::pair CheckStringWithSize() const; + enum render_style { ESC_NONE = 0, ESC_ESC = (1 << 1), // '\' -> "\\" diff --git a/src/input/Manager.cc b/src/input/Manager.cc index 0fd531f00b..2de28b3735 100644 --- a/src/input/Manager.cc +++ b/src/input/Manager.cc @@ -215,8 +215,8 @@ bool Manager::CreateStream(Stream* info, RecordVal* description) { string source((const char*)bsource->Bytes(), bsource->Len()); ReaderBackend::ReaderInfo rinfo; - rinfo.source = util::copy_string(source.c_str()); - rinfo.name = util::copy_string(name.c_str()); + rinfo.source = util::copy_string(source.c_str(), source.size()); + rinfo.name = util::copy_string(name.c_str(), name.size()); auto mode_val = description->GetFieldOrDefault("mode"); auto mode = mode_val->AsEnumVal(); @@ -244,7 +244,8 @@ bool Manager::CreateStream(Stream* info, RecordVal* description) { auto index = info->config->RecreateIndex(*k); string key = index->Idx(0)->AsString()->CheckString(); string value = v->GetVal()->AsString()->CheckString(); - rinfo.config.insert(std::make_pair(util::copy_string(key.c_str()), util::copy_string(value.c_str()))); + rinfo.config.insert(std::make_pair(util::copy_string(key.c_str(), key.size()), + util::copy_string(value.c_str(), value.size()))); } } diff --git a/src/input/readers/ascii/Ascii.cc b/src/input/readers/ascii/Ascii.cc index a553cd48f0..365336f93d 100644 --- a/src/input/readers/ascii/Ascii.cc +++ b/src/input/readers/ascii/Ascii.cc @@ -152,7 +152,7 @@ bool Ascii::OpenFile() { if ( ! read_location ) { read_location = LocationPtr(new zeek::detail::Location()); - read_location->filename = util::copy_string(fname.c_str()); + read_location->filename = util::copy_string(fname.c_str(), fname.size()); } StopWarningSuppression(); diff --git a/src/input/readers/benchmark/Benchmark.cc b/src/input/readers/benchmark/Benchmark.cc index 3d79690321..80b6182e1d 100644 --- a/src/input/readers/benchmark/Benchmark.cc +++ b/src/input/readers/benchmark/Benchmark.cc @@ -120,7 +120,7 @@ threading::Value* Benchmark::EntryToVal(TypeTag type, TypeTag subtype) { case TYPE_STRING: { std::string rnd = RandomString(10); - val->val.string_val.data = util::copy_string(rnd.c_str()); + val->val.string_val.data = util::copy_string(rnd.c_str(), rnd.size()); val->val.string_val.length = rnd.size(); break; } diff --git a/src/input/readers/config/Config.cc b/src/input/readers/config/Config.cc index 9ad9ddc396..405af70d8d 100644 --- a/src/input/readers/config/Config.cc +++ b/src/input/readers/config/Config.cc @@ -222,11 +222,11 @@ bool Config::DoUpdate() { Value** fields = new Value*[2]; Value* keyval = new threading::Value(TYPE_STRING, true); keyval->val.string_val.length = key.size(); - keyval->val.string_val.data = util::copy_string(key.c_str()); + keyval->val.string_val.data = util::copy_string(key.c_str(), key.size()); fields[0] = keyval; Value* val = new threading::Value(TYPE_STRING, true); val->val.string_val.length = value.size(); - val->val.string_val.data = util::copy_string(value.c_str()); + val->val.string_val.data = util::copy_string(value.c_str(), value.size()); fields[1] = val; if ( Info().mode == MODE_STREAM ) @@ -238,13 +238,15 @@ bool Config::DoUpdate() { { Value** vals = new Value*[4]; vals[0] = new Value(TYPE_STRING, true); - vals[0]->val.string_val.data = util::copy_string(Info().name); - vals[0]->val.string_val.length = strlen(Info().name); + auto val0_len = strlen(Info().name); + vals[0]->val.string_val.data = util::copy_string(Info().name, val0_len); + vals[0]->val.string_val.length = val0_len; vals[1] = new Value(TYPE_STRING, true); - vals[1]->val.string_val.data = util::copy_string(Info().source); - vals[1]->val.string_val.length = strlen(Info().source); + auto val1_len = strlen(Info().source); + vals[1]->val.string_val.data = util::copy_string(Info().source, val1_len); + vals[1]->val.string_val.length = val1_len; vals[2] = new Value(TYPE_STRING, true); - vals[2]->val.string_val.data = util::copy_string(key.c_str()); + vals[2]->val.string_val.data = util::copy_string(key.c_str(), key.size()); vals[2]->val.string_val.length = key.size(); vals[3] = eventval; diff --git a/src/input/readers/raw/Raw.cc b/src/input/readers/raw/Raw.cc index ed59c38ce6..5161843af0 100644 --- a/src/input/readers/raw/Raw.cc +++ b/src/input/readers/raw/Raw.cc @@ -679,11 +679,13 @@ bool Raw::DoUpdate() { Value** vals = new Value*[4]; vals[0] = new Value(TYPE_STRING, true); - vals[0]->val.string_val.data = util::copy_string(Info().name); - vals[0]->val.string_val.length = strlen(Info().name); + auto val0_len = strlen(Info().name); + vals[0]->val.string_val.data = util::copy_string(Info().name, val0_len); + vals[0]->val.string_val.length = val0_len; vals[1] = new Value(TYPE_STRING, true); - vals[1]->val.string_val.data = util::copy_string(Info().source); - vals[1]->val.string_val.length = strlen(Info().source); + auto val1_len = strlen(Info().source); + vals[1]->val.string_val.data = util::copy_string(Info().source, val1_len); + vals[1]->val.string_val.length = val1_len; vals[2] = new Value(TYPE_COUNT, true); vals[2]->val.int_val = code; vals[3] = new Value(TYPE_BOOL, true); diff --git a/src/logging/Manager.cc b/src/logging/Manager.cc index 6f9d8f4e2e..01844e596c 100644 --- a/src/logging/Manager.cc +++ b/src/logging/Manager.cc @@ -803,14 +803,15 @@ bool Manager::Write(EnumVal* id, RecordVal* columns_arg) { if ( const auto& val = filter->field_name_map->Find(fn) ) { delete[] filter->fields[j]->name; - filter->fields[j]->name = util::copy_string(val->AsStringVal()->CheckString()); + auto [data, len] = val->AsStringVal()->CheckStringWithSize(); + filter->fields[j]->name = util::copy_string(data, len); } } arg_fields[j] = new threading::Field(*filter->fields[j]); } info = new WriterBackend::WriterInfo; - info->path = util::copy_string(path.c_str()); + info->path = util::copy_string(path.c_str(), path.size()); info->network_time = run_state::network_time; auto* filter_config_table = filter->config->AsTable(); @@ -821,7 +822,8 @@ bool Manager::Write(EnumVal* id, RecordVal* columns_arg) { auto index = filter->config->RecreateIndex(*k); string key = index->Idx(0)->AsString()->CheckString(); string value = v->GetVal()->AsString()->CheckString(); - info->config.insert(std::make_pair(util::copy_string(key.c_str()), util::copy_string(value.c_str()))); + info->config.emplace(util::copy_string(key.c_str(), key.size()), + util::copy_string(value.c_str(), value.size())); } // CreateWriter() will set the other fields in info. @@ -883,14 +885,15 @@ threading::Value* Manager::ValToLogVal(std::optional& val, Type* ty) { const char* s = ty->AsEnumType()->Lookup(val->AsInt()); if ( s ) { - lval->val.string_val.data = util::copy_string(s); - lval->val.string_val.length = strlen(s); + auto len = strlen(s); + lval->val.string_val.data = util::copy_string(s, len); + lval->val.string_val.length = len; } else { auto err_msg = "enum type does not contain value:" + std::to_string(val->AsInt()); ty->Error(err_msg.c_str()); - lval->val.string_val.data = util::copy_string(""); + lval->val.string_val.data = util::copy_string("", 0); lval->val.string_val.length = 0; } break; @@ -935,9 +938,10 @@ threading::Value* Manager::ValToLogVal(std::optional& val, Type* ty) { case TYPE_FILE: { const File* f = val->AsFile(); - string s = f->Name(); - lval->val.string_val.data = util::copy_string(s.c_str()); - lval->val.string_val.length = s.size(); + const char* s = f->Name(); + auto len = strlen(s); + lval->val.string_val.data = util::copy_string(s, len); + lval->val.string_val.length = len; break; } @@ -946,8 +950,9 @@ threading::Value* Manager::ValToLogVal(std::optional& val, Type* ty) { const Func* f = val->AsFunc(); f->Describe(&d); const char* s = d.Description(); - lval->val.string_val.data = util::copy_string(s); - lval->val.string_val.length = strlen(s); + auto len = strlen(s); + lval->val.string_val.data = util::copy_string(s, len); + lval->val.string_val.length = len; break; } diff --git a/src/logging/WriterBackend.cc b/src/logging/WriterBackend.cc index fefde92af1..30a12e74f0 100644 --- a/src/logging/WriterBackend.cc +++ b/src/logging/WriterBackend.cc @@ -73,9 +73,7 @@ broker::data WriterBackend::WriterInfo::ToBroker() const { auto t = broker::table(); for ( config_map::const_iterator i = config.begin(); i != config.end(); ++i ) { - auto key = std::string(i->first); - auto value = std::string(i->second); - t.insert(std::make_pair(key, value)); + t.emplace(std::string{i->first}, std::string{i->second}); } auto bppf = post_proc_func ? post_proc_func : ""; @@ -98,7 +96,7 @@ bool WriterBackend::WriterInfo::FromBroker(broker::data d) { if ( ! (bpath && brotation_base && brotation_interval && bnetwork_time && bconfig && bppf) ) return false; - path = util::copy_string(bpath->c_str()); + path = util::copy_string(bpath->c_str(), bpath->size()); post_proc_func = util::copy_string(bppf->c_str()); rotation_base = *brotation_base; rotation_interval = *brotation_interval; @@ -111,7 +109,7 @@ bool WriterBackend::WriterInfo::FromBroker(broker::data d) { if ( ! (k && v) ) return false; - auto p = std::make_pair(util::copy_string(k->c_str()), util::copy_string(v->c_str())); + auto p = std::make_pair(util::copy_string(k->c_str(), k->size()), util::copy_string(v->c_str(), v->size())); config.insert(p); } diff --git a/src/script_opt/CPP/RuntimeInits.cc b/src/script_opt/CPP/RuntimeInits.cc index d5757a776c..80c2337f63 100644 --- a/src/script_opt/CPP/RuntimeInits.cc +++ b/src/script_opt/CPP/RuntimeInits.cc @@ -405,7 +405,7 @@ int CPP_FieldMapping::ComputeOffset(InitsManager* im) const { if ( fm_offset < 0 ) { // field does not exist, create it fm_offset = r->NumFields(); - auto id = util::copy_string(field_name.c_str()); + auto id = util::copy_string(field_name.c_str(), field_name.size()); auto type = im->Types(field_type); AttributesPtr attrs; diff --git a/src/spicy/runtime-support.cc b/src/spicy/runtime-support.cc index 58fa96041c..497abb91ea 100644 --- a/src/spicy/runtime-support.cc +++ b/src/spicy/runtime-support.cc @@ -146,7 +146,7 @@ TypePtr rt::create_record_type(const std::string& ns, const std::string& id, attrs->AddAttr(std::move(log_)); } - decls->append(new TypeDecl(util::copy_string(f.id.c_str()), f.type, std::move(attrs))); + decls->append(new TypeDecl(util::copy_string(f.id.c_str(), f.id.size()), f.type, std::move(attrs))); } return make_intrusive(decls.release()); diff --git a/src/threading/MsgThread.cc b/src/threading/MsgThread.cc index 83c55014ec..a1d2e5e3da 100644 --- a/src/threading/MsgThread.cc +++ b/src/threading/MsgThread.cc @@ -72,7 +72,7 @@ public: ReporterMessage(Type arg_type, MsgThread* thread, std::string_view arg_msg) : OutputMessage("ReporterMessage", thread) { type = arg_type; - msg = util::copy_string(arg_msg.data()); + msg = util::copy_string(arg_msg.data(), arg_msg.size()); } ~ReporterMessage() override { delete[] msg; } @@ -104,7 +104,7 @@ public: DebugMessage(DebugStream arg_stream, MsgThread* thread, std::string_view arg_msg) : OutputMessage("DebugMessage", thread) { stream = arg_stream; - msg = util::copy_string(arg_msg.data()); + msg = util::copy_string(arg_msg.data(), arg_msg.size()); } ~DebugMessage() override { delete[] msg; } diff --git a/src/threading/SerialTypes.cc b/src/threading/SerialTypes.cc index c55c82378c..44cddf4636 100644 --- a/src/threading/SerialTypes.cc +++ b/src/threading/SerialTypes.cc @@ -30,7 +30,7 @@ bool Field::Read(detail::SerializationFormat* fmt) { if ( ! fmt->Read(&tmp_secondary_name, "secondary_name") ) return false; - secondary_name = util::copy_string(tmp_secondary_name.c_str()); + secondary_name = util::copy_string(tmp_secondary_name.c_str(), tmp_secondary_name.size()); } else secondary_name = nullptr; @@ -41,7 +41,7 @@ bool Field::Read(detail::SerializationFormat* fmt) { if ( ! success ) return false; - name = util::copy_string(tmp_name.c_str()); + name = util::copy_string(tmp_name.c_str(), tmp_name.size()); type = static_cast(t); subtype = static_cast(st); diff --git a/src/threading/formatters/Ascii.cc b/src/threading/formatters/Ascii.cc index b296e5b8af..22ef1e8efb 100644 --- a/src/threading/formatters/Ascii.cc +++ b/src/threading/formatters/Ascii.cc @@ -287,7 +287,7 @@ Value* Ascii::ParseValue(const string& s, const string& name, TypeTag type, Type // Remove the '/'s candidate.erase(0, 1); candidate.erase(candidate.size() - 1); - val->val.pattern_text_val = util::copy_string(candidate.c_str()); + val->val.pattern_text_val = util::copy_string(candidate.c_str(), candidate.size()); break; } } diff --git a/src/util.cc b/src/util.cc index 5ef75ae00e..152c12a3f7 100644 --- a/src/util.cc +++ b/src/util.cc @@ -1022,6 +1022,16 @@ std::string get_escaped_string(const char* str, size_t len, bool escape_all) { return get_escaped_string(&d, str, len, escape_all)->Description(); } +char* copy_string(const char* str, size_t len) { + if ( ! str ) + return nullptr; + + char* c = new char[len + 1]; + memcpy(c, str, len); + c[len] = '\0'; + return c; +} + char* copy_string(const char* s) { if ( ! s ) return nullptr; diff --git a/src/util.h b/src/util.h index 8b89dd838c..3a69293ad2 100644 --- a/src/util.h +++ b/src/util.h @@ -330,6 +330,7 @@ std::vector* tokenize_string(std::string_view input, std::string_vi std::vector tokenize_string(std::string_view input, const char delim) noexcept; +extern char* copy_string(const char* str, size_t len); extern char* copy_string(const char* s); extern bool streq(const char* s1, const char* s2); extern bool starts_with(std::string_view s, std::string_view beginning); diff --git a/src/zeek-setup.cc b/src/zeek-setup.cc index 12ecb51676..d215da56a8 100644 --- a/src/zeek-setup.cc +++ b/src/zeek-setup.cc @@ -434,15 +434,14 @@ static std::vector get_script_signature_files() { std::vector rval; // Parse rule files defined on the script level. - char* script_signature_files = util::copy_string(id::find_val("signature_files")->AsString()->CheckString()); + auto script_signature_files = id::find_val("signature_files")->AsStringVal()->ToStdString(); - char* tmp = script_signature_files; + char* tmp = script_signature_files.data(); char* s; while ( (s = strsep(&tmp, " \t")) ) if ( *s ) rval.emplace_back(s); - delete[] script_signature_files; return rval; } diff --git a/src/zeek.bif b/src/zeek.bif index e4d59690ff..0ab356e829 100644 --- a/src/zeek.bif +++ b/src/zeek.bif @@ -448,7 +448,8 @@ static bool prepare_environment(zeek::TableVal* tbl, bool set) return false; } - char* tmp = zeek::util::copy_string(key->AsString()->CheckString()); +auto [data,len] = key->AsString()->CheckStringWithSize(); + char* tmp = zeek::util::copy_string(data, len); zeek::util::to_upper(tmp); std::string var1 = zeek::util::fmt("ZEEK_ARG_%s", tmp); delete [] tmp; diff --git a/src/zeekygen/Target.cc b/src/zeekygen/Target.cc index 1837ff73d6..519f4ccd24 100644 --- a/src/zeekygen/Target.cc +++ b/src/zeekygen/Target.cc @@ -439,9 +439,8 @@ vector dir_contents_recursive(string dir) { while ( dir[dir.size() - 1] == '/' ) dir.erase(dir.size() - 1, 1); - char* dir_copy = util::copy_string(dir.c_str()); char** scan_path = new char*[2]; - scan_path[0] = dir_copy; + scan_path[0] = dir.data(); scan_path[1] = NULL; FTS* fts = fts_open(scan_path, FTS_NOCHDIR, 0); @@ -449,7 +448,6 @@ vector dir_contents_recursive(string dir) { if ( ! fts ) { reporter->Error("fts_open failure: %s", strerror(errno)); delete[] scan_path; - delete[] dir_copy; return rval; } @@ -467,7 +465,6 @@ vector dir_contents_recursive(string dir) { reporter->Error("fts_close failure: %s", strerror(errno)); delete[] scan_path; - delete[] dir_copy; return rval; } diff --git a/testing/btest/plugins/reader-plugin/src/Foo.cc b/testing/btest/plugins/reader-plugin/src/Foo.cc index 88d7070111..1c981e7b5a 100644 --- a/testing/btest/plugins/reader-plugin/src/Foo.cc +++ b/testing/btest/plugins/reader-plugin/src/Foo.cc @@ -73,7 +73,7 @@ zeek::threading::Value* Foo::EntryToVal(zeek::TypeTag type, zeek::TypeTag subtyp case zeek::TYPE_STRING: { std::string rnd = RandomString(10); - val->val.string_val.data = zeek::util::copy_string(rnd.c_str()); + val->val.string_val.data = zeek::util::copy_string(rnd.c_str(), rnd.size()); val->val.string_val.length = rnd.size(); break; }