retention of superseded AST elements to prevent pointer mis-aliasing

This commit is contained in:
Vern Paxson 2023-11-05 16:28:13 -08:00 committed by Arne Welzel
parent 280acc51bf
commit 4ec9a23ce6
9 changed files with 103 additions and 23 deletions

View file

@ -8,6 +8,7 @@ extend-ignore-re = [
"des-ede3-cbc-Env-OID",
"Remove in v6.1.*SupressWeird",
"max_repititions:.*Remove in v6.1",
"mis-aliasing of",
# On purpose
"\"THE NETBIOS NAM\"",
# NFS stuff.

50
src/script_opt/ObjMgr.h Normal file
View file

@ -0,0 +1,50 @@
// See the file "COPYING" in the main distribution directory for copyright.
// Classes for tracking a collection of "const Obj*" pointers for memory
// management purposes. In particular, script optimization often has to
// deal with bare const pointers (because the traversal infrastructure is
// oriented around those, and because of the need to track AST nodes in
// containers, which don't support IntrusivePtr's). During optimization
// AST nodes are both created and replaced/discarded, which can lead to
// mis-aliasing of old instances of those pointers with new ones.
//
// Note, this functionality is only required for data structures with
// lifetimes that span AST-rewriting steps. Those that are germane only
// for a fixed AST instance (such as ProfileFunc and management of
// confluence blocks) don't need to use these.
#pragma once
#include <zeek/Obj.h>
#include <unordered_map>
namespace zeek::detail {
// A class that keeps a const Obj* pointer live - used to isolate instances
// of const_cast.
class ObjWrapper {
public:
ObjWrapper(const Obj* wrappee) {
auto non_const_w = const_cast<Obj*>(wrappee);
wrappee_ptr = {NewRef{}, non_const_w};
}
private:
IntrusivePtr<Obj> wrappee_ptr;
};
// Manages a bunch of const Obj* pointers collectively.
class ObjMgr {
public:
void AddObj(const Obj* o) {
if ( obj_collection.count(o) == 0 )
obj_collection.emplace(std::pair<const Obj*, ObjWrapper>{o, ObjWrapper(o)});
}
private:
std::unordered_map<const Obj*, ObjWrapper> obj_collection;
};
} // namespace zeek::detail

View file

@ -557,6 +557,7 @@ ConstExprPtr Reducer::Fold(ExprPtr e) {
}
void Reducer::FoldedTo(ExprPtr e, ConstExprPtr c) {
om.AddObj(e.get());
constant_exprs[e.get()] = std::move(c);
folded_exprs.push_back(std::move(e));
}
@ -712,6 +713,8 @@ IDPtr Reducer::GenTemporary(TypePtr t, ExprPtr rhs, IDPtr id) {
temp_id->SetType(t);
temps.push_back(temp);
om.AddObj(temp_id.get());
ids_to_temps[temp_id.get()] = temp;
return temp_id;
@ -760,6 +763,7 @@ IDPtr Reducer::GenLocal(const IDPtr& orig) {
prev = orig_to_new_locals[orig.get()];
AddNewLocal(local_id);
om.AddObj(orig.get());
orig_to_new_locals[orig.get()] = local_id;
if ( ! block_locals.empty() && ret_vars.count(orig.get()) == 0 )

View file

@ -6,6 +6,7 @@
#include "zeek/Scope.h"
#include "zeek/Stmt.h"
#include "zeek/Traverse.h"
#include "zeek/script_opt/ObjMgr.h"
#include "zeek/script_opt/ProfileFunc.h"
namespace zeek::detail {
@ -21,7 +22,10 @@ public:
void SetReadyToOptimize() { opt_ready = true; }
void SetCurrStmt(const Stmt* stmt) { curr_stmt = stmt; }
void SetCurrStmt(const Stmt* stmt) {
om.AddObj(stmt);
curr_stmt = stmt;
}
ExprPtr GenTemporaryExpr(const TypePtr& t, ExprPtr rhs);
@ -108,11 +112,17 @@ public:
// Tells the reducer to prune the given statement during the
// next reduction pass.
void AddStmtToOmit(const Stmt* s) { omitted_stmts.insert(s); }
void AddStmtToOmit(const Stmt* s) {
om.AddObj(s);
omitted_stmts.insert(s);
}
// Tells the reducer to replace the given statement during the
// next reduction pass.
void AddStmtToReplace(const Stmt* s_old, StmtPtr s_new) { replaced_stmts[s_old] = std::move(s_new); }
void AddStmtToReplace(const Stmt* s_old, StmtPtr s_new) {
om.AddObj(s_old);
replaced_stmts[s_old] = std::move(s_new);
}
// Tells the reducer that it can reclaim the storage associated
// with the omitted statements.
@ -280,6 +290,10 @@ protected:
// if any. When we pop the block, we restore the previous mapping.
std::vector<std::unordered_map<const ID*, IDPtr>> block_locals;
// Memory management for AST elements that might change during
// the reduction/optimization processes.
ObjMgr om;
// Tracks how deeply we are in "bifurcation", i.e., duplicating
// code for if-else cascades. We need to cap this at a certain
// depth or else we can get functions whose size blows up

View file

@ -36,6 +36,7 @@ static std::string CPP_dir; // where to generate C++ code
static std::unordered_map<const ScriptFunc*, LambdaExpr*> lambdas;
static std::unordered_set<const ScriptFunc*> when_lambdas;
static ScriptFuncPtr global_stmts;
static size_t global_stmts_ind; // index into Funcs corresponding to global_stmts
void analyze_func(ScriptFuncPtr f) {
// Even if we're analyzing only a subset of the scripts, we still
@ -56,7 +57,7 @@ bool is_lambda(const ScriptFunc* f) { return lambdas.count(f) > 0; }
bool is_when_lambda(const ScriptFunc* f) { return when_lambdas.count(f) > 0; }
size_t analyze_global_stmts(Stmt* stmts) {
void analyze_global_stmts(Stmt* stmts) {
// We ignore analysis_options.only_{files,funcs} - if they're in use, later
// logic will keep this function from being compiled, but it's handy
// now to enter it into "funcs" so we have a FuncInfo to return.
@ -71,11 +72,15 @@ size_t analyze_global_stmts(Stmt* stmts) {
global_stmts = make_intrusive<ScriptFunc>(id);
global_stmts->AddBody(stmts->ThisPtr(), empty_inits, sc->Length());
global_stmts_ind = funcs.size();
funcs.emplace_back(global_stmts, sc, stmts->ThisPtr(), 0);
return funcs.size() - 1;
}
const FuncInfo& get_global_stmts(size_t global_ind) { return funcs[global_ind]; }
std::pair<StmtPtr, ScopePtr> get_global_stmts() {
ASSERT(global_stmts);
auto& fi = funcs[global_stmts_ind];
return std::pair<StmtPtr, ScopePtr>{fi.Body(), fi.Scope()};
}
void add_func_analysis_pattern(AnalyOpt& opts, const char* pat) {
try {

View file

@ -175,17 +175,11 @@ extern void analyze_when_lambda(LambdaExpr* f);
extern bool is_lambda(const ScriptFunc* f);
extern bool is_when_lambda(const ScriptFunc* f);
// Analyze the given top-level statement(s) for optimization. Returns
// an opaque value that can be provided to get_global_stmts() to retrieve
// the corresponding FuncInfo, which reflects an argument-less quasi-function
// that can be Invoked, or its body executed directly, to execute the
// statements. This indirection is used in case in between the call to
// analyze_global_stmts() and using the value subsequently, the FuncInfo may
// have been updated or shifted. a pointer to a FuncInfo for an argument-less
// quasi-function that can be Invoked, or its body executed directly, to
// execute the statements.
extern size_t analyze_global_stmts(Stmt* stmts);
extern const FuncInfo& get_global_stmts(size_t handle);
// Analyze the given top-level statement(s) for optimization.
extern void analyze_global_stmts(Stmt* stmts);
// Returns the body and scope for the previously analyzed global statements.
extern std::pair<StmtPtr, ScopePtr> get_global_stmts();
// Add a pattern to the "only_funcs" list.
extern void add_func_analysis_pattern(AnalyOpt& opts, const char* pat);

View file

@ -169,8 +169,10 @@ bool UseDefs::CheckIfUnused(const Stmt* s, const ID* id, bool report) {
}
UDs UseDefs::PropagateUDs(const Stmt* s, UDs succ_UDs, const Stmt* succ_stmt, bool second_pass) {
if ( ! second_pass )
if ( ! second_pass ) {
om.AddObj(s);
stmts.push_back(s);
}
switch ( s->Tag() ) {
case STMT_LIST: {
@ -184,8 +186,10 @@ UDs UseDefs::PropagateUDs(const Stmt* s, UDs succ_UDs, const Stmt* succ_stmt, bo
if ( i == int(stmts.size()) - 1 ) { // Very last statement.
succ = succ_stmt;
if ( successor2.find(s) != successor2.end() )
if ( successor2.find(s) != successor2.end() ) {
om.AddObj(s_i);
successor2[s_i] = successor2[s];
}
}
else
succ = stmts[i + 1].get();
@ -326,6 +330,7 @@ UDs UseDefs::PropagateUDs(const Stmt* s, UDs succ_UDs, const Stmt* succ_stmt, bo
// The loop body has two potential successors, itself
// and the successor of the entire "for" statement.
om.AddObj(body);
successor2[body] = succ_stmt;
auto body_UDs = PropagateUDs(body, succ_UDs, body, second_pass);
@ -359,6 +364,7 @@ UDs UseDefs::PropagateUDs(const Stmt* s, UDs succ_UDs, const Stmt* succ_stmt, bo
// See note above for STMT_FOR regarding propagating
// around the loop.
auto succ = cond_stmt ? cond_stmt : body;
om.AddObj(body.get());
successor2[body.get()] = succ_stmt;
auto body_UDs = PropagateUDs(body.get(), succ_UDs, succ.get(), second_pass);
@ -617,6 +623,7 @@ UDs UseDefs::UD_Union(const UDs& u1, const UDs& u2, const UDs& u3) const {
}
UDs UseDefs::UseUDs(const Stmt* s, UDs uds) {
om.AddObj(s);
use_defs_map[s] = uds;
UDs_are_copies.insert(s);
return uds;
@ -630,6 +637,7 @@ UDs UseDefs::CreateExprUDs(const Stmt* s, const Expr* e, const UDs& uds) {
}
UDs UseDefs::CreateUDs(const Stmt* s, UDs uds) {
om.AddObj(s);
use_defs_map[s] = uds;
UDs_are_copies.erase(s);
return uds;

View file

@ -7,6 +7,7 @@
#include <vector>
#include "zeek/Expr.h"
#include "zeek/script_opt/ObjMgr.h"
namespace zeek::detail {
@ -166,6 +167,8 @@ private:
// in only one or the other.
std::unordered_map<const Stmt*, const Stmt*> successor2;
ObjMgr om;
StmtPtr body;
std::shared_ptr<Reducer> rc;
FuncTypePtr ft;

View file

@ -923,7 +923,8 @@ SetupResult setup(int argc, char** argv, Options* zopts) {
exit(reporter->Errors() != 0);
}
auto init_stmts_handle = stmts ? analyze_global_stmts(stmts) : 0;
if ( stmts )
analyze_global_stmts(stmts);
analyze_scripts(options.no_unused_warnings);
@ -1026,13 +1027,13 @@ SetupResult setup(int argc, char** argv, Options* zopts) {
ZEEK_LSAN_ENABLE();
if ( stmts ) {
auto& init_stmts = get_global_stmts(init_stmts_handle);
auto [body, scope] = get_global_stmts();
StmtFlowType flow;
Frame f(init_stmts.Scope()->Length(), nullptr, nullptr);
Frame f(scope->Length(), nullptr, nullptr);
g_frame_stack.push_back(&f);
try {
init_stmts.Body()->Exec(&f, flow);
body->Exec(&f, flow);
} catch ( InterpreterException& ) {
reporter->FatalError("failed to execute script statements at top-level scope");
}