CSE across function calls

This commit is contained in:
Vern Paxson 2023-11-30 13:26:55 -08:00
parent ba9781d83e
commit f8478def21
7 changed files with 132 additions and 174 deletions

View file

@ -264,7 +264,7 @@ public:
* If this function is an instantiated lambda, returns the primary from * If this function is an instantiated lambda, returns the primary from
* which it was constructed. Otherwise, returns this function itself. * which it was constructed. Otherwise, returns this function itself.
*/ */
const ScriptFunc* Primary() { return primary ? primary.get() : this; } const ScriptFunc* Primary() const { return primary ? primary.get() : this; }
/** /**
* Returns the function's frame size. * Returns the function's frame size.

View file

@ -15,90 +15,6 @@ static std::unordered_set<std::string> side_effects_free_BiFs = {
"Analyzer::__register_for_port", "Analyzer::__register_for_port",
"Analyzer::__schedule_analyzer", "Analyzer::__schedule_analyzer",
"Analyzer::__tag", "Analyzer::__tag",
"Broker::__append",
"Broker::__auto_publish",
"Broker::__auto_unpublish",
"Broker::__clear",
"Broker::__close",
"Broker::__create_clone",
"Broker::__create_master",
"Broker::__data",
"Broker::__data_type",
"Broker::__decrement",
"Broker::__erase",
"Broker::__exists",
"Broker::__flush_logs",
"Broker::__forward",
"Broker::__get",
"Broker::__get_index_from_value",
"Broker::__increment",
"Broker::__insert_into_set",
"Broker::__insert_into_table",
"Broker::__is_closed",
"Broker::__keys",
"Broker::__listen",
"Broker::__node_id",
"Broker::__opaque_clone_through_serialization",
"Broker::__peer",
"Broker::__peer_no_retry",
"Broker::__peers",
"Broker::__pop",
"Broker::__publish_id",
"Broker::__push",
"Broker::__put",
"Broker::__put_unique",
"Broker::__record_assign",
"Broker::__record_create",
"Broker::__record_iterator",
"Broker::__record_iterator_last",
"Broker::__record_iterator_next",
"Broker::__record_iterator_value",
"Broker::__record_lookup",
"Broker::__record_size",
"Broker::__remove_from",
"Broker::__set_clear",
"Broker::__set_contains",
"Broker::__set_create",
"Broker::__set_insert",
"Broker::__set_iterator",
"Broker::__set_iterator_last",
"Broker::__set_iterator_next",
"Broker::__set_iterator_value",
"Broker::__set_metrics_export_endpoint_name",
"Broker::__set_metrics_export_interval",
"Broker::__set_metrics_export_prefixes",
"Broker::__set_metrics_export_topic",
"Broker::__set_metrics_import_topics",
"Broker::__set_remove",
"Broker::__set_size",
"Broker::__store_name",
"Broker::__subscribe",
"Broker::__table_clear",
"Broker::__table_contains",
"Broker::__table_create",
"Broker::__table_insert",
"Broker::__table_iterator",
"Broker::__table_iterator_last",
"Broker::__table_iterator_next",
"Broker::__table_iterator_value",
"Broker::__table_lookup",
"Broker::__table_remove",
"Broker::__table_size",
"Broker::__unpeer",
"Broker::__unsubscribe",
"Broker::__vector_clear",
"Broker::__vector_create",
"Broker::__vector_insert",
"Broker::__vector_iterator",
"Broker::__vector_iterator_last",
"Broker::__vector_iterator_next",
"Broker::__vector_iterator_value",
"Broker::__vector_lookup",
"Broker::__vector_remove",
"Broker::__vector_replace",
"Broker::__vector_size",
"Broker::make_event",
"Broker::publish",
"FileExtract::__set_limit", "FileExtract::__set_limit",
"Files::__add_analyzer", "Files::__add_analyzer",
"Files::__analyzer_enabled", "Files::__analyzer_enabled",
@ -544,6 +460,9 @@ static std::unordered_set<std::string> side_effects_free_BiFs = {
// Ones not listed: // Ones not listed:
// //
// Broker::*
// These can manipulate unspecified (at script level) records.
//
// Cluster::publish_hrw // Cluster::publish_hrw
// Cluster::publish_rr // Cluster::publish_rr
// Call script functions to get topic names. // Call script functions to get topic names.

View file

@ -1137,34 +1137,11 @@ bool ProfileFuncs::AssessSideEffects(const ExprPtr& e, IDSet& non_local_ids, std
bool& is_unknown) { bool& is_unknown) {
std::shared_ptr<ProfileFunc> pf; std::shared_ptr<ProfileFunc> pf;
if ( e->Tag() == EXPR_NAME && e->GetType()->Tag() == TYPE_FUNC ) { if ( e->Tag() == EXPR_NAME && e->GetType()->Tag() == TYPE_FUNC )
// This occurs when the expression is itself a function name, and return GetCallSideEffects(e->AsNameExpr(), non_local_ids, aggrs, is_unknown);
// in an attribute context indicates an implicit call.
auto fid = e->AsNameExpr()->Id();
auto fv = fid->GetVal();
if ( ! fv || ! fid->IsConst() ) { ASSERT(expr_profs.count(e.get()) != 0);
// The value is unavailable (likely a bug), or might change pf = expr_profs[e.get()];
// at run-time.
is_unknown = true;
return true;
}
auto func = fv->AsFunc();
if ( func->GetKind() == Func::BUILTIN_FUNC ) {
if ( ! is_side_effect_free(func->Name()) )
is_unknown = true;
return true;
}
auto sf = static_cast<ScriptFunc*>(func)->Primary();
ASSERT(func_profs.count(sf) != 0);
pf = func_profs[sf];
}
else {
ASSERT(expr_profs.count(e.get()) != 0);
pf = expr_profs[e.get()];
}
return AssessSideEffects(pf.get(), non_local_ids, aggrs, is_unknown); return AssessSideEffects(pf.get(), non_local_ids, aggrs, is_unknown);
} }
@ -1207,6 +1184,13 @@ bool ProfileFuncs::AssessSideEffects(const ProfileFunc* pf, IDSet& non_local_ids
} }
for ( auto& f : pf->ScriptCalls() ) { for ( auto& f : pf->ScriptCalls() ) {
if ( f->Flavor() != FUNC_FLAVOR_FUNCTION ) {
// A hook (since events can't be called) - not something
// to analyze further.
is_unknown = true;
return true;
}
auto pff = func_profs[f]; auto pff = func_profs[f];
if ( active_func_profiles.count(pff) > 0 ) if ( active_func_profiles.count(pff) > 0 )
continue; continue;
@ -1305,4 +1289,73 @@ bool ProfileFuncs::GetSideEffects(SideEffectsOp::AccessType access, const Type*
return false; return false;
} }
std::shared_ptr<SideEffectsOp> ProfileFuncs::GetCallSideEffects(const ScriptFunc* sf) {
sf = sf->Primary();
auto sf_se = func_side_effects.find(sf);
if ( sf_se != func_side_effects.end() )
return sf_se->second;
bool is_unknown = false;
IDSet nla;
std::unordered_set<const Type*> mod_aggrs;
ASSERT(func_profs.count(sf) != 0);
auto pf = func_profs[sf];
if ( ! AssessSideEffects(pf.get(), nla, mod_aggrs, is_unknown) )
// Can't figure it out yet.
return nullptr;
auto seo = std::make_shared<SideEffectsOp>(SideEffectsOp::CALL);
seo->AddModNonGlobal(nla);
seo->AddModAggrs(mod_aggrs);
if ( is_unknown )
seo->SetUnknownChanges();
func_side_effects[sf] = seo;
return seo;
}
bool ProfileFuncs::GetCallSideEffects(const NameExpr* n, IDSet& non_local_ids, std::unordered_set<const Type*>& aggrs,
bool& is_unknown) {
// This occurs when the expression is itself a function name, and
// in an attribute context indicates an implicit call.
auto fid = n->Id();
auto fv = fid->GetVal();
if ( ! fv || ! fid->IsConst() ) {
// The value is unavailable (likely a bug), or might change
// at run-time.
is_unknown = true;
return true;
}
auto func = fv->AsFunc();
if ( func->GetKind() == Func::BUILTIN_FUNC ) {
if ( ! is_side_effect_free(func->Name()) )
is_unknown = true;
return true;
}
auto sf = static_cast<ScriptFunc*>(func);
auto seo = GetCallSideEffects(sf);
if ( ! seo )
return false;
if ( seo->HasUnknownChanges() )
is_unknown = true;
for ( auto a : seo->ModAggrs() )
aggrs.insert(a);
for ( auto nl : seo->ModNonLocals() )
non_local_ids.insert(nl);
return true;
}
} // namespace zeek::detail } // namespace zeek::detail

View file

@ -328,6 +328,13 @@ public:
bool GetSideEffects(SideEffectsOp::AccessType access, const Type* t, IDSet& non_local_ids, bool GetSideEffects(SideEffectsOp::AccessType access, const Type* t, IDSet& non_local_ids,
std::unordered_set<const Type*>& aggrs) const; std::unordered_set<const Type*>& aggrs) const;
// Returns nil if side effects are not available. That should never be
// the case after we've done our initial analysis, but is provided
// as a signal so that this method can also be used during that analysis.
bool GetCallSideEffects(const NameExpr* n, IDSet& non_local_ids, std::unordered_set<const Type*>& aggrs,
bool& is_unknown);
std::shared_ptr<SideEffectsOp> GetCallSideEffects(const ScriptFunc* f);
const auto& AttrSideEffects() const { return attr_side_effects; } const auto& AttrSideEffects() const { return attr_side_effects; }
const auto& RecordConstructorEffects() const { return record_constr_with_side_effects; } const auto& RecordConstructorEffects() const { return record_constr_with_side_effects; }
@ -440,6 +447,9 @@ protected:
// appear in "when" clauses), and in that context it suffices. // appear in "when" clauses), and in that context it suffices.
std::unordered_map<const ScriptFunc*, std::shared_ptr<ProfileFunc>> func_profs; std::unordered_map<const ScriptFunc*, std::shared_ptr<ProfileFunc>> func_profs;
// ### ScriptFunc not Func, assumes is_side_effect_free() is cheap
std::unordered_map<const ScriptFunc*, std::shared_ptr<SideEffectsOp>> func_side_effects;
// Maps expressions to their profiles. This is only germane // Maps expressions to their profiles. This is only germane
// externally for LambdaExpr's, but internally it abets memory // externally for LambdaExpr's, but internally it abets memory
// management. // management.

View file

@ -11,6 +11,7 @@
#include "zeek/Stmt.h" #include "zeek/Stmt.h"
#include "zeek/Var.h" #include "zeek/Var.h"
#include "zeek/script_opt/ExprOptInfo.h" #include "zeek/script_opt/ExprOptInfo.h"
#include "zeek/script_opt/FuncInfo.h"
#include "zeek/script_opt/StmtOptInfo.h" #include "zeek/script_opt/StmtOptInfo.h"
#include "zeek/script_opt/TempVar.h" #include "zeek/script_opt/TempVar.h"
@ -951,32 +952,7 @@ TraversalCode CSE_ValidityChecker::PreExpr(const Expr* e) {
break; break;
case EXPR_CALL: case EXPR_CALL:
if ( have_sensitive_IDs ) { if ( CheckCall(e->AsCallExpr()) ) {
auto c = e->AsCallExpr();
auto func = c->Func();
std::string desc;
if ( func->Tag() == EXPR_NAME ) {
auto f = func->AsNameExpr()->Id();
if ( f->IsGlobal() ) {
auto func_v = f->GetVal();
if ( func_v ) {
auto func_vf = func_v->AsFunc();
if ( func_vf->GetKind() == Func::SCRIPT_FUNC )
desc = "script";
else
desc = "BiF";
}
else
desc = "missing";
}
else
desc = "indirect";
}
else
desc = "compound-indirect";
// printf("call sensitivity: %s %s\n", desc.c_str(), obj_desc(e).c_str());
is_valid = false; is_valid = false;
return TC_ABORTALL; return TC_ABORTALL;
} }
@ -990,13 +966,13 @@ TraversalCode CSE_ValidityChecker::PreExpr(const Expr* e) {
case EXPR_RECORD_COERCE: case EXPR_RECORD_COERCE:
case EXPR_RECORD_CONSTRUCTOR: case EXPR_RECORD_CONSTRUCTOR:
// Note, record coercion behaves like constructors in terms of // Note, record coercion behaves like constructors in terms of
// potentially executing &default functions. In either case, // potentially executing &default functions. In either case,
// the type of the expression reflects the type we want to analyze // the type of the expression reflects the type we want to analyze
// for side effects. // for side effects.
if ( CheckRecordConstructor(e->GetType()) ) { if ( CheckRecordConstructor(e->GetType()) ) {
is_valid = false; is_valid = false;
return TC_ABORTALL; return TC_ABORTALL;
} }
break; break;
@ -1022,18 +998,6 @@ TraversalCode CSE_ValidityChecker::PreExpr(const Expr* e) {
return TC_ABORTALL; return TC_ABORTALL;
} }
} }
#if 0
if ( t == EXPR_INDEX && have_sensitive_IDs ) {
// Unfortunately in isolation we can't statically determine
// whether this table has a &default associated with it.
// In principle we could track all instances of the table
// type seen (across the entire set of scripts), and note
// whether any of those include an expression, but that's a
// lot of work for what might be minimal gain.
is_valid = false;
return TC_ABORTALL;
}
#endif
} }
break; break;
@ -1095,8 +1059,23 @@ bool CSE_ValidityChecker::CheckTableMod(const TypePtr& t) const {
return CheckSideEffects(SideEffectsOp::WRITE, t); return CheckSideEffects(SideEffectsOp::WRITE, t);
} }
bool CSE_ValidityChecker::CheckTableRef(const TypePtr& t) const { bool CSE_ValidityChecker::CheckTableRef(const TypePtr& t) const { return CheckSideEffects(SideEffectsOp::READ, t); }
return CheckSideEffects(SideEffectsOp::READ, t);
bool CSE_ValidityChecker::CheckCall(const CallExpr* c) const {
auto func = c->Func();
std::string desc;
if ( func->Tag() != EXPR_NAME )
// Can't analyze indirect calls.
return true;
IDSet non_local_ids;
std::unordered_set<const Type*> aggrs;
bool is_unknown = false;
auto resolved = pfs.GetCallSideEffects(func->AsNameExpr(), non_local_ids, aggrs, is_unknown);
ASSERT(resolved);
return is_unknown || CheckSideEffects(non_local_ids, aggrs);
} }
bool CSE_ValidityChecker::CheckSideEffects(SideEffectsOp::AccessType access, const TypePtr& t) const { bool CSE_ValidityChecker::CheckSideEffects(SideEffectsOp::AccessType access, const TypePtr& t) const {
@ -1106,23 +1085,24 @@ bool CSE_ValidityChecker::CheckSideEffects(SideEffectsOp::AccessType access, con
if ( pfs.GetSideEffects(access, t.get(), non_local_ids, aggrs) ) if ( pfs.GetSideEffects(access, t.get(), non_local_ids, aggrs) )
return true; return true;
return CheckSideEffects(non_local_ids, aggrs);
}
bool CSE_ValidityChecker::CheckSideEffects(const IDSet& non_local_ids,
const std::unordered_set<const Type*>& aggrs) const {
if ( non_local_ids.empty() && aggrs.empty() ) if ( non_local_ids.empty() && aggrs.empty() )
// This is far and away the most common case. // This is far and away the most common case.
return false; return false;
for ( auto i : ids ) { for ( auto i : ids ) {
for ( auto nli : non_local_ids ) for ( auto nli : non_local_ids )
if ( nli == i ) { if ( nli == i )
// printf("non-local ID on %d: %s\n", access, i->Name());
return true; return true;
}
auto i_t = i->GetType(); auto i_t = i->GetType();
for ( auto a : aggrs ) for ( auto a : aggrs )
if ( same_type(a, i_t.get()) ) { if ( same_type(a, i_t.get()) )
// printf("aggr type on %d: %s\n", access, i->Name());
return true; return true;
}
} }
return false; return false;

View file

@ -355,7 +355,9 @@ protected:
bool CheckRecordConstructor(const TypePtr& t) const; bool CheckRecordConstructor(const TypePtr& t) const;
bool CheckTableMod(const TypePtr& t) const; bool CheckTableMod(const TypePtr& t) const;
bool CheckTableRef(const TypePtr& t) const; bool CheckTableRef(const TypePtr& t) const;
bool CheckCall(const CallExpr* c) const;
bool CheckSideEffects(SideEffectsOp::AccessType access, const TypePtr& t) const; bool CheckSideEffects(SideEffectsOp::AccessType access, const TypePtr& t) const;
bool CheckSideEffects(const IDSet& non_local_ids, const std::unordered_set<const Type*>& aggrs) const;
// Profile across all script functions. // Profile across all script functions.
ProfileFuncs& pfs; ProfileFuncs& pfs;

View file

@ -14,18 +14,12 @@ namespace zeek::detail {
// modifications. // modifications.
class SideEffectsOp { class SideEffectsOp {
public: public:
// ### remove NONE? enum AccessType { NONE, CALL, CONSTRUCTION, READ, WRITE };
enum AccessType { NONE, READ, WRITE, CONSTRUCTION };
SideEffectsOp() : access(NONE), type(nullptr) {} // Type can be left off for CALL access.
SideEffectsOp(AccessType at, const Type* t) : access(at), type(t) {} SideEffectsOp(AccessType at = NONE, const Type* t = nullptr) : access(at), type(t) {}
auto GetAccessType() const { return access; } auto GetAccessType() const { return access; }
bool NoSideEffects() const { return access == NONE; }
bool OnReadAccess() const { return access == READ; }
bool OnWriteAccess() const { return access == WRITE; }
bool OnConstruction() const { return access == CONSTRUCTION; }
const Type* GetType() const { return type; } const Type* GetType() const { return type; }
void SetUnknownChanges() { has_unknown_changes = true; } void SetUnknownChanges() { has_unknown_changes = true; }