CSE across function calls

This commit is contained in:
Vern Paxson 2023-11-30 13:26:55 -08:00
parent ba9781d83e
commit f8478def21
7 changed files with 132 additions and 174 deletions

View file

@ -264,7 +264,7 @@ public:
* If this function is an instantiated lambda, returns the primary from
* which it was constructed. Otherwise, returns this function itself.
*/
const ScriptFunc* Primary() { return primary ? primary.get() : this; }
const ScriptFunc* Primary() const { return primary ? primary.get() : this; }
/**
* Returns the function's frame size.

View file

@ -15,90 +15,6 @@ static std::unordered_set<std::string> side_effects_free_BiFs = {
"Analyzer::__register_for_port",
"Analyzer::__schedule_analyzer",
"Analyzer::__tag",
"Broker::__append",
"Broker::__auto_publish",
"Broker::__auto_unpublish",
"Broker::__clear",
"Broker::__close",
"Broker::__create_clone",
"Broker::__create_master",
"Broker::__data",
"Broker::__data_type",
"Broker::__decrement",
"Broker::__erase",
"Broker::__exists",
"Broker::__flush_logs",
"Broker::__forward",
"Broker::__get",
"Broker::__get_index_from_value",
"Broker::__increment",
"Broker::__insert_into_set",
"Broker::__insert_into_table",
"Broker::__is_closed",
"Broker::__keys",
"Broker::__listen",
"Broker::__node_id",
"Broker::__opaque_clone_through_serialization",
"Broker::__peer",
"Broker::__peer_no_retry",
"Broker::__peers",
"Broker::__pop",
"Broker::__publish_id",
"Broker::__push",
"Broker::__put",
"Broker::__put_unique",
"Broker::__record_assign",
"Broker::__record_create",
"Broker::__record_iterator",
"Broker::__record_iterator_last",
"Broker::__record_iterator_next",
"Broker::__record_iterator_value",
"Broker::__record_lookup",
"Broker::__record_size",
"Broker::__remove_from",
"Broker::__set_clear",
"Broker::__set_contains",
"Broker::__set_create",
"Broker::__set_insert",
"Broker::__set_iterator",
"Broker::__set_iterator_last",
"Broker::__set_iterator_next",
"Broker::__set_iterator_value",
"Broker::__set_metrics_export_endpoint_name",
"Broker::__set_metrics_export_interval",
"Broker::__set_metrics_export_prefixes",
"Broker::__set_metrics_export_topic",
"Broker::__set_metrics_import_topics",
"Broker::__set_remove",
"Broker::__set_size",
"Broker::__store_name",
"Broker::__subscribe",
"Broker::__table_clear",
"Broker::__table_contains",
"Broker::__table_create",
"Broker::__table_insert",
"Broker::__table_iterator",
"Broker::__table_iterator_last",
"Broker::__table_iterator_next",
"Broker::__table_iterator_value",
"Broker::__table_lookup",
"Broker::__table_remove",
"Broker::__table_size",
"Broker::__unpeer",
"Broker::__unsubscribe",
"Broker::__vector_clear",
"Broker::__vector_create",
"Broker::__vector_insert",
"Broker::__vector_iterator",
"Broker::__vector_iterator_last",
"Broker::__vector_iterator_next",
"Broker::__vector_iterator_value",
"Broker::__vector_lookup",
"Broker::__vector_remove",
"Broker::__vector_replace",
"Broker::__vector_size",
"Broker::make_event",
"Broker::publish",
"FileExtract::__set_limit",
"Files::__add_analyzer",
"Files::__analyzer_enabled",
@ -544,6 +460,9 @@ static std::unordered_set<std::string> side_effects_free_BiFs = {
// Ones not listed:
//
// Broker::*
// These can manipulate unspecified (at script level) records.
//
// Cluster::publish_hrw
// Cluster::publish_rr
// Call script functions to get topic names.

View file

@ -1137,34 +1137,11 @@ bool ProfileFuncs::AssessSideEffects(const ExprPtr& e, IDSet& non_local_ids, std
bool& is_unknown) {
std::shared_ptr<ProfileFunc> pf;
if ( e->Tag() == EXPR_NAME && e->GetType()->Tag() == TYPE_FUNC ) {
// This occurs when the expression is itself a function name, and
// in an attribute context indicates an implicit call.
auto fid = e->AsNameExpr()->Id();
auto fv = fid->GetVal();
if ( e->Tag() == EXPR_NAME && e->GetType()->Tag() == TYPE_FUNC )
return GetCallSideEffects(e->AsNameExpr(), non_local_ids, aggrs, is_unknown);
if ( ! fv || ! fid->IsConst() ) {
// The value is unavailable (likely a bug), or might change
// at run-time.
is_unknown = true;
return true;
}
auto func = fv->AsFunc();
if ( func->GetKind() == Func::BUILTIN_FUNC ) {
if ( ! is_side_effect_free(func->Name()) )
is_unknown = true;
return true;
}
auto sf = static_cast<ScriptFunc*>(func)->Primary();
ASSERT(func_profs.count(sf) != 0);
pf = func_profs[sf];
}
else {
ASSERT(expr_profs.count(e.get()) != 0);
pf = expr_profs[e.get()];
}
ASSERT(expr_profs.count(e.get()) != 0);
pf = expr_profs[e.get()];
return AssessSideEffects(pf.get(), non_local_ids, aggrs, is_unknown);
}
@ -1207,6 +1184,13 @@ bool ProfileFuncs::AssessSideEffects(const ProfileFunc* pf, IDSet& non_local_ids
}
for ( auto& f : pf->ScriptCalls() ) {
if ( f->Flavor() != FUNC_FLAVOR_FUNCTION ) {
// A hook (since events can't be called) - not something
// to analyze further.
is_unknown = true;
return true;
}
auto pff = func_profs[f];
if ( active_func_profiles.count(pff) > 0 )
continue;
@ -1305,4 +1289,73 @@ bool ProfileFuncs::GetSideEffects(SideEffectsOp::AccessType access, const Type*
return false;
}
std::shared_ptr<SideEffectsOp> ProfileFuncs::GetCallSideEffects(const ScriptFunc* sf) {
sf = sf->Primary();
auto sf_se = func_side_effects.find(sf);
if ( sf_se != func_side_effects.end() )
return sf_se->second;
bool is_unknown = false;
IDSet nla;
std::unordered_set<const Type*> mod_aggrs;
ASSERT(func_profs.count(sf) != 0);
auto pf = func_profs[sf];
if ( ! AssessSideEffects(pf.get(), nla, mod_aggrs, is_unknown) )
// Can't figure it out yet.
return nullptr;
auto seo = std::make_shared<SideEffectsOp>(SideEffectsOp::CALL);
seo->AddModNonGlobal(nla);
seo->AddModAggrs(mod_aggrs);
if ( is_unknown )
seo->SetUnknownChanges();
func_side_effects[sf] = seo;
return seo;
}
bool ProfileFuncs::GetCallSideEffects(const NameExpr* n, IDSet& non_local_ids, std::unordered_set<const Type*>& aggrs,
bool& is_unknown) {
// This occurs when the expression is itself a function name, and
// in an attribute context indicates an implicit call.
auto fid = n->Id();
auto fv = fid->GetVal();
if ( ! fv || ! fid->IsConst() ) {
// The value is unavailable (likely a bug), or might change
// at run-time.
is_unknown = true;
return true;
}
auto func = fv->AsFunc();
if ( func->GetKind() == Func::BUILTIN_FUNC ) {
if ( ! is_side_effect_free(func->Name()) )
is_unknown = true;
return true;
}
auto sf = static_cast<ScriptFunc*>(func);
auto seo = GetCallSideEffects(sf);
if ( ! seo )
return false;
if ( seo->HasUnknownChanges() )
is_unknown = true;
for ( auto a : seo->ModAggrs() )
aggrs.insert(a);
for ( auto nl : seo->ModNonLocals() )
non_local_ids.insert(nl);
return true;
}
} // namespace zeek::detail

View file

@ -328,6 +328,13 @@ public:
bool GetSideEffects(SideEffectsOp::AccessType access, const Type* t, IDSet& non_local_ids,
std::unordered_set<const Type*>& aggrs) const;
// Returns nil if side effects are not available. That should never be
// the case after we've done our initial analysis, but is provided
// as a signal so that this method can also be used during that analysis.
bool GetCallSideEffects(const NameExpr* n, IDSet& non_local_ids, std::unordered_set<const Type*>& aggrs,
bool& is_unknown);
std::shared_ptr<SideEffectsOp> GetCallSideEffects(const ScriptFunc* f);
const auto& AttrSideEffects() const { return attr_side_effects; }
const auto& RecordConstructorEffects() const { return record_constr_with_side_effects; }
@ -440,6 +447,9 @@ protected:
// appear in "when" clauses), and in that context it suffices.
std::unordered_map<const ScriptFunc*, std::shared_ptr<ProfileFunc>> func_profs;
// ### ScriptFunc not Func, assumes is_side_effect_free() is cheap
std::unordered_map<const ScriptFunc*, std::shared_ptr<SideEffectsOp>> func_side_effects;
// Maps expressions to their profiles. This is only germane
// externally for LambdaExpr's, but internally it abets memory
// management.

View file

@ -11,6 +11,7 @@
#include "zeek/Stmt.h"
#include "zeek/Var.h"
#include "zeek/script_opt/ExprOptInfo.h"
#include "zeek/script_opt/FuncInfo.h"
#include "zeek/script_opt/StmtOptInfo.h"
#include "zeek/script_opt/TempVar.h"
@ -951,32 +952,7 @@ TraversalCode CSE_ValidityChecker::PreExpr(const Expr* e) {
break;
case EXPR_CALL:
if ( have_sensitive_IDs ) {
auto c = e->AsCallExpr();
auto func = c->Func();
std::string desc;
if ( func->Tag() == EXPR_NAME ) {
auto f = func->AsNameExpr()->Id();
if ( f->IsGlobal() ) {
auto func_v = f->GetVal();
if ( func_v ) {
auto func_vf = func_v->AsFunc();
if ( func_vf->GetKind() == Func::SCRIPT_FUNC )
desc = "script";
else
desc = "BiF";
}
else
desc = "missing";
}
else
desc = "indirect";
}
else
desc = "compound-indirect";
// printf("call sensitivity: %s %s\n", desc.c_str(), obj_desc(e).c_str());
if ( CheckCall(e->AsCallExpr()) ) {
is_valid = false;
return TC_ABORTALL;
}
@ -990,13 +966,13 @@ TraversalCode CSE_ValidityChecker::PreExpr(const Expr* e) {
case EXPR_RECORD_COERCE:
case EXPR_RECORD_CONSTRUCTOR:
// Note, record coercion behaves like constructors in terms of
// potentially executing &default functions. In either case,
// the type of the expression reflects the type we want to analyze
// for side effects.
// Note, record coercion behaves like constructors in terms of
// potentially executing &default functions. In either case,
// the type of the expression reflects the type we want to analyze
// for side effects.
if ( CheckRecordConstructor(e->GetType()) ) {
is_valid = false;
return TC_ABORTALL;
is_valid = false;
return TC_ABORTALL;
}
break;
@ -1022,18 +998,6 @@ TraversalCode CSE_ValidityChecker::PreExpr(const Expr* e) {
return TC_ABORTALL;
}
}
#if 0
if ( t == EXPR_INDEX && have_sensitive_IDs ) {
// Unfortunately in isolation we can't statically determine
// whether this table has a &default associated with it.
// In principle we could track all instances of the table
// type seen (across the entire set of scripts), and note
// whether any of those include an expression, but that's a
// lot of work for what might be minimal gain.
is_valid = false;
return TC_ABORTALL;
}
#endif
}
break;
@ -1095,8 +1059,23 @@ bool CSE_ValidityChecker::CheckTableMod(const TypePtr& t) const {
return CheckSideEffects(SideEffectsOp::WRITE, t);
}
bool CSE_ValidityChecker::CheckTableRef(const TypePtr& t) const {
return CheckSideEffects(SideEffectsOp::READ, t);
bool CSE_ValidityChecker::CheckTableRef(const TypePtr& t) const { return CheckSideEffects(SideEffectsOp::READ, t); }
bool CSE_ValidityChecker::CheckCall(const CallExpr* c) const {
auto func = c->Func();
std::string desc;
if ( func->Tag() != EXPR_NAME )
// Can't analyze indirect calls.
return true;
IDSet non_local_ids;
std::unordered_set<const Type*> aggrs;
bool is_unknown = false;
auto resolved = pfs.GetCallSideEffects(func->AsNameExpr(), non_local_ids, aggrs, is_unknown);
ASSERT(resolved);
return is_unknown || CheckSideEffects(non_local_ids, aggrs);
}
bool CSE_ValidityChecker::CheckSideEffects(SideEffectsOp::AccessType access, const TypePtr& t) const {
@ -1106,23 +1085,24 @@ bool CSE_ValidityChecker::CheckSideEffects(SideEffectsOp::AccessType access, con
if ( pfs.GetSideEffects(access, t.get(), non_local_ids, aggrs) )
return true;
return CheckSideEffects(non_local_ids, aggrs);
}
bool CSE_ValidityChecker::CheckSideEffects(const IDSet& non_local_ids,
const std::unordered_set<const Type*>& aggrs) const {
if ( non_local_ids.empty() && aggrs.empty() )
// This is far and away the most common case.
// This is far and away the most common case.
return false;
for ( auto i : ids ) {
for ( auto nli : non_local_ids )
if ( nli == i ) {
// printf("non-local ID on %d: %s\n", access, i->Name());
if ( nli == i )
return true;
}
auto i_t = i->GetType();
for ( auto a : aggrs )
if ( same_type(a, i_t.get()) ) {
// printf("aggr type on %d: %s\n", access, i->Name());
if ( same_type(a, i_t.get()) )
return true;
}
}
return false;

View file

@ -355,7 +355,9 @@ protected:
bool CheckRecordConstructor(const TypePtr& t) const;
bool CheckTableMod(const TypePtr& t) const;
bool CheckTableRef(const TypePtr& t) const;
bool CheckCall(const CallExpr* c) const;
bool CheckSideEffects(SideEffectsOp::AccessType access, const TypePtr& t) const;
bool CheckSideEffects(const IDSet& non_local_ids, const std::unordered_set<const Type*>& aggrs) const;
// Profile across all script functions.
ProfileFuncs& pfs;

View file

@ -14,18 +14,12 @@ namespace zeek::detail {
// modifications.
class SideEffectsOp {
public:
// ### remove NONE?
enum AccessType { NONE, READ, WRITE, CONSTRUCTION };
enum AccessType { NONE, CALL, CONSTRUCTION, READ, WRITE };
SideEffectsOp() : access(NONE), type(nullptr) {}
SideEffectsOp(AccessType at, const Type* t) : access(at), type(t) {}
// Type can be left off for CALL access.
SideEffectsOp(AccessType at = NONE, const Type* t = nullptr) : access(at), type(t) {}
auto GetAccessType() const { return access; }
bool NoSideEffects() const { return access == NONE; }
bool OnReadAccess() const { return access == READ; }
bool OnWriteAccess() const { return access == WRITE; }
bool OnConstruction() const { return access == CONSTRUCTION; }
const Type* GetType() const { return type; }
void SetUnknownChanges() { has_unknown_changes = true; }