zeek/src/script_opt/ProfileFunc.cc

// See the file "COPYING" in the main distribution directory for copyright.

#include "zeek/script_opt/ProfileFunc.h"

#include <unistd.h>
#include <cerrno>

#include "zeek/Desc.h"
#include "zeek/Func.h"
#include "zeek/Stmt.h"
#include "zeek/script_opt/FuncInfo.h"
#include "zeek/script_opt/IDOptInfo.h"

namespace zeek::detail {

// Produces the profiling hash for an Obj by rendering its description
// with determinism enabled and hashing the resulting text.
p_hash_type p_hash(const Obj* o) {
    ODesc desc;
    desc.SetDeterminism(true);
    o->Describe(&desc);

    const auto text = desc.Description();
    return p_hash(text);
}

// Profiles one body of the given function.  If the function's type
// declares captures, each capture's identifier is recorded together
// with its position in the capture list before the body is traversed.
ProfileFunc::ProfileFunc(const Func* func, const StmtPtr& body, bool _abs_rec_fields) {
    profiled_func = func;
    profiled_body = body.get();
    abs_rec_fields = _abs_rec_fields;

    auto func_type = func->GetType()->AsFuncType();
    auto& declared_caps = func_type->GetCaptures();

    if ( declared_caps ) {
        int next_offset = 0;

        for ( auto& cap : *declared_caps ) {
            auto cap_id = cap.Id().get();
            captures.insert(cap_id);
            captures_offsets[cap_id] = next_offset;
            ++next_offset;
        }
    }

    Profile(func_type, body);
}

// Profiles a bare statement: there is no function type involved, so
// the statement is simply traversed with this object as the callback.
ProfileFunc::ProfileFunc(const Stmt* s, bool _abs_rec_fields) {
    abs_rec_fields = _abs_rec_fields;
    profiled_body = s;

    s->Traverse(this);
}

// Profiles an expression.  Lambda expressions are profiled as functions
// (their outer identifiers are treated as captures); any other
// expression is traversed directly.
ProfileFunc::ProfileFunc(const Expr* e, bool _abs_rec_fields) {
    profiled_expr = e;
    abs_rec_fields = _abs_rec_fields;

    if ( e->Tag() != EXPR_LAMBDA ) {
        // There's no function type to work from, so just walk
        // the expression itself.
        e->Traverse(this);
        return;
    }

    auto lambda = e->AsLambdaExpr();

    // Record each outer identifier as a capture, numbering them in
    // the order in which the lambda lists them.
    int next_offset = 0;

    for ( auto outer_id : lambda->OuterIDs() ) {
        captures.insert(outer_id);
        captures_offsets[outer_id] = next_offset++;
    }

    Profile(lambda->GetType()->AsFuncType(), lambda->Ingredients()->Body());
}

// Shared profiling driver: notes how many parameters the function type
// has, tracks the function type itself, and then traverses the body.
void ProfileFunc::Profile(const FuncType* ft, const StmtPtr& body) {
    const auto& param_rec = ft->Params();
    num_params = param_rec->NumFields();

    TrackType(ft);

    body->Traverse(this);
}

// Traversal callback run on each statement before its children.  Every
// statement is appended to "stmts"; a few statement kinds additionally
// contribute identifiers or bookkeeping, as described below.
TraversalCode ProfileFunc::PreStmt(const Stmt* s) {
    stmts.push_back(s);

    const auto tag = s->Tag();

    if ( tag == STMT_INIT ) {
        for ( const auto& init_id : s->AsInitStmt()->Inits() ) {
            inits.insert(init_id.get());

            auto& init_t = init_id->GetType();
            TrackType(init_t);

            auto init_attrs = init_id->GetAttrs();
            if ( init_attrs )
                constructor_attrs[init_attrs.get()] = init_t;

            if ( init_t->Tag() == TYPE_RECORD )
                CheckRecordConstructor(init_t);
        }

        // Stop here rather than descending into the statement: the
        // identifiers should only count as locals if they also show
        // up somewhere else.
        return TC_ABORTSTMT;
    }

    if ( tag == STMT_WHEN ) {
        ++num_when_stmts;

        auto when_info = s->AsWhenStmt()->Info();

        for ( auto new_local : when_info->WhenNewLocals() )
            when_locals.insert(new_local);
    }

    else if ( tag == STMT_FOR ) {
        auto for_stmt = s->AsForStmt();
        auto loop_vars = for_stmt->LoopVars();
        auto value_var = for_stmt->ValueVar();

        for ( auto loop_id : *loop_vars )
            locals.insert(loop_id);

        if ( value_var )
            locals.insert(value_var.get());
    }

    else if ( tag == STMT_SWITCH ) {
        // For a type-case switch, collect the identifiers that the
        // cases introduce so that they appear in our set of locals.
        // One would expect any such identifier to be used later on
        // (and thus picked up as a local anyway), but if it isn't,
        // the set of locals to track would otherwise be incomplete.
        auto switch_stmt = s->AsSwitchStmt();
        bool saw_type_cases = false;

        for ( auto& sw_case : *switch_stmt->Cases() ) {
            auto type_ids = sw_case->TypeCases();
            if ( ! type_ids )
                continue;

            for ( auto type_id : *type_ids ) {
                // Skip placeholder identifiers, which are used
                // when no explicit one was given.
                if ( type_id->Name() )
                    locals.insert(type_id);
            }

            saw_type_cases = true;
        }

        if ( saw_type_cases )
            type_switches.insert(switch_stmt);
        else
            expr_switches.insert(switch_stmt);
    }

    return TC_CONTINUE;
}

// Traversal callback run on each expression before its children.
//
// Every expression is appended to "exprs" and its type is tracked.
// Depending on the expression's tag, the profile additionally records
// constants, globals/locals/parameters, events, record-field hashes,
// table references, assignment targets and modified aggregates, called
// script functions and BiFs, functions that may be invoked indirectly,
// lambdas, constructor attributes, and table-coercion type aliases.
//
// Returns TC_CONTINUE to let the traversal descend into the expression's
// children, or TC_ABORTSTMT in the cases where the children have either
// been traversed manually here or should deliberately not be visited.
TraversalCode ProfileFunc::PreExpr(const Expr* e) {
    exprs.push_back(e);

    TrackType(e->GetType());

    switch ( e->Tag() ) {
        case EXPR_CONST: constants.push_back(e->AsConstExpr()); break;

        case EXPR_NAME: {
            auto n = e->AsNameExpr();
            auto id = n->Id();

            // Turns out that NameExpr's can be constructed using a
            // different Type* than that of the identifier itself,
            // so be sure we track the latter too.
            TrackType(id->GetType());

            if ( id->IsGlobal() ) {
                globals.insert(id);
                all_globals.insert(id);

                const auto& t = id->GetType();
                if ( t->Tag() == TYPE_FUNC )
                    if ( t->AsFuncType()->Flavor() == FUNC_FLAVOR_EVENT )
                        events.insert(id->Name());

                break;
            }

            // This is a tad ugly.  Unfortunately due to the weird way
            // that Zeek function *declarations* work, there's no reliable
            // way to get the list of parameters for a function *definition*,
            // since they can have different names than what's present in the
            // declaration.  So we identify them directly, by knowing that
            // they come at the beginning of the frame ... and being careful
            // to avoid confusing a lambda capture with a low frame offset
            // with a parameter.
            if ( captures.count(id) == 0 && id->Offset() < num_params )
                params.insert(id);

            locals.insert(id);

            break;
        }

        case EXPR_FIELD:
            // Contribute either the field's offset or its name to the
            // additional hashes, depending on abs_rec_fields.
            if ( abs_rec_fields ) {
                auto f = e->AsFieldExpr()->Field();
                addl_hashes.push_back(p_hash(f));
            }
            else {
                auto fn = e->AsFieldExpr()->FieldName();
                addl_hashes.push_back(p_hash(fn));
            }
            break;

        case EXPR_HAS_FIELD:
            // Same idea as for EXPR_FIELD, though here the hash comes
            // from std::hash rather than p_hash.
            if ( abs_rec_fields ) {
                auto f = e->AsHasFieldExpr()->Field();
                addl_hashes.push_back(std::hash<int>{}(f));
            }
            else {
                auto fn = e->AsHasFieldExpr()->FieldName();
                addl_hashes.push_back(std::hash<std::string>{}(fn));
            }
            break;

        case EXPR_INDEX: {
            auto lhs_t = e->GetOp1()->GetType();
            if ( lhs_t->Tag() == TYPE_TABLE )
                tbl_refs.insert(lhs_t.get());
        } break;

        case EXPR_INCR:
        case EXPR_DECR:
        case EXPR_ADD_TO:
        case EXPR_REMOVE_FROM:
        case EXPR_ASSIGN: {
            auto lhs = e->GetOp1();
            bool is_assign = e->Tag() == EXPR_ASSIGN;

            if ( is_assign ) {
                // Check for this being an assignment to a function (as
                // opposed to a call). If so, then the function can be
                // used indirectly.
                auto rhs = e->GetOp2();
                if ( rhs->Tag() == EXPR_NAME ) {
                    auto& rhs_id = rhs->AsNameExpr()->IdPtr();
                    const auto& t = rhs_id->GetType();
                    if ( t->Tag() == TYPE_FUNC && t->AsFuncType()->Flavor() == FUNC_FLAVOR_FUNCTION )
                        indirect_funcs.insert(rhs_id.get());
                }
            }

            if ( lhs->Tag() == EXPR_REF )
                lhs = lhs->GetOp1();

            else if ( is_assign )
                // This isn't a direct assignment, but instead an overloaded
                // use of "=" such as in a table constructor.
                break;

            auto lhs_t = lhs->GetType();

            switch ( lhs->Tag() ) {
                case EXPR_NAME: {
                    auto id = lhs->AsNameExpr()->Id();
                    TrackAssignment(id);

                    if ( is_assign ) {
                        auto a_e = static_cast<const AssignExpr*>(e);
                        auto& av = a_e->AssignVal();
                        if ( av )
                            // This is a funky "local" assignment
                            // inside a when clause.
                            when_locals.insert(id);
                    }
                    else if ( IsAggr(lhs_t->Tag()) )
                        aggr_mods.insert(lhs_t.get());
                } break;

                case EXPR_INDEX: {
                    auto lhs_aggr = lhs->GetOp1();
                    auto lhs_aggr_t = lhs_aggr->GetType();

                    // Determine which aggregate is being modified.  For an
                    // assignment "a[b] = aggr", it's not a[b]'s type but
                    // rather a's type. However, for any of the others,
                    // e.g. "a[b] -= aggr" it is a[b]'s type.
                    if ( is_assign )
                        aggr_mods.insert(lhs_aggr_t.get());
                    else
                        aggr_mods.insert(lhs_t.get());

                    if ( lhs_aggr_t->Tag() == TYPE_TABLE ) {
                        // We don't want the default recursion into the
                        // expression's LHS because that will treat this
                        // table modification as a reference instead. So
                        // do it manually. Given that, we need to do the
                        // expression's RHS manually too.
                        lhs->GetOp1()->Traverse(this);
                        lhs->GetOp2()->Traverse(this);

                        auto rhs = e->GetOp2();
                        if ( rhs )
                            rhs->Traverse(this);

                        return TC_ABORTSTMT;
                    }
                } break;

                case EXPR_FIELD: aggr_mods.insert(lhs_t.get()); break;

                case EXPR_LIST: {
                    for ( auto id : lhs->AsListExpr()->Exprs() ) {
                        auto id_t = id->GetType();
                        if ( IsAggr(id_t->Tag()) )
                            aggr_mods.insert(id_t.get());
                    }
                } break;

                default: reporter->InternalError("bad expression in ProfileFunc: %s", obj_desc(e).c_str());
            }
        } break;

        case EXPR_AGGR_ADD:
        case EXPR_AGGR_DEL: {
            auto lhs = e->GetOp1();
            if ( lhs )
                aggr_mods.insert(lhs->GetType().get());
            else
                aggr_mods.insert(e->GetType().get());
        } break;

        case EXPR_CALL: {
            auto c = e->AsCallExpr();
            auto args = c->Args();
            auto f = c->Func();

            const NameExpr* n = nullptr;
            const ID* func = nullptr;

            if ( f->Tag() == EXPR_NAME ) {
                n = f->AsNameExpr();
                func = n->Id();

                if ( ! func->IsGlobal() )
                    does_indirect_calls = true;
            }
            else
                does_indirect_calls = true;

            // Check for whether any of the arguments is a bare function.
            // If so, then note that that function may be used indirectly,
            // unless the function being called is known to be idempotent.
            //
            // The ordering of the test matters: "func" is only guaranteed
            // non-nil when the call is via a name, and in the other case
            // does_indirect_calls is set, short-circuiting the dereference.
            if ( does_indirect_calls || ! is_idempotent(func->Name()) ) {
                for ( auto& arg : args->Exprs() )
                    if ( arg->Tag() == EXPR_NAME ) {
                        auto& arg_id = arg->AsNameExpr()->IdPtr();
                        const auto& t = arg_id->GetType();
                        if ( t->Tag() == TYPE_FUNC && t->AsFuncType()->Flavor() == FUNC_FLAVOR_FUNCTION )
                            indirect_funcs.insert(arg_id.get());
                    }
            }

            // NOTE(review): does_indirect_calls is never reset in this
            // function, so once any indirect call has been seen, every
            // later call expression also takes this early return and
            // skips the script_calls/BiF_globals bookkeeping below.
            // Confirm that this is intended.
            if ( does_indirect_calls )
                // We waited on doing this until after checking for
                // indirect functions.
                return TC_CONTINUE;

            all_globals.insert(func);

            auto func_v = func->GetVal();
            if ( func_v ) {
                auto func_vf = func_v->AsFunc();

                if ( func_vf->GetKind() == Func::SCRIPT_FUNC ) {
                    auto sf = static_cast<ScriptFunc*>(func_vf);
                    script_calls.insert(sf);
                }
                else
                    BiF_globals.insert(func);
            }
            else {
                // We could complain, but for now we don't, because
                // if we're invoked prior to full Zeek initialization,
                // the value might indeed not there yet.
                // printf("no function value for global %s\n", func->Name());
            }

            // Recurse into the arguments.
            args->Traverse(this);

            // Do the following explicitly, since we won't be recursing
            // into the LHS global.

            // Note that the type of the expression and the type of the
            // function can actually be *different* due to the NameExpr
            // being constructed based on a forward reference and then
            // the global getting a different (constructed) type when
            // the function is actually declared.  Geez.  So hedge our
            // bets.
            TrackType(n->GetType());
            TrackType(func->GetType());

            TrackID(func);

            return TC_ABORTSTMT;
        }

        case EXPR_EVENT: {
            auto ev = e->AsEventExpr()->Name();
            events.insert(ev);
            addl_hashes.push_back(p_hash(ev));
        } break;

        case EXPR_LAMBDA: {
            auto l = e->AsLambdaExpr();
            lambdas.push_back(l);

            for ( const auto& i : l->OuterIDs() ) {
                locals.insert(i);
                TrackID(i);

                // See above re EXPR_NAME regarding the following
                // logic.
                if ( captures.count(i) == 0 && i->Offset() < num_params )
                    params.insert(i);
            }

            // In general, we don't want to recurse into the body.
            // However, we still want to *profile* it so we can
            // identify calls within it.
            auto pf = std::make_shared<ProfileFunc>(l->Ingredients()->Body().get(), false);
            script_calls.insert(pf->ScriptCalls().begin(), pf->ScriptCalls().end());

            return TC_ABORTSTMT;
        }

        case EXPR_RECORD_CONSTRUCTOR:
        case EXPR_REC_CONSTRUCT_WITH_REC: CheckRecordConstructor(e->GetType()); break;

        case EXPR_SET_CONSTRUCTOR: {
            auto sc = static_cast<const SetConstructorExpr*>(e);
            const auto& attrs = sc->GetAttrs();

            if ( attrs )
                constructor_attrs[attrs.get()] = sc->GetType();
        } break;

        case EXPR_TABLE_CONSTRUCTOR: {
            auto tc = static_cast<const TableConstructorExpr*>(e);
            const auto& attrs = tc->GetAttrs();

            if ( attrs )
                constructor_attrs[attrs.get()] = tc->GetType();
        } break;

        case EXPR_RECORD_COERCE:
            // This effectively does a record construction of the target
            // type, so check that.
            CheckRecordConstructor(e->GetType());
            break;

        case EXPR_TABLE_COERCE: {
            // This is written without casting so it can work with other
            // types if needed.
            auto res_type = e->GetType().get();
            auto orig_type = e->GetOp1()->GetType().get();

            // Accumulate every coercion target seen for the original
            // type.  Indexing creates an empty set the first time a
            // given original type is seen, so a plain insert covers both
            // the new-key and existing-key cases and never discards
            // aliases that were recorded earlier for the same type.
            type_aliases[orig_type].insert(res_type);
        } break;

        default: break;
    }

    return TC_CONTINUE;
}

// Traversal callback for identifiers: the identifier is tracked, and
// the traversal is told not to analyze it any further.
TraversalCode ProfileFunc::PreID(const ID* id) {
    TrackID(id);

    // Nothing beneath the identifier needs looking at.
    return TC_ABORTSTMT;
}

// Remembers a type used by the profiled entity.  Nil types are ignored,
// and a type already present in "types" is not appended to
// "ordered_types" a second time, so the latter keeps first-seen order.
void ProfileFunc::TrackType(const Type* t) {
    if ( t && types.insert(t).second )
        ordered_types.push_back(t);
}

// Remembers an identifier used by the profiled entity.  Nil identifiers
// are ignored, and an identifier already present in "ids" is not
// appended to "ordered_ids" again, so the latter keeps first-seen order.
void ProfileFunc::TrackID(const ID* id) {
    if ( id && ids.insert(id).second )
        ordered_ids.push_back(id);
}

void ProfileFunc::TrackAssignment(const ID* id) {
    if ( assignees.count(id) > 0 )
        ++assignees[id];
    else
        assignees[id] = 1;

    if ( id->IsGlobal() || captures.count(id) > 0 )
        non_local_assignees.insert(id);
}

// Notes the attributes of every field of the given record type, both in
// "constructor_attrs" (attributes -> record type) and in
// "rec_constructor_attrs" (record type -> set of field attributes).
void ProfileFunc::CheckRecordConstructor(TypePtr t) {
    auto rec_t = cast_intrusive<RecordType>(t);

    for ( auto field_decl : *rec_t->Types() ) {
        if ( ! field_decl->attrs )
            continue;

        // One could try to work out whether this specific constructor
        // explicitly supplies the &default fields, and leave out their
        // attributes since they then wouldn't be used.  That's extra
        // complexity for what is very likely no real benefit.
        auto field_attrs = field_decl->attrs.get();
        constructor_attrs[field_attrs] = rec_t;

        // Indexing creates the (empty) set on first use.
        rec_constructor_attrs[rec_t.get()].insert(field_attrs);
    }
}

// Builds profiles for all of the given functions and then derives the
// collective information: type hashes, profiles of pending expressions,
// body hashes, and finally side effects.
//
// "pred", if non-nil, decides whether a function's profile is merged
// into the collective one.  "_compute_func_hashes" controls whether
// body hashes are computed afresh or carried over from an existing
// profile, and "_full_record_hashes" is passed along to each profile
// and used when hashing record types.
ProfileFuncs::ProfileFuncs(std::vector<FuncInfo>& funcs, is_compilable_pred pred, bool _compute_func_hashes,
                           bool _full_record_hashes) {
    compute_func_hashes = _compute_func_hashes;
    full_record_hashes = _full_record_hashes;

    for ( auto& fi : funcs ) {
        auto prof = std::make_shared<ProfileFunc>(fi.Func(), fi.Body(), full_record_hashes);

        const bool should_merge = ! pred || (*pred)(prof.get(), nullptr);
        if ( should_merge )
            MergeInProfile(prof.get());

        // The profile is kept even for functions that aren't merged in,
        // because the AST optimizer still needs it for reasoning about
        // the side effects of function calls.

        // If we're not computing hashes ourselves, carry over the one
        // from the function's existing profile (if any).
        if ( ! compute_func_hashes && fi.Profile() )
            prof->SetHashVal(fi.Profile()->HashVal());

        fi.SetProfile(std::move(prof));
        func_profs[fi.Func()] = fi.ProfilePtr();
    }

    // At this point we have the main (starting) types used across all
    // of the functions, so recursively compute their hashes.
    ComputeTypeHashes(main_types);

    do {
        // Hash computation can flag expressions (found in record
        // attributes) for additional analysis, and the profile merging
        // above may have recorded lambdas.  Analyze those, along with
        // whatever further expressions that analysis turns up.
        DrainPendingExprs();

        // Everything needed for definitive, deterministic hashes is
        // now available.
        ComputeBodyHashes(funcs);

        // ... though computing them may itself have triggered
        // traversals that queued still more expressions, hence the loop.
    } while ( ! pending_exprs.empty() );

    // With profiling complete, do the analyses that depend on having
    // full global information.
    ComputeSideEffects();
}

// Looks up (and caches) the "tbl_has_aggr_default" answer for the given
// table type.  If the type itself has no entry but an already-recorded
// type is the same type per same_type(), that entry's answer is reused;
// otherwise the answer is recorded as false.
bool ProfileFuncs::IsTableWithDefaultAggr(const Type* t) {
    if ( auto known = tbl_has_aggr_default.find(t); known != tbl_has_aggr_default.end() )
        // Already resolved.
        return known->second;

    // Check whether we've resolved some alias of this type.
    if ( t->AsTableType()->Yield() ) {
        for ( auto& [other_t, other_answer] : tbl_has_aggr_default ) {
            if ( ! same_type(other_t, t) )
                continue;

            // Take a copy before adding the new entry to the map.
            const bool answer = other_answer;
            tbl_has_aggr_default[t] = answer;
            return answer;
        }
    }

    tbl_has_aggr_default[t] = false;
    return false;
}

// Convenience wrapper around GetSideEffects(): returns true if that
// call returns true, or if it reported any non-local identifiers or
// aggregate types.
bool ProfileFuncs::HasSideEffects(SideEffectsOp::AccessType access, const TypePtr& t) const {
    IDSet affected_ids;
    TypeSet affected_aggrs;

    const bool flagged = GetSideEffects(access, t.get(), affected_ids, affected_aggrs);

    return flagged || ! affected_ids.empty() || ! affected_aggrs.empty();
}

// Runs AssessSideEffects() for each entry in "side_effects_ops" against
// the given access/type, accumulating into non_local_ids and aggrs.
// Stops and returns true as soon as one assessment returns true;
// returns false if none does.
bool ProfileFuncs::GetSideEffects(SideEffectsOp::AccessType access, const Type* t, IDSet& non_local_ids,
                                  TypeSet& aggrs) const {
    for ( const auto& op : side_effects_ops ) {
        if ( AssessSideEffects(op.get(), access, t, non_local_ids, aggrs) )
            return true;
    }

    return false;
}

// Gathers the side effects of calling the function named by "n".
//
// Sets is_unknown when the function's value is missing or non-const,
// when a built-in is reported by has_script_side_effects(), or when the
// script function's side-effects record has unknown changes.  For script
// functions, the modified aggregates and non-locals are added to
// "aggrs" and "non_local_ids".
//
// Returns false only when no side-effects record is available for the
// script function; returns true otherwise.
bool ProfileFuncs::GetCallSideEffects(const NameExpr* n, IDSet& non_local_ids, TypeSet& aggrs, bool& is_unknown) {
    auto func_id = n->Id();
    auto func_val = func_id->GetVal();

    const bool usable = func_val && func_id->IsConst();
    if ( ! usable ) {
        // Either there's no value (likely a bug), or the value could
        // change at run-time.
        is_unknown = true;
        return true;
    }

    auto func = func_val->AsFunc();

    if ( func->GetKind() == Func::BUILTIN_FUNC ) {
        if ( has_script_side_effects(func->Name()) )
            is_unknown = true;

        return true;
    }

    auto script_func = static_cast<ScriptFunc*>(func);
    auto effects = GetCallSideEffects(script_func);

    if ( ! effects )
        return false;

    if ( effects->HasUnknownChanges() )
        is_unknown = true;

    const auto& mod_aggrs = effects->ModAggrs();
    aggrs.insert(mod_aggrs.begin(), mod_aggrs.end());

    const auto& mod_non_locals = effects->ModNonLocals();
    non_local_ids.insert(mod_non_locals.begin(), mod_non_locals.end());

    return true;
}

// Folds the contents of a single profile into the collective one.
// Globals seen for the first time additionally have their values
// traversed, their initialization expressions queued for analysis,
// and their attributes analyzed.
void ProfileFuncs::MergeInProfile(ProfileFunc* pf) {
    all_globals.insert(pf->AllGlobals().begin(), pf->AllGlobals().end());

    for ( auto& glob : pf->Globals() ) {
        if ( ! globals.emplace(glob).second )
            // Already processed via an earlier profile.
            continue;

        TraverseValue(glob->GetVal());

        const auto& glob_t = glob->GetType();
        if ( glob_t->Tag() == TYPE_TYPE )
            (void)HashType(glob_t->AsTypeType()->GetType());

        auto& init_exprs = glob->GetOptInfo()->GetInitExprs();
        for ( const auto& init_e : init_exprs ) {
            if ( ! init_e )
                continue;

            pending_exprs.push_back(init_e.get());

            if ( init_e->Tag() == EXPR_LAMBDA )
                lambdas.insert(init_e->AsLambdaExpr());
        }

        auto& glob_attrs = glob->GetAttrs();
        if ( glob_attrs )
            AnalyzeAttrs(glob_attrs.get(), glob_t.get());
    }

    constants.insert(pf->Constants().begin(), pf->Constants().end());
    main_types.insert(main_types.end(), pf->OrderedTypes().begin(), pf->OrderedTypes().end());
    script_calls.insert(pf->ScriptCalls().begin(), pf->ScriptCalls().end());
    BiF_globals.insert(pf->BiFGlobals().begin(), pf->BiFGlobals().end());
    events.insert(pf->Events().begin(), pf->Events().end());

    // Lambdas are both remembered and queued for their own profiling.
    for ( auto& lambda : pf->Lambdas() ) {
        lambdas.insert(lambda);
        pending_exprs.push_back(lambda);
    }

    for ( auto& ctor_attr : pf->ConstructorAttrs() )
        AnalyzeAttrs(ctor_attr.first, ctor_attr.second.get());

    // Indexing creates an empty alias set for a type not seen before,
    // after which the profile's aliases are added to it.
    for ( auto& alias : pf->TypeAliases() )
        type_aliases[alias.first].insert(alias.second.begin(), alias.second.end());
}

// Hashes the type of the given value and then recursively does the
// same for the values nested inside records, tables, lists and vectors.
// A nil value is ignored.
void ProfileFuncs::TraverseValue(const ValPtr& v) {
    if ( ! v )
        return;

    const auto& val_t = v->GetType();
    (void)HashType(val_t);

    switch ( val_t->Tag() ) {
        // Nothing further is traversed for values of these types.
        case TYPE_ADDR:
        case TYPE_ANY:
        case TYPE_BOOL:
        case TYPE_COUNT:
        case TYPE_DOUBLE:
        case TYPE_ENUM:
        case TYPE_ERROR:
        case TYPE_FILE:
        case TYPE_FUNC:
        case TYPE_INT:
        case TYPE_INTERVAL:
        case TYPE_OPAQUE:
        case TYPE_PATTERN:
        case TYPE_PORT:
        case TYPE_STRING:
        case TYPE_SUBNET:
        case TYPE_TIME:
        case TYPE_VOID: break;

        case TYPE_RECORD: {
            auto rec = cast_intrusive<RecordVal>(v);
            auto num_fields = rec->NumFields();

            for ( auto field = 0u; field < num_fields; ++field )
                TraverseValue(rec->GetField(field));

            break;
        }

        case TYPE_TABLE: {
            auto tbl = cast_intrusive<TableVal>(v);
            auto entries = tbl->ToMap();

            // Both the index and the yield of each entry get visited.
            for ( auto& entry : entries ) {
                TraverseValue(entry.first);
                TraverseValue(entry.second);
            }

            break;
        }

        case TYPE_LIST: {
            auto list = cast_intrusive<ListVal>(v);
            auto len = list->Length();

            for ( auto idx = 0; idx < len; ++idx )
                TraverseValue(list->Idx(idx));

            break;
        }

        case TYPE_VECTOR: {
            auto vec = cast_intrusive<VectorVal>(v);
            auto len = vec->Size();

            for ( auto idx = 0u; idx < len; ++idx )
                TraverseValue(vec->ValAt(idx));

            break;
        }

        case TYPE_TYPE: (void)HashType(val_t->AsTypeType()->GetType()); break;
    }
}

// Profiles every expression queued in "pending_exprs", repeating until
// the queue is empty, since profiling an expression can itself queue
// further ones.
void ProfileFuncs::DrainPendingExprs() {
    while ( ! pending_exprs.empty() ) {
        // Work from a private batch so that additions made while
        // processing land in the (now emptied) queue.
        auto batch = std::move(pending_exprs);
        pending_exprs.clear();

        for ( auto expr : batch ) {
            auto prof = std::make_shared<ProfileFunc>(expr, full_record_hashes);

            expr_profs[expr] = prof;
            MergeInProfile(prof.get());

            // The hashes need to be computed over the *ordered* types,
            // not the unordered ones.  Suppose type T1 depends on a
            // recursive type T2.  T1's hash then differs according to
            // whether T1 is reached during an in-progress traversal of
            // T2 (where T1 sees T2's in-progress "stub" hash) or via
            // some unrelated type T3 (where it sees T2's full hash).
            ComputeTypeHashes(prof->OrderedTypes());
        }
    }
}

// Ensures a hash has been computed for each of the given types, in the
// order given.  The hash values themselves are not needed here.
void ProfileFuncs::ComputeTypeHashes(const std::vector<const Type*>& types) {
    for ( const Type* ty : types ) {
        (void)HashType(ty);
    }
}

void ProfileFuncs::ComputeBodyHashes(std::vector<FuncInfo>& funcs) {
    if ( compute_func_hashes )
        for ( auto& f : funcs )
            if ( ! f.ShouldSkip() )
                ComputeProfileHash(f.ProfilePtr());

    for ( auto& l : lambdas ) {
        auto pf = ExprProf(l);
        func_profs[l->PrimaryFunc().get()] = pf;
        lambda_primaries[l->Name()] = l->PrimaryFunc().get();

        if ( compute_func_hashes )
            ComputeProfileHash(pf);
    }
}

void ProfileFuncs::ComputeProfileHash(std::shared_ptr<ProfileFunc> pf) {
    p_hash_type h = 0;

    // We add markers between each class of hash component, to
    // prevent collisions due to elements with simple hashes
    // (such as Stmt's or Expr's that are only represented by
    // the hash of their tag).
    h = merge_p_hashes(h, p_hash("stmts"));
    for ( auto i : pf->Stmts() )
        h = merge_p_hashes(h, p_hash(i->Tag()));

    h = merge_p_hashes(h, p_hash("exprs"));
    for ( auto i : pf->Exprs() )
        h = merge_p_hashes(h, p_hash(i->Tag()));

    h = merge_p_hashes(h, p_hash("ids"));
    for ( auto i : pf->OrderedIdentifiers() )
        h = merge_p_hashes(h, p_hash(i->Name()));

    h = merge_p_hashes(h, p_hash("constants"));
    for ( auto i : pf->Constants() )
        h = merge_p_hashes(h, p_hash(i->Value()));

    h = merge_p_hashes(h, p_hash("types"));
    for ( auto i : pf->OrderedTypes() )
        h = merge_p_hashes(h, HashType(i));

    h = merge_p_hashes(h, p_hash("lambdas"));
    for ( auto i : pf->Lambdas() )
        h = merge_p_hashes(h, p_hash(i));

    h = merge_p_hashes(h, p_hash("addl"));
    for ( auto i : pf->AdditionalHashes() )
        h = merge_p_hashes(h, i);

    pf->SetHashVal(h);
}

// Returns the profiling hash for the given type, computing and caching
// it if this is the first time the type has been seen.
//
// A nil type hashes to 0.  Results are cached per Type* in "type_hashes";
// in addition, a named type whose name has been seen before (under a
// different Type*) reuses the hash and representative of the earlier one.
// Otherwise the hash is built from the type's tag, its name (if any),
// and - for compound types - the hashes of its constituent types,
// computed recursively.
//
// Besides the hash itself, this maintains "type_to_rep"/"rep_types"
// (one representative Type* per distinct hash value) and, for record
// types, passes field attributes to AnalyzeAttrs().
p_hash_type ProfileFuncs::HashType(const Type* t) {
    if ( ! t )
        return 0;

    auto it = type_hashes.find(t);

    if ( it != type_hashes.end() )
        // We've already done this Type*.
        return it->second;

    auto& tn = t->GetName();
    if ( ! tn.empty() ) {
        auto seen_it = seen_type_names.find(tn);

        if ( seen_it != seen_type_names.end() ) {
            // We've already done a type with the same name, even
            // though with a different Type*.  Reuse its results.
            auto seen_t = seen_it->second;
            auto h = type_hashes[seen_t];

            type_hashes[t] = h;
            type_to_rep[t] = type_to_rep[seen_t];

            return h;
        }
    }

    // Start from the tag, and fold in the name for named types.
    auto h = p_hash(t->Tag());
    if ( ! tn.empty() )
        h = merge_p_hashes(h, p_hash(tn));

    // Enter an initial value for this type's hash.  We'll update it
    // at the end, but having it here first will prevent recursive
    // records from leading to infinite recursion as we traverse them.
    // It's okay that the initial value is degenerate, because if we access
    // it during the traversal that will only happen due to a recursive
    // type, in which case the other elements of that type will serve
    // to differentiate its hash.
    type_hashes[t] = h;

    switch ( t->Tag() ) {
        // For these types, the hash of the type's description is
        // folded in (via the Obj overload of p_hash).
        case TYPE_ADDR:
        case TYPE_ANY:
        case TYPE_BOOL:
        case TYPE_COUNT:
        case TYPE_DOUBLE:
        case TYPE_ENUM:
        case TYPE_ERROR:
        case TYPE_INT:
        case TYPE_INTERVAL:
        case TYPE_OPAQUE:
        case TYPE_PATTERN:
        case TYPE_PORT:
        case TYPE_STRING:
        case TYPE_SUBNET:
        case TYPE_TIME:
        case TYPE_VOID: h = merge_p_hashes(h, p_hash(t)); break;

        case TYPE_RECORD: {
            const auto& ft = t->AsRecordType();
            auto n = ft->NumFields();
            auto orig_n = ft->NumOrigFields();

            h = merge_p_hashes(h, p_hash("record"));

            // Which field count goes into the hash depends on whether
            // we're hashing the full record or only its original fields.
            if ( full_record_hashes )
                h = merge_p_hashes(h, p_hash(n));
            else
                h = merge_p_hashes(h, p_hash(orig_n));

            for ( auto i = 0; i < n; ++i ) {
                // Without full record hashes, only the first orig_n
                // fields are meant to be covered by this guard.
                bool do_hash = full_record_hashes;
                if ( ! do_hash )
                    do_hash = (i < orig_n);

                // Always compute the field type's hash, even when it
                // isn't folded in, so that the type gets registered.
                const auto& f = ft->FieldDecl(i);
                auto type_h = HashType(f->type);

                if ( do_hash ) {
                    h = merge_p_hashes(h, p_hash(f->id));
                    h = merge_p_hashes(h, type_h);
                }

                // NOTE(review): the next two lines fold in the field's
                // id and type hash for *every* field, regardless of
                // do_hash, and a second time for fields already handled
                // just above.  That looks like it defeats the purpose
                // of the do_hash guard - confirm whether these lines are
                // a leftover.  Removing them would change hash values.
                h = merge_p_hashes(h, p_hash(f->id));
                h = merge_p_hashes(h, HashType(f->type));

                // NOTE(review): an earlier comment here stated that
                // the field name is not hashed, since in some contexts
                // names are ignored.  The code above does hash f->id -
                // confirm which of the two is intended.

                if ( f->attrs ) {
                    if ( do_hash )
                        h = merge_p_hashes(h, HashAttrs(f->attrs));
                    // Attributes are analyzed regardless of do_hash.
                    AnalyzeAttrs(f->attrs.get(), ft);
                }
            }
        } break;

        case TYPE_TABLE: {
            auto tbl = t->AsTableType();
            h = merge_p_hashes(h, p_hash("table"));
            h = merge_p_hashes(h, p_hash("indices"));
            h = merge_p_hashes(h, HashType(tbl->GetIndices()));
            h = merge_p_hashes(h, p_hash("tbl-yield"));
            h = merge_p_hashes(h, HashType(tbl->Yield()));
        } break;

        case TYPE_FUNC: {
            auto ft = t->AsFuncType();
            auto flv = ft->FlavorString();
            h = merge_p_hashes(h, p_hash(flv));
            h = merge_p_hashes(h, p_hash("params"));
            h = merge_p_hashes(h, HashType(ft->Params()));
            h = merge_p_hashes(h, p_hash("func-yield"));
            h = merge_p_hashes(h, HashType(ft->Yield()));
        } break;

        case TYPE_LIST: {
            auto& tl = t->AsTypeList()->GetTypes();

            h = merge_p_hashes(h, p_hash("list"));
            h = merge_p_hashes(h, p_hash(tl.size()));

            for ( const auto& tl_i : tl )
                h = merge_p_hashes(h, HashType(tl_i));
        } break;

        case TYPE_VECTOR:
            h = merge_p_hashes(h, p_hash("vec"));
            h = merge_p_hashes(h, HashType(t->AsVectorType()->Yield()));
            break;

        case TYPE_FILE:
            h = merge_p_hashes(h, p_hash("file"));
            h = merge_p_hashes(h, HashType(t->AsFileType()->Yield()));
            break;

        case TYPE_TYPE:
            h = merge_p_hashes(h, p_hash("type"));
            h = merge_p_hashes(h, HashType(t->AsTypeType()->GetType()));
            break;
    }

    // Replace the placeholder entered above with the final hash.
    type_hashes[t] = h;

    // The first Type* seen with a given hash becomes the representative
    // for all types sharing that hash.
    auto [rep_it, rep_inserted] = type_hash_reps.emplace(h, t);

    if ( rep_inserted ) { // No previous rep, so use this Type* for that.
        type_to_rep[t] = t;
        rep_types.push_back(t);
    }
    else
        type_to_rep[t] = rep_it->second;

    // Remember named types so later Type*'s with the same name can
    // reuse these results.
    if ( ! tn.empty() )
        seen_type_names[tn] = t;

    return h;
}

// Computes a hash over a set of attributes.  Each attribute contributes
// its tag and, if it has an associated expression, the hash of that
// expression's type followed by the hash of the expression itself.
p_hash_type ProfileFuncs::HashAttrs(const AttributesPtr& Attrs) {
    // Simply applying p_hash to the attributes as a whole is tempting,
    // but doesn't work when we're not doing full record hashes and
    // the attributes' descriptions turn out to include extensible
    // records.
    const auto& attr_list = Attrs->GetAttrs();
    p_hash_type result = 0;

    for ( const auto& attr : attr_list ) {
        result = merge_p_hashes(result, p_hash(attr->Tag()));

        const auto& attr_expr = attr->GetExpr();
        if ( ! attr_expr )
            continue;

        // The expression's type is included to enforce consistency
        // of types.
        result = merge_p_hashes(result, HashType(attr_expr->GetType()));

        // NOTE(review): the expression's own description hash is
        // folded in as well, even though expressions can vary in
        // structure due to compilation of elements - confirm that
        // this is intended.
        result = merge_p_hashes(result, p_hash(attr_expr.get()));
    }

    return result;
}

// For every attribute in "attrs" that has an associated expression: adds
// the expression to pending_exprs, records in expr_attrs that the attribute
// is associated with type "t", and tracks the expression in "lambdas" if
// it's a lambda.
void ProfileFuncs::AnalyzeAttrs(const Attributes* attrs, const Type* t) {
    for ( const auto& attr : attrs->GetAttrs() ) {
        const auto& attr_expr = attr->GetExpr();

        if ( ! attr_expr )
            // No expression, so nothing to analyze for this attribute.
            continue;

        pending_exprs.push_back(attr_expr.get());

        // Fetch (creating if need be) the list of types associated with
        // this attribute. Usually it's brand new, but occasionally an
        // attribute is shared - for example, from initializers with a
        // variable itself - in which case we only add the type if we
        // haven't already seen it.
        auto& assoc_types = expr_attrs[attr.get()];

        bool already_known = false;
        for ( auto known_t : assoc_types )
            if ( known_t == t ) {
                already_known = true;
                break;
            }

        if ( ! already_known )
            assoc_types.push_back(t);

        if ( attr_expr->Tag() == EXPR_LAMBDA )
            lambdas.insert(attr_expr->AsLambdaExpr());
    }
}

void ProfileFuncs::ComputeSideEffects() {
    // Computing side effects is an iterative process, because whether
    // a given expression has a side effect can depend on whether it
    // includes accesses to types that themselves have side effects.

    // Step one: assemble the candidate pool of attributes to assess.
    for ( auto& ea : expr_attrs ) {
        // Is this an attribute that can be triggered by
        // statement/expression execution?
        auto a = ea.first;
        auto at = a->Tag();
        if ( at == ATTR_DEFAULT || at == ATTR_DEFAULT_INSERT || at == ATTR_ON_CHANGE ) {
            if ( at == ATTR_DEFAULT ) {
                // Look for tables with &default's returning aggregate values.
                for ( auto t : ea.second ) {
                    if ( t->Tag() != TYPE_TABLE )
                        continue;

                    auto y = t->AsTableType()->Yield();

                    if ( y && IsAggr(y->Tag()) ) {
                        tbl_has_aggr_default[t] = true;
                        for ( auto ta : type_aliases[t] )
                            tbl_has_aggr_default[ta] = true;
                    }
                }
            }

            // Weed out very-common-and-completely-safe expressions.
            if ( ! DefinitelyHasNoSideEffects(a->GetExpr()) )
                candidates.insert(a);
        }
    }

    // At this point, very often there are no candidates and we're done.
    // However, if we have candidates then we need to process them in an
    // iterative fashion because it's possible that the side effects of
    // some of them depend on the side effects of other candidates.

    while ( ! candidates.empty() ) {
        // For which attributes have we resolved their status.
        AttrSet made_decision;

        for ( auto c : candidates ) {
            IDSet non_local_ids;
            TypeSet aggrs;
            bool is_unknown = false;

            // Track the candidate we're currently analyzing, since sometimes
            // it's self-referential and we need to identify that fact.
            curr_candidate = c;

            if ( ! AssessSideEffects(c->GetExpr(), non_local_ids, aggrs, is_unknown) )
                // Can't make a decision yet.
                continue;

            // We've resolved this candidate.
            made_decision.insert(c);
            SetSideEffects(c, non_local_ids, aggrs, is_unknown);
        }

        if ( made_decision.empty() ) {
            // We weren't able to make forward progress. This happens when
            // the pending candidates are mutually dependent. While in
            // principle we could scope the worst-case resolution of their
            // side effects, this is such an unlikely situation that we just
            // mark them all as unknown.

            // We keep these empty.
            IDSet non_local_ids;
            TypeSet aggrs;

            for ( auto c : candidates )
                SetSideEffects(c, non_local_ids, aggrs, true);

            // We're now all done.
            break;
        }

        for ( auto md : made_decision )
            candidates.erase(md);
    }
}

// Returns true if a cheap inspection of "e" shows that it cannot have
// side effects. A return value of false means only that a full assessment
// is needed, not that the expression does have side effects.
bool ProfileFuncs::DefinitelyHasNoSideEffects(const ExprPtr& e) const {
    switch ( e->Tag() ) {
        case EXPR_CONST:
        case EXPR_VECTOR_CONSTRUCTOR: return true;

        case EXPR_NAME:
            // Safe unless the name is that of a function.
            return e->GetType()->Tag() != TYPE_FUNC;

        default: break;
    }

    // For anything else, consult the expression's profile.
    auto prof_it = expr_profs.find(e.get());
    ASSERT(prof_it != expr_profs.end());

    const auto& expr_pf = prof_it->second;

    // The expression needs to be free of non-local assignments, table
    // references, aggregate modifications, and calls to script functions.
    const bool is_inert = expr_pf->NonLocalAssignees().empty() && expr_pf->TableRefs().empty() &&
                          expr_pf->AggrMods().empty() && expr_pf->ScriptCalls().empty();

    if ( ! is_inert )
        return false;

    // It also can't use any BiF flagged as having script-level side effects.
    for ( auto& bif : expr_pf->BiFGlobals() )
        if ( has_script_side_effects(bif->Name()) )
            return false;

    return true;
}

void ProfileFuncs::SetSideEffects(const Attr* a, IDSet& non_local_ids, TypeSet& aggrs, bool is_unknown) {
    auto seo_vec = std::vector<std::shared_ptr<SideEffectsOp>>{};
    bool is_rec = expr_attrs[a][0]->Tag() == TYPE_RECORD;

    SideEffectsOp::AccessType at;
    if ( is_rec )
        at = SideEffectsOp::CONSTRUCTION;
    else if ( a->Tag() == ATTR_ON_CHANGE )
        at = SideEffectsOp::WRITE;
    else
        at = SideEffectsOp::READ;

    if ( non_local_ids.empty() && aggrs.empty() && ! is_unknown )
        // Definitely no side effects.
        seo_vec.push_back(std::make_shared<SideEffectsOp>());
    else {
        attrs_with_side_effects.insert(a);

        // Set side effects for all of the types associated with this attribute.
        for ( auto ea_t : expr_attrs[a] ) {
            auto seo = std::make_shared<SideEffectsOp>(at, ea_t);
            seo->AddModNonGlobal(non_local_ids);
            seo->AddModAggrs(aggrs);

            if ( is_unknown )
                seo->SetUnknownChanges();

            side_effects_ops.push_back(seo);
            seo_vec.push_back(std::move(seo));
        }
    }

    if ( is_rec )
        record_constr_with_side_effects[a] = std::move(seo_vec);
    else
        aggr_side_effects[a] = std::move(seo_vec);
}

// Returns the attributes, among those pending and those known to have
// side effects, that are associated with type "t".
AttrVec ProfileFuncs::AssociatedAttrs(const Type* t) {
    AttrVec matches;

    // It might seem that looking through the already-identified attributes
    // would suffice. We include the pending candidates as well, though,
    // so that callers can identify not-yet-resolved dependencies.
    FindAssociatedAttrs(candidates, t, matches);
    FindAssociatedAttrs(attrs_with_side_effects, t, matches);

    return matches;
}

// Appends to "assoc_attrs" each attribute in "attrs" that is associated
// with type "t", either directly or via one of the aliases of its
// associated types. Each attribute is appended at most once per call,
// however many of its associated types (or their aliases) match.
void ProfileFuncs::FindAssociatedAttrs(const AttrSet& attrs, const Type* t, AttrVec& assoc_attrs) {
    for ( auto a : attrs ) {
        bool is_assoc = false;

        for ( auto ea_t : expr_attrs[a] ) {
            if ( same_type(t, ea_t) )
                is_assoc = true;
            else
                for ( auto ta : type_aliases[ea_t] )
                    if ( same_type(t, ta) ) {
                        is_assoc = true;
                        break;
                    }

            if ( is_assoc )
                // One match suffices. Stopping here (rather than just
                // leaving the alias loop) is what keeps the attribute
                // from being added again for a later associated type.
                break;
        }

        if ( is_assoc )
            assoc_attrs.push_back(a);
    }
}

// Assesses the side effects of expression "e", adding them to
// "non_local_ids" and "aggrs", and setting "is_unknown" if they can't be
// characterized. The return value is that of the assessment this
// delegates to.
bool ProfileFuncs::AssessSideEffects(const ExprPtr& e, IDSet& non_local_ids, TypeSet& aggrs, bool& is_unknown) {
    const bool is_func_name = e->Tag() == EXPR_NAME && e->GetType()->Tag() == TYPE_FUNC;

    if ( ! is_func_name ) {
        // The general case: assess using the expression's profile.
        const auto expr = e.get();
        ASSERT(expr_profs.count(expr) != 0);
        auto expr_pf = expr_profs[expr];
        return AssessSideEffects(expr_pf.get(), non_local_ids, aggrs, is_unknown);
    }

    // The expression is itself a function name, which in an attribute
    // context indicates an implicit call.
    return GetCallSideEffects(e->AsNameExpr(), non_local_ids, aggrs, is_unknown);
}

bool ProfileFuncs::AssessSideEffects(const ProfileFunc* pf, IDSet& non_local_ids, TypeSet& aggrs, bool& is_unknown) {
    if ( pf->DoesIndirectCalls() ) {
        is_unknown = true;
        return true;
    }

    for ( auto& b : pf->BiFGlobals() )
        if ( has_script_side_effects(b->Name()) ) {
            is_unknown = true;
            return true;
        }

    IDSet nla;
    TypeSet mod_aggrs;

    for ( auto& a : pf->NonLocalAssignees() )
        nla.insert(a);

    for ( auto& r : pf->RecordConstructorAttrs() )
        if ( ! AssessAggrEffects(SideEffectsOp::CONSTRUCTION, r.first, nla, mod_aggrs, is_unknown) )
            // Not enough information yet to know all of the side effects.
            return false;

    for ( auto& tr : pf->TableRefs() )
        if ( ! AssessAggrEffects(SideEffectsOp::READ, tr, nla, mod_aggrs, is_unknown) )
            return false;

    for ( auto& tm : pf->AggrMods() ) {
        if ( tm->Tag() == TYPE_TABLE && ! AssessAggrEffects(SideEffectsOp::WRITE, tm, nla, mod_aggrs, is_unknown) )
            return false;

        mod_aggrs.insert(tm);
    }

    for ( auto& f : pf->ScriptCalls() ) {
        if ( f->Flavor() != FUNC_FLAVOR_FUNCTION ) {
            // A hook (since events can't be called) - not something
            // to analyze further.
            is_unknown = true;
            return true;
        }

        auto pff = func_profs[f];
        if ( active_func_profiles.count(pff) > 0 )
            // We're already processing this function and arrived here via
            // recursion. Skip further analysis here, we'll do it instead
            // for the original instance.
            continue;

        // Track this analysis so we can detect recursion.
        active_func_profiles.insert(pff);
        auto a = AssessSideEffects(pff.get(), nla, mod_aggrs, is_unknown);
        active_func_profiles.erase(pff);

        if ( ! a )
            return false;
    }

    non_local_ids.insert(nla.begin(), nla.end());
    aggrs.insert(mod_aggrs.begin(), mod_aggrs.end());

    return true;
}

// Assesses the side effects of accessing aggregate type "t" in the manner
// given by "access", based on the attributes associated with the type.
// Returns false if any of those attributes (other than the current
// candidate) hasn't been resolved yet. Otherwise returns true, having
// either added the side effects to "non_local_ids" and "aggrs" or set
// "is_unknown".
bool ProfileFuncs::AssessAggrEffects(SideEffectsOp::AccessType access, const Type* t, IDSet& non_local_ids,
                                     TypeSet& aggrs, bool& is_unknown) {
    for ( auto attr : AssociatedAttrs(t) ) {
        if ( attr == curr_candidate )
            // A self-reference. The lack of a determination for it
            // shouldn't keep us from resolving the candidate.
            continue;

        // Check whether we've already determined the side effects
        // associated with this attribute, which could be recorded in
        // either of two places.
        auto ops_it = aggr_side_effects.find(attr);
        bool is_resolved = ops_it != aggr_side_effects.end();

        if ( ! is_resolved ) {
            ops_it = record_constr_with_side_effects.find(attr);
            is_resolved = ops_it != record_constr_with_side_effects.end();
        }

        if ( ! is_resolved )
            // It's still pending, so we can't resolve the current
            // candidate either.
            return false;

        for ( auto& op : ops_it->second ) {
            const bool has_unknown = AssessSideEffects(op.get(), access, t, non_local_ids, aggrs);

            if ( has_unknown ) {
                is_unknown = true;
                return true;
            }
        }
    }

    return true;
}

// If the side-effects operation "se" applies to accessing type "t" in
// the manner given by "access", adds its effects to "non_local_ids" and
// "aggrs". Returns true only if the operation applies and has unknown
// changes (in which case nothing is added), otherwise false.
bool ProfileFuncs::AssessSideEffects(const SideEffectsOp* se, SideEffectsOp::AccessType access, const Type* t,
                                     IDSet& non_local_ids, TypeSet& aggrs) const {
    // The operation only applies if both the form of access and the
    // type match.
    const bool applies = se->GetAccessType() == access && same_type(se->GetType(), t);

    if ( ! applies )
        return false;

    if ( se->HasUnknownChanges() )
        return true;

    // The effects are known, so pass them along.
    const auto& mod_aggrs = se->ModAggrs();
    aggrs.insert(mod_aggrs.begin(), mod_aggrs.end());

    const auto& mod_non_locals = se->ModNonLocals();
    non_local_ids.insert(mod_non_locals.begin(), mod_non_locals.end());

    return false;
}

std::shared_ptr<SideEffectsOp> ProfileFuncs::GetCallSideEffects(const ScriptFunc* sf) {
    if ( lambda_primaries.count(sf->Name()) > 0 )
        sf = lambda_primaries[sf->Name()];

    auto sf_se = func_side_effects.find(sf);
    if ( sf_se != func_side_effects.end() )
        // Return cached result.
        return sf_se->second;

    bool is_unknown = false;
    IDSet nla;
    TypeSet mod_aggrs;

    ASSERT(func_profs.count(sf) != 0);
    auto pf = func_profs[sf];
    if ( ! AssessSideEffects(pf.get(), nla, mod_aggrs, is_unknown) )
        // Can't figure it out yet.
        return nullptr;

    auto seo = std::make_shared<SideEffectsOp>(SideEffectsOp::CALL);
    seo->AddModNonGlobal(nla);
    seo->AddModAggrs(mod_aggrs);

    if ( is_unknown )
        seo->SetUnknownChanges();

    func_side_effects[sf] = seo;

    return seo;
}

// Maps filenames to the module associated with them. If a file has more
// than one module, we go with the first one seen.
static std::unordered_map<std::string, std::string> filename_module;

// Associates "module_name" with the file of the current location, unless
// that file already has an associated module.
void switch_to_module(const char* module_name) {
    auto loc = GetCurrentLocation();

    if ( loc.first_line == 0 )
        // NOTE(review): presumably this means there's no meaningful
        // current location - confirm.
        return;

    // This is a no-op if the filename already has an entry.
    filename_module.try_emplace(loc.filename, module_name);
}

std::string func_name_at_loc(std::string fname, const Location* loc) {
    auto find_module = filename_module.find(loc->filename);
    if ( find_module == filename_module.end() )
        // No associated module.
        return fname;

    auto& module = find_module->second;
    if ( module.empty() || module == "GLOBAL" )
        // Trivial associated module.
        return fname;

    auto mod_prefix = module + "::";

    if ( fname.find(mod_prefix) == 0 )
        return fname; // it already has the module name

    return mod_prefix + fname;
}

// Upon entering a statement, normalizes its location (which also folds
// its lines into any enclosing statement's range) and then starts a new
// line range for the statement itself.
TraversalCode SetBlockLineNumbers::PreStmt(const Stmt* s) {
    // Location information comes to us as const, but we update it in place.
    auto stmt_loc = const_cast<Location*>(s->GetLocationInfo());
    UpdateLocInfo(stmt_loc);

    block_line_range.emplace_back(stmt_loc->first_line, stmt_loc->last_line);

    return TC_CONTINUE;
}

// Upon leaving a statement, sets its location to the full line range
// accumulated for it, and folds that range into the enclosing statement's
// range, if any.
TraversalCode SetBlockLineNumbers::PostStmt(const Stmt* s) {
    // Take the statement's range off of the stack. We need a copy of it,
    // since we keep using it after popping.
    const auto stmt_range = block_line_range.back();
    block_line_range.pop_back();

    auto stmt_loc = const_cast<Location*>(s->GetLocationInfo());
    stmt_loc->first_line = stmt_range.first;
    stmt_loc->last_line = stmt_range.second;

    if ( block_line_range.empty() )
        // No enclosing statement to update.
        return TC_CONTINUE;

    // Our range might have grown while traversing the statement, so
    // make sure the parent's range covers it.
    auto& parent_range = block_line_range.back();
    parent_range.first = std::min(parent_range.first, stmt_range.first);
    parent_range.second = std::max(parent_range.second, stmt_range.second);

    return TC_CONTINUE;
}

// Normalizes an expression's location, and folds its lines into the
// range of the statement currently being traversed.
TraversalCode SetBlockLineNumbers::PreExpr(const Expr* e) {
    // We expect to only come across expressions while inside a statement.
    ASSERT(! block_line_range.empty());

    auto expr_loc = const_cast<Location*>(e->GetLocationInfo());
    UpdateLocInfo(expr_loc);

    return TC_CONTINUE;
}

// Ensures that the lines of "loc" are in order, and widens the innermost
// active line range (if there is one) so that it covers them.
void SetBlockLineNumbers::UpdateLocInfo(Location* loc) {
    // Locations are sometimes generated with their line coverage
    // inverted; if so, fix that up.
    if ( loc->last_line < loc->first_line )
        std::swap(loc->first_line, loc->last_line);

    if ( block_line_range.empty() )
        // No active range to widen.
        return;

    auto& enclosing = block_line_range.back();
    enclosing.first = std::min(enclosing.first, loc->first_line);
    enclosing.second = std::max(enclosing.second, loc->last_line);
}

// Traverses the body of each function in "funcs" that should be analyzed,
// first fixing up its line numbers and then building up the descriptions
// of its AST blocks.
ASTBlockAnalyzer::ASTBlockAnalyzer(std::vector<FuncInfo>& funcs) {
    for ( auto& fi : funcs ) {
        if ( ! fi.ShouldAnalyze() )
            continue;

        std::string base_name = fi.Func()->Name();
        auto body = fi.Body();

        // Line numbers need to be in good shape before we go any further.
        SetBlockLineNumbers line_fixer;
        body->Traverse(&line_fixer);

        // Add the module qualifier, if any, based on where the body lives.
        const auto full_name = func_name_at_loc(base_name, body->GetLocationInfo());

        // The function serves as the outermost parent during the
        // traversal of its body.
        parents.emplace_back(full_name, full_name);
        func_name_prefix = full_name + ":";
        body->Traverse(this);
        parents.pop_back();
    }

    // We don't expect this to ever show up in any output.
    func_name_prefix = "<MISSING>:";
}

static bool is_compound_stmt(const Stmt* s) {
    static std::set<StmtTag> compound_stmts = {STMT_FOR, STMT_IF, STMT_LIST, STMT_SWITCH, STMT_WHEN, STMT_WHILE};
    return compound_stmts.count(s->Tag()) > 0;
}

// Builds the expanded description for a statement's location and, if the
// statement is a compound one, makes it the parent for what it contains.
TraversalCode ASTBlockAnalyzer::PreStmt(const Stmt* s) {
    const auto stmt_loc = s->GetLocationInfo();

    // We always do this, even for non-compound statements, because it
    // also registers the description.
    auto stmt_desc = BuildExpandedDescription(stmt_loc);

    if ( ! is_compound_stmt(s) )
        return TC_CONTINUE;

    parents.emplace_back(LocWithFunc(stmt_loc), std::move(stmt_desc));

    return TC_CONTINUE;
}

// Upon leaving a compound statement, removes it as the current parent.
TraversalCode ASTBlockAnalyzer::PostStmt(const Stmt* s) {
    const bool was_pushed = is_compound_stmt(s);

    if ( was_pushed )
        // This undoes the push done upon entering the statement.
        parents.pop_back();

    return TC_CONTINUE;
}

// Registers the expanded description for an expression's location.
TraversalCode ASTBlockAnalyzer::PreExpr(const Expr* e) {
    const auto expr_loc = e->GetLocationInfo();

    // We're only after the registration, not the returned description.
    (void)BuildExpandedDescription(expr_loc);

    return TC_CONTINUE;
}

// Returns the expanded description for "loc", which is the description of
// the current parent (if any) followed by ";" and the location's own
// description - or just the parent's description, if the location is that
// of the parent itself. Also registers the result in exp_desc under the
// location's key, unless the key already has an entry.
std::string ASTBlockAnalyzer::BuildExpandedDescription(const Location* loc) {
    ASSERT(loc && loc->first_line != 0);

    auto desc = LocWithFunc(loc);

    if ( ! parents.empty() ) {
        const auto& [parent_loc, parent_desc] = parents.back();

        if ( parent_loc != desc )
            desc = parent_desc + ";" + desc;
        else
            // Same location as the parent, so use its description as is.
            desc = parent_desc;
    }

    // The first description seen for a given key wins.
    exp_desc.emplace(LocKey(loc), desc);

    return desc;
}

// The global AST block analyzer. It's default-constructed here, so it
// holds no analyzer until one gets assigned.
// NOTE(review): where it gets assigned isn't visible from this part of
// the file - confirm before relying on it being non-nil.
std::unique_ptr<ASTBlockAnalyzer> AST_blocks;

} // namespace zeek::detail