diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 3987c604a5..ea41271c16 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -250,6 +250,36 @@ set(_gen_zeek_script_cpp ${CMAKE_CURRENT_BINARY_DIR}/../CPP-gen.cc)
 add_custom_command(OUTPUT ${_gen_zeek_script_cpp} COMMAND ${CMAKE_COMMAND} -E touch ${_gen_zeek_script_cpp})
+# define a command that's used to run the ZAM instruction generator;
+# building the zeek binary depends on the outputs of this script
+add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/ZAM-AssignFlavorsDefs.h
+                          ${CMAKE_CURRENT_BINARY_DIR}/ZAM-Conds.h
+                          ${CMAKE_CURRENT_BINARY_DIR}/ZAM-DirectDefs.h
+                          ${CMAKE_CURRENT_BINARY_DIR}/ZAM-EvalDefs.h
+                          ${CMAKE_CURRENT_BINARY_DIR}/ZAM-EvalMacros.h
+                          ${CMAKE_CURRENT_BINARY_DIR}/ZAM-GenExprsDefsC1.h
+                          ${CMAKE_CURRENT_BINARY_DIR}/ZAM-GenExprsDefsC2.h
+                          ${CMAKE_CURRENT_BINARY_DIR}/ZAM-GenExprsDefsC3.h
+                          ${CMAKE_CURRENT_BINARY_DIR}/ZAM-GenExprsDefsV.h
+                          ${CMAKE_CURRENT_BINARY_DIR}/ZAM-GenFieldsDefsC1.h
+                          ${CMAKE_CURRENT_BINARY_DIR}/ZAM-GenFieldsDefsC2.h
+                          ${CMAKE_CURRENT_BINARY_DIR}/ZAM-GenFieldsDefsV.h
+                          ${CMAKE_CURRENT_BINARY_DIR}/ZAM-MethodDecls.h
+                          ${CMAKE_CURRENT_BINARY_DIR}/ZAM-MethodDefs.h
+                          ${CMAKE_CURRENT_BINARY_DIR}/ZAM-Op1FlavorsDefs.h
+                          ${CMAKE_CURRENT_BINARY_DIR}/ZAM-OpSideEffects.h
+                          ${CMAKE_CURRENT_BINARY_DIR}/ZAM-OpsDefs.h
+                          ${CMAKE_CURRENT_BINARY_DIR}/ZAM-OpsNamesDefs.h
+                          ${CMAKE_CURRENT_BINARY_DIR}/ZAM-Vec1EvalDefs.h
+                          ${CMAKE_CURRENT_BINARY_DIR}/ZAM-Vec2EvalDefs.h
+                   COMMAND ${CMAKE_CURRENT_BINARY_DIR}/Gen-ZAM
+                   ARGS ${CMAKE_CURRENT_SOURCE_DIR}/script_opt/ZAM/Ops.in
+                   DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/Gen-ZAM
+                           ${CMAKE_CURRENT_SOURCE_DIR}/script_opt/ZAM/Ops.in
+                   COMMENT "[sh] Generating ZAM operations"
+                   WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
+)
+
 set_source_files_properties(nb_dns.c PROPERTIES COMPILE_FLAGS -fno-strict-aliasing)
@@ -396,6 +426,20 @@ set(MAIN_SRCS
     script_opt/TempVar.cc
     script_opt/UseDefs.cc
+    script_opt/ZAM/AM-Opt.cc
+    script_opt/ZAM/Branches.cc
+    script_opt/ZAM/BuiltIn.cc
+    script_opt/ZAM/Driver.cc
+    script_opt/ZAM/Expr.cc
+    script_opt/ZAM/Inst-Gen.cc
+    script_opt/ZAM/Low-Level.cc
+    script_opt/ZAM/Stmt.cc
+    script_opt/ZAM/Support.cc
+    script_opt/ZAM/Vars.cc
+    script_opt/ZAM/ZBody.cc
+    script_opt/ZAM/ZInst.cc
+    script_opt/ZAM/ZOp.cc
+
     nb_dns.c
     digest.h
 )
@@ -404,6 +448,10 @@ set(THIRD_PARTY_SRCS
     3rdparty/sqlite3.c
 )
+set(GEN_ZAM_SRCS
+    script_opt/ZAM/Gen-ZAM.cc
+)
+
 # Highwayhash. Highwayhash is a bit special since it has architecture dependent code...
 set(HH_SRCS
@@ -470,12 +518,14 @@ set(zeek_SRCS
     ${FLEX_Scanner_INPUT}
     ${BISON_Parser_INPUT}
     ${CMAKE_CURRENT_BINARY_DIR}/DebugCmdConstants.h
+    ${CMAKE_CURRENT_BINARY_DIR}/ZAM-MethodDecls.h
     ${THIRD_PARTY_SRCS}
     ${HH_SRCS}
     ${MAIN_SRCS}
 )
 collect_headers(zeek_HEADERS ${zeek_SRCS})
+collect_headers(GEN_ZAM_HEADERS ${GEN_ZAM_SRCS})
 add_library(zeek_objs OBJECT ${zeek_SRCS})
@@ -491,6 +541,8 @@ set_target_properties(zeek PROPERTIES ENABLE_EXPORTS TRUE)
 install(TARGETS zeek DESTINATION bin)
+add_executable(Gen-ZAM ${GEN_ZAM_SRCS} ${GEN_ZAM_HEADERS})
+
 # Install wrapper script for Bro-to-Zeek renaming.
 include(InstallSymlink)
 InstallSymlink("${CMAKE_INSTALL_PREFIX}/bin/zeek-wrapper" "${CMAKE_INSTALL_PREFIX}/bin/bro")
diff --git a/src/script_opt/ZAM/AM-Opt.cc b/src/script_opt/ZAM/AM-Opt.cc
new file mode 100644
index 0000000000..6d165a0d2e
--- /dev/null
+++ b/src/script_opt/ZAM/AM-Opt.cc
@@ -0,0 +1,1072 @@
+// See the file "COPYING" in the main distribution directory for copyright.
+
+// Logic associated with optimization of the low-level Abstract Machine,
+// i.e., code improvement that's done after the compiler has generated
+// an initial, complete intermediary function body.
+
+#include "zeek/input.h"
+#include "zeek/Reporter.h"
+#include "zeek/Desc.h"
+#include "zeek/script_opt/Reduce.h"
+#include "zeek/script_opt/ScriptOpt.h"
+#include "zeek/script_opt/ZAM/Compile.h"
+
+namespace zeek::detail {
+
+// Tracks per function its maximum remapped interpreter frame size. We
+// can't do this when compiling individual functions since for event handlers
+// and hooks it needs to be computed across all of their bodies.
+//
+// Note, this is now not really needed, because we no longer use any
+// interpreter frame entries other than those for the function's arguments.
+// We keep the code in case that changes, for example when deciding to
+// compile functions that include "return when" conditions.
+std::unordered_map<const Func*, int> remapped_intrp_frame_sizes; // NOTE(review): template args were lost in this copy - confirm key type
+
+void finalize_functions(const std::vector<FuncInfo>& funcs) // NOTE(review): element type restored - confirm against the declaration
+	{
+	// Given we've now compiled all of the function bodies, we
+	// can reset the interpreter frame sizes of each function
+	// to be the maximum needed to accommodate all of its
+	// remapped bodies.
+
+	// Find any functions with bodies that weren't compiled and
+	// make sure we don't reduce their frame size. For any loaded
+	// from ZAM save files, use the associated maximum interpreter
+	// frame size as a minimum.
+	for ( auto& f : funcs )
+		{
+		auto func = f.Func();
+
+		// If we have non-compiled versions of the function's body,
+		// preserve the size they need.
+		int size = func->FrameSize();
+
+		if ( f.Body()->Tag() != STMT_ZAM &&
+		     remapped_intrp_frame_sizes.count(func) > 0 &&
+		     size > remapped_intrp_frame_sizes[func] )
+			remapped_intrp_frame_sizes[func] = size;
+		}
+
+	for ( auto& f : funcs )
+		{
+		auto func = f.Func();
+
+		if ( remapped_intrp_frame_sizes.count(func) == 0 )
+			// No entry for this function, keep current frame size.
+			continue;
+
+		// Note, functions with multiple bodies appear in "funcs"
+		// multiple times, but the following doesn't hurt to do
+		// more than once.
+		func->SetFrameSize(remapped_intrp_frame_sizes[func]);
+		}
+	}
+
+
+// The following is for activating detailed dumping for debugging
+// optimizer problems.
+static bool dump_intermediaries = false;
+
+void ZAMCompiler::OptimizeInsts()
+	{
+	// Do accounting for targeted statements.
+	for ( auto& i : insts1 )
+		{
+		if ( i->target && i->target->live )
+			++(i->target->num_labels);
+		}
+
+	TallySwitchTargets(int_casesI);
+	TallySwitchTargets(uint_casesI);
+	TallySwitchTargets(double_casesI);
+	TallySwitchTargets(str_casesI);
+
+	for ( unsigned int i = 0; i < insts1.size(); ++i )
+		if ( insts1[i]->op == OP_NOP )
+			// We can always get rid of these.
+			KillInst(i);
+
+	if ( analysis_options.dump_ZAM )
+		{
+		printf("Original ZAM code for %s:\n", func->Name());
+		DumpInsts1(nullptr);
+		}
+
+	bool something_changed;
+
+	do
+		{
+		something_changed = false;
+
+		while ( RemoveDeadCode() )
+			{
+			something_changed = true;
+
+			if ( dump_intermediaries )
+				{
+				printf("Removed some dead code:\n");
+				DumpInsts1(nullptr);
+				}
+			}
+
+		while ( CollapseGoTos() )
+			{
+			something_changed = true;
+
+			if ( dump_intermediaries )
+				{
+				printf("Did some collapsing:\n");
+				DumpInsts1(nullptr);
+				}
+			}
+
+		ComputeFrameLifetimes();
+
+		if ( PruneUnused() )
+			{
+			something_changed = true;
+
+			if ( dump_intermediaries )
+				{
+				printf("Did some pruning:\n");
+				DumpInsts1(nullptr);
+				}
+			}
+		}
+	while ( something_changed );
+
+	ReMapFrame();
+	ReMapInterpreterFrame();
+	}
+
+template<typename T>
+void ZAMCompiler::TallySwitchTargets(const CaseMapsI<T>& switches)
+	{
+	for ( auto& targs : switches )
+		for ( auto& targ : targs )
+			++(targ.second->num_labels);
+	}
+
+bool ZAMCompiler::RemoveDeadCode()
+	{
+	if ( insts1.size() == 0 )
+		return false;
+
+	bool did_removal = false;
+
+	for ( unsigned int i = 0; i < insts1.size() - 1; ++i )
+		{
+		auto i0 = insts1[i];
+		if ( ! i0->live )
+			continue;
+
+		auto i1 = NextLiveInst(i0);
+
+		// Look for degenerate branches.
+		auto t = i0->target;
+
+		if ( t == pending_inst && ! i1 )
+			{
+			// This is a branch-to-end, and that's where we'll
+			// wind up anyway.
+			KillInst(i0);
+			did_removal = true;
+			continue;
+			}
+
+		if ( t && t->inst_num > i0->inst_num &&
+		     (! i1 || t->inst_num <= i1->inst_num) )
+			{
+			// This is effectively a branch to the next
+			// instruction. Even if i0 is conditional, there's
+			// no point executing it because regardless of the
+			// outcome of the conditional, we go to the next
+			// successive live instruction (and we don't have
+			// conditionals with side effects).
+			KillInst(i0);
+			did_removal = true;
+			continue;
+			}
+
+		if ( i0->DoesNotContinue() && i1 && i1->num_labels == 0 )
+			{
+			// i1 can't be reached - nor anything unlabeled
+			// after it.
+			KillInsts(i1);
+			did_removal = true;
+			}
+		}
+
+	return did_removal;
+	}
+
+bool ZAMCompiler::CollapseGoTos()
+	{
+	bool did_change = false;
+
+	for ( unsigned int i = 0; i < insts1.size(); ++i )
+		{
+		auto i0 = insts1[i];
+		auto orig_t = i0->target;
+
+		if ( ! i0->live || ! orig_t || orig_t == pending_inst )
+			continue;
+
+		// Resolve branch chains. We both do a version that
+		// follows branches (to jump to the end of any chains),
+		// and one that doesn't (so we can do num_labels bookkeeping
+		// for our initial target).
+		auto first_branch = FirstLiveInst(orig_t, false);
+		if ( ! first_branch )
+			// We're jump-to-end, so there's no possibility of
+			// a chain.
+			continue;
+
+		auto t = FirstLiveInst(orig_t, true);
+
+		if ( ! t )
+			t = pending_inst;
+
+		if ( t != orig_t )
+			{
+			// Update branch.
+			if ( first_branch->live )
+				--first_branch->num_labels;
+			i0->target = t;
+			++t->num_labels;
+			did_change = true;
+			}
+		}
+
+	return did_change;
+	}
+
+bool ZAMCompiler::PruneUnused()
+	{
+	bool did_prune = false;
+
+	for ( unsigned int i = 0; i < insts1.size(); ++i )
+		{
+		auto inst = insts1[i];
+
+		if ( ! inst->live )
+			{
+			ASSERT(inst->num_labels == 0);
+			continue;
+			}
+
+		if ( inst->IsLoad() && ! VarIsUsed(inst->v1) )
+			{
+			did_prune = true;
+			KillInst(i);
+			// Now dead: don't fall through and kill it a second time.
+			continue;
+			}
+
+		if ( inst->IsGlobalLoad() )
+			{
+			// Any straight-line load of the same global
+			// is redundant.
+			for ( unsigned int j = i + 1; j < insts1.size(); ++j )
+				{
+				auto i1 = insts1[j];
+
+				if ( ! i1->live )
+					continue;
+
+				if ( i1->DoesNotContinue() )
+					// End of straight-line block.
+					break;
+
+				if ( i1->num_labels > 0 )
+					// Inbound branch ends block.
+					break;
+
+				if ( i1->aux && i1->aux->can_change_globals )
+					break;
+
+				if ( ! i1->IsGlobalLoad() )
+					continue;
+
+				if ( i1->v2 == inst->v2 )
+					{ // Same global
+					did_prune = true;
+					KillInst(i1);
+					}
+				}
+			}
+
+		if ( ! inst->AssignsToSlot1() )
+			continue;
+
+		int slot = inst->v1;
+		if ( denizen_ending.count(slot) > 0 )
+			// Variable is used, keep assignment.
+			continue;
+
+		if ( frame_denizens[slot]->IsGlobal() )
+			{
+			// Extend the global's range to the end of the
+			// function.
+			denizen_ending[slot] = insts1.back();
+			continue;
+			}
+
+		// Assignment to a local that isn't otherwise used.
+		if ( ! inst->HasSideEffects() )
+			{
+			did_prune = true;
+			// We don't use this assignment.
+			KillInst(i);
+			continue;
+			}
+
+		// If we get here then there's a dead assignment but we
+		// can't remove the instruction entirely because it has
+		// side effects. Transform the instruction into its flavor
+		// that doesn't make an assignment.
+		if ( assignmentless_op.count(inst->op) == 0 )
+			reporter->InternalError("inconsistency in re-flavoring instruction with side effects");
+
+		inst->op_type = assignmentless_op_type[inst->op];
+		inst->op = assignmentless_op[inst->op];
+
+		inst->v1 = inst->v2;
+		inst->v2 = inst->v3;
+		inst->v3 = inst->v4;
+
+		// While we didn't prune the instruction, we did prune the
+		// assignment, so we'll want to reassess variable lifetimes.
+		did_prune = true;
+		}
+
+	return did_prune;
+	}
+
+void ZAMCompiler::ComputeFrameLifetimes()
+	{
+	// Start analysis from scratch, since we might do this repeatedly.
+	inst_beginnings.clear();
+	inst_endings.clear();
+
+	denizen_beginning.clear();
+	denizen_ending.clear();
+
+	for ( unsigned int i = 0; i < insts1.size(); ++i )
+		{
+		auto inst = insts1[i];
+		if ( ! inst->live )
+			continue;
+
+		if ( inst->AssignsToSlot1() )
+			CheckSlotAssignment(inst->v1, inst);
+
+		// Some special-casing.
+		switch ( inst->op ) {
+		case OP_NEXT_TABLE_ITER_VV:
+		case OP_NEXT_TABLE_ITER_VAL_VAR_VVV:
+		{
+			// These assign to an arbitrary long list of variables.
+			auto& iter_vars = inst->aux->loop_vars;
+			auto depth = inst->loop_depth;
+
+			for ( auto v : iter_vars )
+				{
+				CheckSlotAssignment(v, inst);
+
+				// Also mark it as usage throughout the
+				// loop. Otherwise, we risk pruning the
+				// variable if it happens to not be used
+				// (which will mess up the iteration logic)
+				// or doubling it up with some other value
+				// inside the loop (which will fail when
+				// the loop var has memory management
+				// associated with it).
+				ExtendLifetime(v, EndOfLoop(inst, depth));
+				}
+
+			// No need to check the additional "var" associated
+			// with OP_NEXT_TABLE_ITER_VAL_VAR_VVV as that's
+			// a slot-1 assignment. However, similar to other
+			// loop variables, mark this as a usage.
+			if ( inst->op == OP_NEXT_TABLE_ITER_VAL_VAR_VVV )
+				ExtendLifetime(inst->v1, EndOfLoop(inst, depth));
+		}
+			break;
+
+		case OP_NEXT_TABLE_ITER_NO_VARS_VV:
+			break;
+
+		case OP_NEXT_TABLE_ITER_VAL_VAR_NO_VARS_VVV:
+		{
+			auto depth = inst->loop_depth;
+			ExtendLifetime(inst->v1, EndOfLoop(inst, depth));
+		}
+			break;
+
+		case OP_NEXT_VECTOR_ITER_VVV:
+		case OP_NEXT_STRING_ITER_VVV:
+			// Sometimes loops are written that don't actually
+			// use the iteration variable. However, we still
+			// need to mark the variable as having usage
+			// throughout the loop, lest we elide the iteration
+			// instruction. An alternative would be to transform
+			// such iterators into variable-less versions. That
+			// optimization hardly seems worth the trouble, though,
+			// given the presumed rarity of such loops.
+			ExtendLifetime(inst->v1,
+			               EndOfLoop(inst, inst->loop_depth));
+			break;
+
+		case OP_INIT_TABLE_LOOP_VV:
+		case OP_INIT_VECTOR_LOOP_VV:
+		case OP_INIT_STRING_LOOP_VV:
+		{
+			// For all of these, the scope of the aggregate being
+			// looped over is the entire loop, even if it doesn't
+			// directly appear in it, and not just the initializer.
+			// For all three, the aggregate is in v1.
+			ASSERT(i < insts1.size() - 1);
+			auto succ = insts1[i+1];
+			ASSERT(succ->live);
+			auto depth = succ->loop_depth;
+			ExtendLifetime(inst->v1, EndOfLoop(succ, depth));
+
+			// Important: we skip the usual UsesSlots analysis
+			// below since we've already set it, and don't want
+			// to perturb ExtendLifetime's consistency check.
+			continue;
+		}
+
+		case OP_STORE_GLOBAL_V:
+		{
+			// Use of the global goes to here.
+			auto slot = frame_layout1[globalsI[inst->v1].id.get()];
+			ExtendLifetime(slot, EndOfLoop(inst, 1));
+			break;
+		}
+
+		default:
+			// Look for slots in auxiliary information.
+			auto aux = inst->aux;
+			if ( ! aux || ! aux->slots )
+				break;
+
+			for ( auto j = 0; j < aux->n; ++j )
+				{
+				if ( aux->slots[j] < 0 )
+					continue;
+
+				ExtendLifetime(aux->slots[j],
+				               EndOfLoop(inst, 1));
+				}
+			break;
+		}
+
+		int s1, s2, s3, s4;
+
+		if ( ! inst->UsesSlots(s1, s2, s3, s4) )
+			continue;
+
+		CheckSlotUse(s1, inst);
+		CheckSlotUse(s2, inst);
+		CheckSlotUse(s3, inst);
+		CheckSlotUse(s4, inst);
+		}
+	}
+
+void ZAMCompiler::ReMapFrame()
+	{
+	// General approach: go sequentially through the instructions,
+	// see which variables begin their lifetime at each, and at
+	// that point remap the variables to a suitable frame slot.
+
+	frame1_to_frame2.resize(frame_layout1.size(), -1);
+	managed_slotsI.clear();
+
+	for ( unsigned int i = 0; i < insts1.size(); ++i )
+		{
+		auto inst = insts1[i];
+
+		if ( inst_beginnings.count(inst) == 0 )
+			continue;
+
+		auto vars = inst_beginnings[inst];
+		for ( auto v : vars )
+			{
+			// Don't remap variables whose values aren't actually
+			// used.
+			int slot = frame_layout1[v];
+			if ( denizen_ending.count(slot) > 0 )
+				ReMapVar(v, slot, i);
+			}
+		}
+
+#if 0
+	// Low-level debugging code.
+	printf("%s frame remapping:\n", func->Name());
+
+	for ( unsigned int i = 0; i < shared_frame_denizens.size(); ++i )
+		{
+		auto& s = shared_frame_denizens[i];
+		printf("*%d (%s) %lu [%d->%d]:",
+			i, s.is_managed ? "M" : "N",
+			s.ids.size(), s.id_start[0], s.scope_end);
+
+		for ( auto j = 0; j < s.ids.size(); ++j )
+			printf(" %s (%d)", s.ids[j]->Name(), s.id_start[j]);
+
+		printf("\n");
+		}
+#endif
+
+	// Update the globals we track, where we prune globals that
+	// didn't wind up being used.
+	decltype(globalsI) used_globals; // same container type as globalsI, which it replaces below
+	std::vector<int> remapped_globals;
+
+	for ( unsigned int i = 0; i < globalsI.size(); ++i )
+		{
+		auto& g = globalsI[i];
+		g.slot = frame1_to_frame2[g.slot];
+		if ( g.slot >= 0 )
+			{
+			remapped_globals.push_back(used_globals.size());
+			used_globals.push_back(g);
+			}
+		else
+			remapped_globals.push_back(-1);
+		}
+
+	globalsI = used_globals;
+
+	// Gulp - now rewrite every instruction to update its slot usage.
+	// In the process, if an instruction becomes a direct assignment
+	// of <slot-n> = <slot-n>, then we remove it.
+
+	int n1_slots = frame1_to_frame2.size();
+
+	for ( unsigned int i = 0; i < insts1.size(); ++i )
+		{
+		auto inst = insts1[i];
+
+		if ( ! inst->live )
+			continue;
+
+		if ( inst->AssignsToSlot1() )
+			{
+			auto v1 = inst->v1;
+			ASSERT(v1 >= 0 && v1 < n1_slots);
+			inst->v1 = frame1_to_frame2[v1];
+			}
+
+		// Handle special cases.
+		switch ( inst->op ) {
+		case OP_NEXT_TABLE_ITER_VV:
+		case OP_NEXT_TABLE_ITER_VAL_VAR_VVV:
+		{
+			// Rewrite iteration variables.
+			auto& iter_vars = inst->aux->loop_vars;
+			for ( auto& v : iter_vars )
+				{
+				ASSERT(v >= 0 && v < n1_slots);
+				v = frame1_to_frame2[v];
+				}
+		}
+			break;
+
+		default:
+			// Update slots in auxiliary information.
+			auto aux = inst->aux;
+			if ( ! aux || ! aux->slots )
+				break;
+
+			for ( auto j = 0; j < aux->n; ++j )
+				{
+				auto& slot = aux->slots[j];
+
+				if ( slot < 0 )
+					// This is instead a constant.
+					continue;
+
+				auto new_slot = frame1_to_frame2[slot];
+
+				if ( new_slot < 0 )
+					{
+					ODesc d;
+					inst->stmt->GetLocationInfo()->Describe(&d);
+					reporter->Error("%s: value used but not set: %s", d.Description(), frame_denizens[slot]->Name());
+					}
+
+				slot = new_slot;
+				}
+			break;
+		}
+
+		if ( inst->IsGlobalLoad() )
+			{
+			// Slot v2 of these is the index into globals[]
+			// rather than a frame.
+			int g = inst->v2;
+			ASSERT(remapped_globals[g] >= 0);
+			inst->v2 = remapped_globals[g];
+
+			// We *don't* want to UpdateSlots below as that's
+			// based on interpreting v2 as slots rather than an
+			// index into globals.
+			continue;
+			}
+
+		if ( inst->IsGlobalStore() )
+			{ // Slot v1 of these is the index into globals[].
+			int g = inst->v1;
+			ASSERT(remapped_globals[g] >= 0);
+			inst->v1 = remapped_globals[g];
+
+			// We don't have any other slots to update.
+			continue;
+			}
+
+		inst->UpdateSlots(frame1_to_frame2);
+
+		if ( inst->IsDirectAssignment() && inst->v1 == inst->v2 )
+			KillInst(i);
+		}
+
+	frame_sizeI = shared_frame_denizens.size();
+	}
+
+void ZAMCompiler::ReMapInterpreterFrame()
+	{
+	// First, track function parameters. We could elide this if we
+	// decide to alter the calling sequence for compiled functions.
+	auto args = scope->OrderedVars();
+	int nparam = func->GetType()->Params()->NumFields();
+	int next_interp_slot = 0;
+
+	for ( const auto& a : args )
+		{
+		if ( --nparam < 0 )
+			break;
+
+		ASSERT(a->Offset() == next_interp_slot);
+		++next_interp_slot;
+		}
+
+	// Update frame sizes for functions that might have more than
+	// one body.
+	if ( remapped_intrp_frame_sizes.count(func) == 0 ||
+	     remapped_intrp_frame_sizes[func] < next_interp_slot )
+		remapped_intrp_frame_sizes[func] = next_interp_slot;
+	}
+
+void ZAMCompiler::ReMapVar(ID* id, int slot, int inst)
+	{
+	// A greedy algorithm for this is to simply find the first suitable
+	// frame slot. We do that with one twist: we also look for a
+	// compatible slot for which its current end-of-scope is exactly
+	// the start-of-scope for the new identifier. The advantage of
+	// doing so is that this commonly occurs for code like "a.1 = a"
+	// from resolving parameters to inlined functions, and if "a.1" and
+	// "a" share the same slot then we can elide the assignment.
+	//
+	// In principle we could perhaps do better than greedy using a more
+	// powerful allocation method like graph coloring. However, far and
+	// away the bulk of our variables are short-lived temporaries,
+	// for which greedy should work fine.
+	bool is_managed = ZVal::IsManagedType(id->GetType());
+
+	int apt_slot = -1;
+	for ( unsigned int i = 0; i < shared_frame_denizens.size(); ++i )
+		{
+		auto& s = shared_frame_denizens[i];
+
+		// Note that the following test is <= rather than <.
+		// This is because assignment in instructions happens after
+		// using any variables to compute the value to assign.
+		// ZAM instructions are careful to allow operands and
+		// assignment destinations to refer to the same slot.
+
+		if ( s.scope_end <= inst && s.is_managed == is_managed )
+			{ // It's compatible.
+			if ( s.scope_end == inst )
+				{ // It ends right on the money.
+				apt_slot = i;
+				break;
+				}
+
+			else if ( apt_slot < 0 )
+				// We haven't found a candidate yet, take
+				// this one, but keep looking.
+				apt_slot = i;
+			}
+		}
+
+	int scope_end = denizen_ending[slot]->inst_num;
+
+	if ( apt_slot < 0 )
+		{
+		// No compatible existing slot. Create a new one.
+		apt_slot = shared_frame_denizens.size();
+
+		FrameSharingInfo info;
+		info.is_managed = is_managed;
+		shared_frame_denizens.push_back(info);
+
+		if ( is_managed )
+			managed_slotsI.push_back(apt_slot);
+		}
+
+	auto& s = shared_frame_denizens[apt_slot];
+
+	s.ids.push_back(id);
+	s.id_start.push_back(inst);
+	s.scope_end = scope_end;
+
+	frame1_to_frame2[slot] = apt_slot;
+	}
+
+void ZAMCompiler::CheckSlotAssignment(int slot, const ZInstI* inst)
+	{
+	ASSERT(slot >= 0 && slot < frame_denizens.size());
+
+	// We construct temporaries such that their values are never used
+	// earlier than their definitions in loop bodies. For other
+	// denizens, however, they can be, so in those cases we expand the
+	// lifetime beginning to the start of any loop region.
+	if ( ! reducer->IsTemporary(frame_denizens[slot]) )
+		inst = BeginningOfLoop(inst, 1);
+
+	SetLifetimeStart(slot, inst);
+	}
+
+void ZAMCompiler::SetLifetimeStart(int slot, const ZInstI* inst)
+	{
+	if ( denizen_beginning.count(slot) > 0 )
+		{
+		// Beginning of denizen's lifetime already seen, nothing
+		// more to do other than check for consistency.
+		ASSERT(denizen_beginning[slot]->inst_num <= inst->inst_num);
+		}
+
+	else
+		{ // denizen begins here
+		denizen_beginning[slot] = inst;
+
+		if ( inst_beginnings.count(inst) == 0 )
+			{
+			// Need to create a set to track the denizens
+			// beginning at the instruction.
+			std::unordered_set<ID*> denizens;
+			inst_beginnings[inst] = denizens;
+			}
+
+		inst_beginnings[inst].insert(frame_denizens[slot]);
+		}
+	}
+
+void ZAMCompiler::CheckSlotUse(int slot, const ZInstI* inst)
+	{
+	if ( slot < 0 )
+		return;
+
+	ASSERT(slot < frame_denizens.size());
+
+	if ( denizen_beginning.count(slot) == 0 )
+		{
+		ODesc d;
+		inst->stmt->GetLocationInfo()->Describe(&d);
+		reporter->Error("%s: value used but not set: %s", d.Description(), frame_denizens[slot]->Name());
+		}
+
+	// See comment above about temporaries not having their values
+	// extend around loop bodies. HOWEVER if a temporary is defined
+	// at a lower loop depth than that for this instruction, then we
+	// extend its lifetime to the end of this instruction's loop.
+	if ( reducer->IsTemporary(frame_denizens[slot]) )
+		{
+		ASSERT(denizen_beginning.count(slot) > 0);
+		int definition_depth = denizen_beginning[slot]->loop_depth;
+
+		if ( inst->loop_depth > definition_depth )
+			inst = EndOfLoop(inst, inst->loop_depth);
+		}
+	else
+		inst = EndOfLoop(inst, 1);
+
+	ExtendLifetime(slot, inst);
+	}
+
+void ZAMCompiler::ExtendLifetime(int slot, const ZInstI* inst)
+	{
+	if ( denizen_ending.count(slot) > 0 )
+		{
+		// End of denizen's lifetime already seen. Check for
+		// consistency and then extend as needed.
+
+		auto old_inst = denizen_ending[slot];
+
+		// Don't complain for temporaries that already have
+		// extended lifetimes, as that can happen if they're
+		// used as a "for" loop-over target, which already
+		// extends lifetime across the body of the loop.
+		if ( inst->loop_depth > 0 &&
+		     reducer->IsTemporary(frame_denizens[slot]) &&
+		     old_inst->inst_num >= inst->inst_num )
+			return;
+
+		// We expect to only be increasing the slot's lifetime ...
+		// *unless* we're inside a nested loop, in which case
+		// the slot might have already been extended to the
+		// end of the outer loop.
+		ASSERT(old_inst->inst_num <= inst->inst_num ||
+			inst->loop_depth > 1);
+
+		if ( old_inst->inst_num < inst->inst_num )
+			{ // Extend.
+			inst_endings[old_inst].erase(frame_denizens[slot]);
+
+			if ( inst_endings.count(inst) == 0 )
+				{
+				std::unordered_set<ID*> denizens;
+				inst_endings[inst] = denizens;
+				}
+
+			inst_endings[inst].insert(frame_denizens[slot]);
+			denizen_ending.at(slot) = inst;
+			}
+		}
+
+	else
+		{ // first time seeing a use of this denizen
+		denizen_ending[slot] = inst;
+
+		if ( inst_endings.count(inst) == 0 )
+			{
+			std::unordered_set<ID*> denizens;
+			inst_endings[inst] = denizens;
+			}
+
+		inst_endings[inst].insert(frame_denizens[slot]);
+		}
+	}
+
+const ZInstI* ZAMCompiler::BeginningOfLoop(const ZInstI* inst, int depth) const
+	{
+	auto i = inst->inst_num;
+
+	while ( i >= 0 && insts1[i]->loop_depth >= depth )
+		--i;
+
+	if ( i == inst->inst_num )
+		return inst;
+
+	// We moved backwards to just beyond a loop that inst is part of.
+	// Move to that loop's (live) beginning.
+	++i;
+	while ( i != inst->inst_num && ! insts1[i]->live )
+		++i;
+
+	return insts1[i];
+	}
+
+const ZInstI* ZAMCompiler::EndOfLoop(const ZInstI* inst, int depth) const
+	{
+	auto i = inst->inst_num;
+
+	while ( i < int(insts1.size()) && insts1[i]->loop_depth >= depth )
+		++i;
+
+	if ( i == inst->inst_num )
+		return inst;
+
+	// We moved forwards to just beyond a loop that inst is part of.
+	// Move to that loop's (live) end.
+	--i;
+	while ( i != inst->inst_num && ! insts1[i]->live )
+		--i;
+
+	return insts1[i];
+	}
+
+bool ZAMCompiler::VarIsAssigned(int slot) const
+	{
+	for ( unsigned int i = 0; i < insts1.size(); ++i )
+		{
+		auto& inst = insts1[i];
+		if ( inst->live && VarIsAssigned(slot, inst) )
+			return true;
+		}
+
+	return false;
+	}
+
+bool ZAMCompiler::VarIsAssigned(int slot, const ZInstI* i) const
+	{
+	// Special-case for table iterators, which assign to a bunch
+	// of variables but they're not immediately visible in the
+	// instruction layout.
+	if ( i->op == OP_NEXT_TABLE_ITER_VAL_VAR_VVV ||
+	     i->op == OP_NEXT_TABLE_ITER_VV )
+		{
+		auto& iter_vars = i->aux->loop_vars;
+		for ( auto v : iter_vars )
+			if ( v == slot )
+				return true;
+
+		if ( i->op != OP_NEXT_TABLE_ITER_VAL_VAR_VVV )
+			return false;
+
+		// Otherwise fall through, since that flavor of iterate
+		// *does* also assign to slot 1.
+		}
+
+	if ( i->op_type == OP_VV_FRAME )
+		// We don't want to consider these as assigning to the
+		// variable, since the point of this method is to figure
+		// out which variables don't need storing to the frame
+		// because their internal value is never modified.
+		return false;
+
+	return i->AssignsToSlot1() && i->v1 == slot;
+	}
+
+bool ZAMCompiler::VarIsUsed(int slot) const
+	{
+	for ( unsigned int i = 0; i < insts1.size(); ++i )
+		{
+		auto& inst = insts1[i];
+		if ( inst->live && inst->UsesSlot(slot) )
+			return true;
+
+		auto aux = inst->aux;
+		if ( aux && aux->slots )
+			{
+			for ( int j = 0; j < aux->n; ++j )
+				if ( aux->slots[j] == slot )
+					return true;
+			}
+		}
+
+	return false;
+	}
+
+ZInstI* ZAMCompiler::FirstLiveInst(ZInstI* i, bool follow_gotos)
+	{
+	if ( i == pending_inst )
+		return nullptr;
+
+	auto n = FirstLiveInst(i->inst_num, follow_gotos);
+	if ( n < insts1.size() )
+		return insts1[n];
+	else
+		return nullptr;
+	}
+
+int ZAMCompiler::FirstLiveInst(int i, bool follow_gotos)
+	{
+	int num_inspected = 0;
+	while ( i < insts1.size() )
+		{
+		auto i0 = insts1[i];
+		if ( i0->live )
+			{
+			if ( follow_gotos && i0->IsUnconditionalBranch() )
+				{
+				if ( ++num_inspected > insts1.size() )
+					{
+					reporter->Error("%s contains an infinite loop", func->Name());
+					return i;
+					}
+
+				i = i0->target->inst_num;
+				continue;
+				}
+
+			return i;
+			}
+
+		++i;
+		++num_inspected;
+		}
+
+	return i;
+	}
+
+void ZAMCompiler::KillInst(int i)
+	{
+	auto inst = insts1[i];
+
+	ASSERT(inst->live);
+
+	inst->live = false;
+	auto t = inst->target;
+	if ( t )
+		{
+		if ( t->live )
+			{
+			--(t->num_labels);
+			ASSERT(t->num_labels >= 0);
+			}
+		else
+			ASSERT(t->num_labels == 0);
+		}
+
+	int num_labels = inst->num_labels;
+	// We're about to transfer its labels.
+	inst->num_labels = 0;
+
+	if ( inst->IsUnconditionalBranch() )
+		{
+		// No direct flow after this point ... unless we're
+		// branching to the next immediate live instruction.
+		auto after_inst = NextLiveInst(inst, true);
+		auto live_target = FirstLiveInst(t, true);
+
+		if ( after_inst != live_target )
+			{
+			// No flow after inst. Don't propagate its labels.
+			// Given that, it had better not have any!
+			ASSERT(num_labels == 0);
+			}
+		}
+
+	if ( num_labels == 0 )
+		// No labels to propagate.
+		return;
+
+	for ( auto j = i + 1; j < insts1.size(); ++j )
+		{
+		auto succ = insts1[j];
+		if ( succ->live )
+			{
+			succ->num_labels += num_labels;
+			break;
+			}
+		}
+	}
+
+void ZAMCompiler::KillInsts(int i)
+	{
+	auto inst = insts1[i];
+
+	ASSERT(inst->num_labels == 0);
+
+	KillInst(i);
+
+	for ( auto j = i + 1; j < insts1.size(); ++j )
+		{
+		auto succ = insts1[j];
+		if ( succ->live )
+			{
+			if ( succ->num_labels == 0 )
+				KillInst(j);
+			else
+				// Found viable succeeding code.
+				break;
+			}
+		}
+	}
+
+} // zeek::detail
diff --git a/src/script_opt/ZAM/Branches.cc b/src/script_opt/ZAM/Branches.cc
new file mode 100644
index 0000000000..090ab4ade8
--- /dev/null
+++ b/src/script_opt/ZAM/Branches.cc
@@ -0,0 +1,174 @@
+// See the file "COPYING" in the main distribution directory for copyright.
+
+// Methods for dealing with ZAM branches.
+
+#include "zeek/Reporter.h"
+#include "zeek/Desc.h"
+#include "zeek/script_opt/ZAM/Compile.h"
+
+namespace zeek::detail {
+
+
+void ZAMCompiler::PushGoTos(GoToSets& gotos)
+	{
+	// Open a fresh, empty set of pending GoTo's for a new scope.
+	gotos.emplace_back();
+	}
+
+void ZAMCompiler::ResolveGoTos(GoToSets& gotos, const InstLabel l)
+	{
+	auto& g = gotos.back();
+
+	for ( auto i = 0U; i < g.size(); ++i )
+		SetGoTo(g[i], l);
+
+	gotos.pop_back();
+	}
+
+ZAMStmt ZAMCompiler::GenGoTo(GoToSet& v)
+	{
+	auto g = GoToStub();
+	v.push_back(g.stmt_num);
+
+	return g;
+	}
+
+ZAMStmt ZAMCompiler::GoToStub()
+	{
+	ZInstI z(OP_GOTO_V, 0);
+	z.op_type = OP_V_I1;
+	return AddInst(z);
+	}
+
+ZAMStmt ZAMCompiler::GoTo(const InstLabel l)
+	{
+	ZInstI inst(OP_GOTO_V, 0);
+	inst.target = l;
+	inst.target_slot = 1;
+	inst.op_type = OP_V_I1;
+	return AddInst(inst);
+	}
+
+InstLabel ZAMCompiler::GoToTarget(const ZAMStmt s)
+	{
+	return insts1[s.stmt_num];
+	}
+
+InstLabel ZAMCompiler::GoToTargetBeyond(const ZAMStmt s)
+	{
+	int n = s.stmt_num;
+
+	if ( n == int(insts1.size()) - 1 )
+		{
+		if ( ! pending_inst )
+			pending_inst = new ZInstI();
+
+		return pending_inst;
+		}
+
+	return insts1[n+1];
+	}
+
+void ZAMCompiler::SetTarget(ZInstI* inst, const InstLabel l, int slot)
+	{
+	inst->target = l;
+	inst->target_slot = slot;
+	}
+
+ZInstI* ZAMCompiler::FindLiveTarget(ZInstI* goto_target)
+	{
+	if ( goto_target == pending_inst )
+		return goto_target;
+
+	int idx = goto_target->inst_num;
+	ASSERT(idx >= 0 && idx <= int(insts1.size()));
+
+	while ( idx < int(insts1.size()) && ! insts1[idx]->live )
+		++idx;
+
+	if ( idx == int(insts1.size()) )
+		return pending_inst;
+	else
+		return insts1[idx];
+	}
+
+void ZAMCompiler::ConcretizeBranch(ZInstI* inst, ZInstI* target,
+					int target_slot)
+	{
+	int t;	// instruction number of target
+
+	if ( target == pending_inst )
+		{
+		if ( insts2.size() == 0 )
+			// We're doing this in the context of concretizing
+			// intermediary instructions for dumping them out.
+			t = insts1.size();
+		else
+			t = insts2.size();
+		}
+	else
+		t = target->inst_num;
+
+	switch ( target_slot ) {
+	case 1:	inst->v1 = t; break;
+	case 2:	inst->v2 = t; break;
+	case 3:	inst->v3 = t; break;
+	case 4:	inst->v4 = t; break;
+
+	default:
+		reporter->InternalError("bad GoTo target");
+	}
+	}
+
+void ZAMCompiler::SetV1(ZAMStmt s, const InstLabel l)
+	{
+	auto inst = insts1[s.stmt_num];
+	SetTarget(inst, l, 1);
+	ASSERT(inst->op_type == OP_V || inst->op_type == OP_V_I1);
+	inst->op_type = OP_V_I1;
+	}
+
+void ZAMCompiler::SetV2(ZAMStmt s, const InstLabel l)
+	{
+	auto inst = insts1[s.stmt_num];
+	SetTarget(inst, l, 2);
+
+	auto& ot = inst->op_type;
+
+	if ( ot == OP_VV )
+		ot = OP_VV_I2;
+
+	else if ( ot == OP_VC || ot == OP_VVC )
+		ot = OP_VVC_I2;
+
+	else
+		ASSERT(ot == OP_VV_I2 || ot == OP_VV_I1_I2 || ot == OP_VVC_I2);
+	}
+
+void ZAMCompiler::SetV3(ZAMStmt s, const InstLabel l)
+	{
+	auto inst = insts1[s.stmt_num];
+	SetTarget(inst, l, 3);
+
+	auto ot = inst->op_type;
+
+	if ( ot == OP_VVV_I2_I3 || ot == OP_VVVC_I3 )
+		return;
+
+	ASSERT(ot == OP_VV || ot == OP_VVV || ot == OP_VVV_I3);
+	inst->op_type = OP_VVV_I3;
+	}
+
+void ZAMCompiler::SetV4(ZAMStmt s, const InstLabel l)
+	{
+	auto inst = insts1[s.stmt_num];
+	SetTarget(inst, l, 4);
+
+	auto ot = inst->op_type;
+
+	ASSERT(ot == OP_VVVV || ot == OP_VVVV_I4);
+	if ( ot != OP_VVVV_I4 )
+		inst->op_type = OP_VVVV_I4;
+	}
+
+} // zeek::detail
diff --git a/src/script_opt/ZAM/BuiltIn.cc b/src/script_opt/ZAM/BuiltIn.cc
new file mode 100644
index 0000000000..7ee89f40e6
--- /dev/null
+++ b/src/script_opt/ZAM/BuiltIn.cc
@@ -0,0 +1,447 @@
+// See the file "COPYING" in the main distribution directory for copyright.
+
+// ZAM methods associated with instructions that replace calls to
+// built-in functions.
+
+#include "zeek/Func.h"
+#include "zeek/Reporter.h"
+#include "zeek/script_opt/ZAM/Compile.h"
+
+namespace zeek::detail {
+
+// If e is a call (or an assignment of a call) to one of the built-in
+// functions listed in the table below, hands it to the corresponding
+// BuiltIn_XXX method and returns that method's result.  Returns false
+// for indirect calls, undefined functions, non-built-in functions, and
+// built-ins not in the table.
+bool ZAMCompiler::IsZAM_BuiltIn(const Expr* e)
+	{
+	// The expression e is either directly a call (in which case there's
+	// no return value), or an assignment to a call.
+	const CallExpr* c;
+
+	if ( e->Tag() == EXPR_CALL )
+		c = e->AsCallExpr();
+	else
+		c = e->GetOp2()->AsCallExpr();
+
+	auto func_expr = c->Func();
+	if ( func_expr->Tag() != EXPR_NAME )
+		// An indirect call.
+		return false;
+
+	auto func_val = func_expr->AsNameExpr()->Id()->GetVal();
+	if ( ! func_val )
+		// A call to a function that hasn't been defined.
+		return false;
+
+	auto func = func_val->AsFunc();
+	if ( func->GetKind() != BuiltinFunc::BUILTIN_FUNC )
+		return false;
+
+	auto& args = c->Args()->Exprs();
+
+	const NameExpr* n = nullptr;	// name to assign to, if any
+
+	if ( e->Tag() != EXPR_CALL )
+		n = e->GetOp1()->AsRefExpr()->GetOp1()->AsNameExpr();
+
+	using GenBuiltIn = bool (ZAMCompiler::*)(const NameExpr* n,
+						const ExprPList& args);
+
+	// Maps the script-level name of each supported built-in to the
+	// method that generates its replacement instruction.
+	static const std::vector<std::pair<const char*, GenBuiltIn>> builtins = {
+		{ "Analyzer::__name", &ZAMCompiler::BuiltIn_Analyzer__name },
+		{ "Broker::__flush_logs",
+			&ZAMCompiler::BuiltIn_Broker__flush_logs },
+		{ "Files::__enable_reassembly",
+			&ZAMCompiler::BuiltIn_Files__enable_reassembly },
+		{ "Files::__set_reassembly_buffer",
+			&ZAMCompiler::BuiltIn_Files__set_reassembly_buffer },
+		{ "Log::__write", &ZAMCompiler::BuiltIn_Log__write },
+		{ "current_time", &ZAMCompiler::BuiltIn_current_time },
+		{ "get_port_transport_proto",
+			&ZAMCompiler::BuiltIn_get_port_etc },
+		{ "network_time", &ZAMCompiler::BuiltIn_network_time },
+		{ "reading_live_traffic",
+			&ZAMCompiler::BuiltIn_reading_live_traffic },
+		{ "reading_traces", &ZAMCompiler::BuiltIn_reading_traces },
+		{ "strstr", &ZAMCompiler::BuiltIn_strstr },
+		{ "sub_bytes", &ZAMCompiler::BuiltIn_sub_bytes },
+		{ "to_lower", &ZAMCompiler::BuiltIn_to_lower },
+	};
+
+	for ( const auto& b : builtins )
+		if ( util::streq(func->Name(), b.first) )
+			return (this->*(b.second))(n, args);
+
+	return false;
+	}
+
+
+// Analyzer::__name.  Only compiled when the result is assigned and the
+// argument is a variable rather than a constant.
+bool ZAMCompiler::BuiltIn_Analyzer__name(const NameExpr* n,
+						const ExprPList& args)
+	{
+	if ( ! n )
+		{
+		reporter->Warning("return value from built-in function ignored");
+		return true;
+		}
+
+	if ( args[0]->Tag() == EXPR_CONST )
+		// Doesn't seem worth developing a variant for this weird
+		// usage case.
+		return false;
+
+	int nslot = Frame1Slot(n, OP1_WRITE);
+	auto arg_t = args[0]->AsNameExpr();
+
+	auto z = ZInstI(OP_ANALYZER__NAME_VV, nslot, FrameSlot(arg_t));
+	z.SetType(args[0]->GetType());
+
+	AddInst(z);
+
+	return true;
+	}
+
+// Broker::__flush_logs, with separate instructions depending on
+// whether the result is assigned.
+bool ZAMCompiler::BuiltIn_Broker__flush_logs(const NameExpr* n,
+						const ExprPList& args)
+	{
+	if ( n )
+		AddInst(ZInstI(OP_BROKER_FLUSH_LOGS_V,
+				Frame1Slot(n, OP1_WRITE)));
+	else
+		AddInst(ZInstI(OP_BROKER_FLUSH_LOGS_X));
+
+	return true;
+	}
+
+// Files::__enable_reassembly.  Only compiled when the result is unused
+// and the argument is a variable.
+bool ZAMCompiler::BuiltIn_Files__enable_reassembly(const NameExpr* n,
+							const ExprPList& args)
+	{
+	if ( n )
+		// While this built-in nominally returns a value, existing
+		// script code ignores it, so for now we don't bother
+		// special-casing the possibility that it doesn't.
+		return false;
+
+	if ( args[0]->Tag() == EXPR_CONST )
+		// Weird!
+		return false;
+
+	auto arg_f = args[0]->AsNameExpr();
+
+	AddInst(ZInstI(OP_FILES__ENABLE_REASSEMBLY_V, FrameSlot(arg_f)));
+
+	return true;
+	}
+
+// Files::__set_reassembly_buffer.  Only compiled when the result is
+// unused and the first argument is a variable; the second argument can
+// be either a variable or a constant.
+bool ZAMCompiler::BuiltIn_Files__set_reassembly_buffer(const NameExpr* n,
+							const ExprPList& args)
+	{
+	if ( n )
+		// See above for enable_reassembly
+		return false;
+
+	if ( args[0]->Tag() == EXPR_CONST )
+		// Weird!
+		return false;
+
+	auto arg_f = FrameSlot(args[0]->AsNameExpr());
+
+	ZInstI z;
+
+	if ( args[1]->Tag() == EXPR_CONST )
+		{
+		auto arg_cnt = args[1]->AsConstExpr()->Value()->AsCount();
+		z = ZInstI(OP_FILES__SET_REASSEMBLY_BUFFER_VC, arg_f, arg_cnt);
+		z.op_type = OP_VV_I2;
+		}
+	else
+		z = ZInstI(OP_FILES__SET_REASSEMBLY_BUFFER_VV, arg_f,
+				FrameSlot(args[1]->AsNameExpr()));
+
+	AddInst(z);
+
+	return true;
+	}
+
+// Log::__write.  Only compiled when the columns argument is a
+// variable.  There are four variants, depending on whether the result
+// is assigned and whether the log ID is a constant.
+bool ZAMCompiler::BuiltIn_Log__write(const NameExpr* n, const ExprPList& args)
+	{
+	auto id = args[0];
+	auto columns = args[1];
+
+	if ( columns->Tag() != EXPR_NAME )
+		return false;
+
+	auto columns_n = columns->AsNameExpr();
+	auto col_slot = FrameSlot(columns_n);
+
+	bool const_id = (id->Tag() == EXPR_CONST);
+
+	ZInstAux* aux = nullptr;
+
+	if ( const_id )
+		{
+		// A constant ID's value travels in the instruction's
+		// auxiliary information.
+		aux = new ZInstAux(1);
+		aux->Add(0, id->AsConstExpr()->ValuePtr());
+		}
+
+	ZInstI z;
+
+	if ( n )
+		{
+		int nslot = Frame1Slot(n, OP1_WRITE);
+		if ( const_id )
+			{
+			z = ZInstI(OP_LOG_WRITEC_VV, nslot, col_slot);
+			z.aux = aux;
+			}
+		else
+			z = ZInstI(OP_LOG_WRITE_VVV, nslot,
+					FrameSlot(id->AsNameExpr()), col_slot);
+		}
+	else
+		{
+		if ( const_id )
+			{
+			z = ZInstI(OP_LOG_WRITEC_V, col_slot, id->AsConstExpr());
+			z.aux = aux;
+			}
+		else
+			z = ZInstI(OP_LOG_WRITE_VV, FrameSlot(id->AsNameExpr()),
+					col_slot);
+		}
+
+	z.SetType(columns_n->GetType());
+
+	AddInst(z);
+
+	return true;
+	}
+
+// current_time.  Generates nothing (other than a warning) if the result
+// is unused.
+bool ZAMCompiler::BuiltIn_current_time(const NameExpr* n, const ExprPList& args)
+	{
+	if ( ! n )
+		{
+		reporter->Warning("return value from built-in function ignored");
+		return true;
+		}
+
+	int nslot = Frame1Slot(n, OP1_WRITE);
+
+	AddInst(ZInstI(OP_CURRENT_TIME_V, nslot));
+
+	return true;
+	}
+
+// get_port_transport_proto.  Only compiled when the argument is a
+// variable.
+bool ZAMCompiler::BuiltIn_get_port_etc(const NameExpr* n, const ExprPList& args)
+	{
+	if ( ! n )
+		{
+		reporter->Warning("return value from built-in function ignored");
+		return true;
+		}
+
+	auto p = args[0];
+
+	if ( p->Tag() != EXPR_NAME )
+		return false;
+
+	auto pn = p->AsNameExpr();
+	int nslot = Frame1Slot(n, OP1_WRITE);
+
+	AddInst(ZInstI(OP_GET_PORT_TRANSPORT_PROTO_VV, nslot, FrameSlot(pn)));
+
+	return true;
+	}
+
+// network_time.  Generates nothing (other than a warning) if the result
+// is unused.
+bool ZAMCompiler::BuiltIn_network_time(const NameExpr* n, const ExprPList& args)
+	{
+	if ( ! n )
+		{
+		reporter->Warning("return value from built-in function ignored");
+		return true;
+		}
+
+	int nslot = Frame1Slot(n, OP1_WRITE);
+
+	AddInst(ZInstI(OP_NETWORK_TIME_V, nslot));
+
+	return true;
+	}
+
+// reading_live_traffic.  Generates nothing (other than a warning) if
+// the result is unused.
+bool ZAMCompiler::BuiltIn_reading_live_traffic(const NameExpr* n,
+						const ExprPList& args)
+	{
+	if ( ! n )
+		{
+		reporter->Warning("return value from built-in function ignored");
+		return true;
+		}
+
+	int nslot = Frame1Slot(n, OP1_WRITE);
+
+	AddInst(ZInstI(OP_READING_LIVE_TRAFFIC_V, nslot));
+
+	return true;
+	}
+
+// reading_traces.  Generates nothing (other than a warning) if the
+// result is unused.
+bool ZAMCompiler::BuiltIn_reading_traces(const NameExpr* n,
+						const ExprPList& args)
+	{
+	if ( ! n )
+		{
+		reporter->Warning("return value from built-in function ignored");
+		return true;
+		}
+
+	int nslot = Frame1Slot(n, OP1_WRITE);
+
+	AddInst(ZInstI(OP_READING_TRACES_V, nslot));
+
+	return true;
+	}
+
+// strstr.  Compiled when at least one of the two arguments is a
+// variable; not compiled when both are constants.
+bool ZAMCompiler::BuiltIn_strstr(const NameExpr* n, const ExprPList& args)
+	{
+	if ( ! n )
+		{
+		reporter->Warning("return value from built-in function ignored");
+		return true;
+		}
+
+	auto big = args[0];
+	auto little = args[1];
+
+	auto big_n = big->Tag() == EXPR_NAME ? big->AsNameExpr() : nullptr;
+	auto little_n = little->Tag() == EXPR_NAME ?
+				little->AsNameExpr() : nullptr;
+
+	ZInstI z;
+
+	if ( big_n && little_n )
+		z = GenInst(OP_STRSTR_VVV, n, big_n, little_n);
+	else if ( big_n )
+		z = GenInst(OP_STRSTR_VVC, n, big_n, little->AsConstExpr());
+	else if ( little_n )
+		z = GenInst(OP_STRSTR_VCV, n, little_n, big->AsConstExpr());
+	else
+		return false;
+
+	AddInst(z);
+
+	return true;
+	}
+
+// sub_bytes.  Picks one of eight instructions according to which of the
+// three arguments are constants (see ConstArgsMask()).
+bool ZAMCompiler::BuiltIn_sub_bytes(const NameExpr* n, const ExprPList& args)
+	{
+	if ( ! n )
+		{
+		reporter->Warning("return value from built-in function ignored");
+		return true;
+		}
+
+	auto arg_s = args[0];
+	auto arg_start = args[1];
+	auto arg_n = args[2];
+
+	int nslot = Frame1Slot(n, OP1_WRITE);
+
+	// For a variable argument, the following hold its frame slot;
+	// for a constant start/length, the constant's value.
+	int v2 = FrameSlotIfName(arg_s);
+	int v3 = ConvertToCount(arg_start);
+	int v4 = ConvertToInt(arg_n);
+
+	auto c = arg_s->Tag() == EXPR_CONST ? arg_s->AsConstExpr() : nullptr;
+
+	ZInstI z;
+
+	switch ( ConstArgsMask(args, 3) ) {
+	case 0x0:	// all variable
+		z = ZInstI(OP_SUB_BYTES_VVVV, nslot, v2, v3, v4);
+		z.op_type = OP_VVVV;
+		break;
+
+	case 0x1:	// last argument a constant
+		z = ZInstI(OP_SUB_BYTES_VVVi, nslot, v2, v3, v4);
+		z.op_type = OP_VVVV_I4;
+		break;
+
+	case 0x2:	// 2nd argument a constant; flip!
+		z = ZInstI(OP_SUB_BYTES_VViV, nslot, v2, v4, v3);
+		z.op_type = OP_VVVV_I4;
+		break;
+
+	case 0x3:	// both 2nd and third are constants
+		z = ZInstI(OP_SUB_BYTES_VVii, nslot, v2, v3, v4);
+		z.op_type = OP_VVVV_I3_I4;
+		break;
+
+	case 0x4:	// first argument a constant
+		z = ZInstI(OP_SUB_BYTES_VVVC, nslot, v3, v4, c);
+		z.op_type = OP_VVVC;
+		break;
+
+	case 0x5:	// first and third constant
+		z = ZInstI(OP_SUB_BYTES_VViC, nslot, v3, v4, c);
+		z.op_type = OP_VVVC_I3;
+		break;
+
+	case 0x6:	// first and second constant - flip!
+		z = ZInstI(OP_SUB_BYTES_ViVC, nslot, v4, v3, c);
+		z.op_type = OP_VVVC_I3;
+		break;
+
+	case 0x7:	// whole shebang
+		z = ZInstI(OP_SUB_BYTES_ViiC, nslot, v3, v4, c);
+		z.op_type = OP_VVVC_I2_I3;
+		break;
+
+	default:
+		reporter->InternalError("bad constant mask");
+	}
+
+	AddInst(z);
+
+	return true;
+	}
+
+// to_lower.  A constant argument is lower-cased right here, turning
+// the call into the assignment of a constant.
+bool ZAMCompiler::BuiltIn_to_lower(const NameExpr* n, const ExprPList& args)
+	{
+	if ( ! n )
+		{
+		reporter->Warning("return value from built-in function ignored");
+		return true;
+		}
+
+	int nslot = Frame1Slot(n, OP1_WRITE);
+
+	if ( args[0]->Tag() == EXPR_CONST )
+		{
+		auto arg_c = args[0]->AsConstExpr()->Value()->AsStringVal();
+		ValPtr arg_lc = {AdoptRef{}, ZAM_to_lower(arg_c)};
+		auto arg_lce = make_intrusive<ConstExpr>(arg_lc);
+		auto z = ZInstI(OP_ASSIGN_CONST_VC, nslot, arg_lce.get());
+		z.is_managed = true;
+		AddInst(z);
+		}
+
+	else
+		{
+		auto arg_s = args[0]->AsNameExpr();
+		AddInst(ZInstI(OP_TO_LOWER_VV, nslot, FrameSlot(arg_s)));
+		}
+
+	return true;
+	}
+
+// Returns a bit mask with one bit per argument, set if that argument is
+// a constant.  The first argument maps to the highest-order of the
+// nargs bits, the last argument to the lowest-order one.
+bro_uint_t ZAMCompiler::ConstArgsMask(const ExprPList& args, int nargs) const
+	{
+	ASSERT(args.length() == nargs);
+
+	bro_uint_t mask = 0;
+
+	for ( int i = 0; i < nargs; ++i )
+		{
+		mask <<= 1;
+		if ( args[i]->Tag() == EXPR_CONST )
+			mask |= 1;
+		}
+
+	return mask;
+	}
+
+} // zeek::detail
diff --git a/src/script_opt/ZAM/BuiltIn.h b/src/script_opt/ZAM/BuiltIn.h
new file mode 100644
index 0000000000..08467dcd95
--- /dev/null
+++ b/src/script_opt/ZAM/BuiltIn.h
@@ -0,0 +1,27 @@
+// See the file "COPYING" in the main distribution directory for copyright.
+
+// ZAM compiler method declarations for built-in functions.
+//
+// This file is only included by Compile.h, in the context of the ZAMCompiler
+// class declaration (so these are methods, not standalone functions). We
+// maintain it separately so that the conceptual overhead of adding a new
+// built-in is lower.
+
+// If the given expression corresponds to a call to a ZAM built-in,
+// then compiles the call and returns true. Otherwise, returns false.
+bool IsZAM_BuiltIn(const Expr* e);
+
+// Each returns true if it compiled the call; n is the assignee, or nil.
+bool BuiltIn_Analyzer__name(const NameExpr* n, const ExprPList& args);
+bool BuiltIn_Broker__flush_logs(const NameExpr* n, const ExprPList& args);
+bool BuiltIn_Files__enable_reassembly(const NameExpr* n, const ExprPList& args);
+bool BuiltIn_Files__set_reassembly_buffer(const NameExpr* n, const ExprPList& args);
+bool BuiltIn_Log__write(const NameExpr* n, const ExprPList& args);
+bool BuiltIn_current_time(const NameExpr* n, const ExprPList& args);
+bool BuiltIn_get_port_etc(const NameExpr* n, const ExprPList& args);
+bool BuiltIn_network_time(const NameExpr* n, const ExprPList& args);
+bool BuiltIn_reading_live_traffic(const NameExpr* n, const ExprPList& args);
+bool BuiltIn_reading_traces(const NameExpr* n, const ExprPList& args);
+bool BuiltIn_strstr(const NameExpr* n, const ExprPList& args);
+bool BuiltIn_sub_bytes(const NameExpr* n, const ExprPList& args);
+bool BuiltIn_to_lower(const NameExpr* n, const ExprPList& args);
diff --git a/src/script_opt/ZAM/Compile.h b/src/script_opt/ZAM/Compile.h
new file mode 100644
index 0000000000..4b99f96619
--- /dev/null
+++ b/src/script_opt/ZAM/Compile.h
@@ -0,0 +1,639 @@
+// See the file "COPYING" in the main distribution directory for copyright.
+
+// ZAM: Zeek Abstract Machine compiler.
+
+#pragma once
+
+#include "zeek/Event.h"
+#include "zeek/script_opt/UseDefs.h"
+#include "zeek/script_opt/ZAM/ZBody.h"
+
+namespace zeek {
+class EventHandler;
+}
+
+namespace zeek::detail {
+
+class NameExpr;
+class ConstExpr;
+class FieldExpr;
+class ListExpr;
+
+class Stmt;
+class SwitchStmt;
+class CatchReturnStmt;
+
+class ProfileFunc;
+
+using InstLabel = ZInstI*;
+
+// Class representing a single compiled statement. (This is different from,
+// but related to, the ZAM instruction(s) generated for that compilation.)
+// Designed to be fully opaque, but also effective without requiring pointer
+// management.
+class ZAMStmt { +protected: + friend class ZAMCompiler; + + ZAMStmt() { stmt_num = -1; /* flag that it needs to be set */ } + ZAMStmt(int _stmt_num) { stmt_num = _stmt_num; } + + int stmt_num; +}; + +// Class that holds values that only have meaning to the ZAM compiler, +// but that needs to be held (opaquely, via a pointer) by external +// objects. +class OpaqueVals { +public: + OpaqueVals(ZInstAux* _aux) { aux = _aux; } + + ZInstAux* aux; +}; + +class ZAMCompiler { +public: + ZAMCompiler(ScriptFunc* f, std::shared_ptr pf, + ScopePtr scope, StmtPtr body, std::shared_ptr ud, + std::shared_ptr rd); + + StmtPtr CompileBody(); + + const FrameReMap& FrameDenizens() const + { return shared_frame_denizens_final; } + + const std::vector& ManagedSlots() const + { return managed_slotsI; } + + const std::vector& Globals() const + { return globalsI; } + + bool NonRecursive() const { return non_recursive; } + + const TableIterVec& GetTableIters() const { return table_iters; } + int NumStepIters() const { return num_step_iters; } + + template + const CaseMaps& GetCases() const + { + if constexpr ( std::is_same_v ) + return int_cases; + else if constexpr ( std::is_same_v ) + return uint_cases; + else if constexpr ( std::is_same_v ) + return double_cases; + else if constexpr ( std::is_same_v ) + return str_cases; + } + + void Dump(); + +private: + void Init(); + void InitGlobals(); + void InitArgs(); + void InitLocals(); + void TrackMemoryManagement(); + + void ResolveHookBreaks(); + void ComputeLoopLevels(); + void AdjustBranches(); + void RetargetBranches(); + void RemapFrameDenizens(const std::vector& inst1_to_inst2); + void CreateSharedFrameDenizens(); + void ConcretizeSwitches(); + + // The following are used for switch statements, mapping the + // switch value (which can be any atomic type) to a branch target. + // We have vectors of them because functions can contain multiple + // switches. 
+ // See ZBody.h for their concrete counterparts, which we've + // already #include'd. + template using CaseMapI = std::map; + template using CaseMapsI = std::vector>; + + template + void ConcretizeSwitchTables(const CaseMapsI& abstract_cases, + CaseMaps& concrete_cases); + + template + void DumpCases(const T& cases, const char* type_name) const; + void DumpInsts1(const FrameReMap* remappings); + +#include "zeek/ZAM-MethodDecls.h" + + const ZAMStmt CompileStmt(const StmtPtr& body) + { return CompileStmt(body.get()); } + const ZAMStmt CompileStmt(const Stmt* body); + + void SetCurrStmt(const Stmt* stmt) { curr_stmt = stmt; } + + const ZAMStmt CompilePrint(const PrintStmt* ps); + const ZAMStmt CompileExpr(const ExprStmt* es); + const ZAMStmt CompileIf(const IfStmt* is); + const ZAMStmt CompileSwitch(const SwitchStmt* sw); + const ZAMStmt CompileAdd(const AddStmt* as); + const ZAMStmt CompileDel(const DelStmt* ds); + const ZAMStmt CompileWhile(const WhileStmt* ws); + const ZAMStmt CompileFor(const ForStmt* f); + const ZAMStmt CompileReturn(const ReturnStmt* r); + const ZAMStmt CompileCatchReturn(const CatchReturnStmt* cr); + const ZAMStmt CompileStmts(const StmtList* sl); + const ZAMStmt CompileInit(const InitStmt* is); + const ZAMStmt CompileWhen(const WhenStmt* ws); + + const ZAMStmt CompileNext() + { return GenGoTo(nexts.back()); } + const ZAMStmt CompileBreak() + { return GenGoTo(breaks.back()); } + const ZAMStmt CompileFallThrough() + { return GenGoTo(fallthroughs.back()); } + const ZAMStmt CompileCatchReturn() + { return GenGoTo(catches.back()); } + + const ZAMStmt IfElse(const Expr* e, const Stmt* s1, const Stmt* s2); + const ZAMStmt While(const Stmt* cond_stmt, const Expr* cond, + const Stmt* body); + + const ZAMStmt InitRecord(IDPtr id, RecordType* rt); + const ZAMStmt InitVector(IDPtr id, VectorType* vt); + const ZAMStmt InitTable(IDPtr id, TableType* tt, Attributes* attrs); + + const ZAMStmt ValueSwitch(const SwitchStmt* sw, const NameExpr* v, + const 
ConstExpr* c); + const ZAMStmt TypeSwitch(const SwitchStmt* sw, const NameExpr* v, + const ConstExpr* c); + + void PushNexts() { PushGoTos(nexts); } + void PushBreaks() { PushGoTos(breaks); } + void PushFallThroughs() { PushGoTos(fallthroughs); } + void PushCatchReturns() { PushGoTos(catches); } + + void ResolveNexts(const InstLabel l) + { ResolveGoTos(nexts, l); } + void ResolveBreaks(const InstLabel l) + { ResolveGoTos(breaks, l); } + void ResolveFallThroughs(const InstLabel l) + { ResolveGoTos(fallthroughs, l); } + void ResolveCatchReturns(const InstLabel l) + { ResolveGoTos(catches, l); } + + + const ZAMStmt LoopOverTable(const ForStmt* f, const NameExpr* val); + const ZAMStmt LoopOverVector(const ForStmt* f, const NameExpr* val); + const ZAMStmt LoopOverString(const ForStmt* f, const Expr* e); + + const ZAMStmt FinishLoop(const ZAMStmt iter_head, ZInstI iter_stmt, + const Stmt* body, int iter_slot, + bool is_table); + + const ZAMStmt Loop(const Stmt* body); + + + const ZAMStmt CompileExpr(const ExprPtr& e) + { return CompileExpr(e.get()); } + const ZAMStmt CompileExpr(const Expr* body); + + const ZAMStmt CompileIncrExpr(const IncrExpr* e); + const ZAMStmt CompileAppendToExpr(const AppendToExpr* e); + const ZAMStmt CompileAssignExpr(const AssignExpr* e); + const ZAMStmt CompileAssignToIndex(const NameExpr* lhs, + const IndexExpr* rhs); + const ZAMStmt CompileFieldLHSAssignExpr(const FieldLHSAssignExpr* e); + const ZAMStmt CompileScheduleExpr(const ScheduleExpr* e); + const ZAMStmt CompileSchedule(const NameExpr* n, const ConstExpr* c, + int is_interval, EventHandler* h, + const ListExpr* l); + const ZAMStmt CompileEvent(EventHandler* h, const ListExpr* l); + + const ZAMStmt CompileInExpr(const NameExpr* n1, const NameExpr* n2, + const NameExpr* n3) + { return CompileInExpr(n1, n2, nullptr, n3, nullptr); } + + const ZAMStmt CompileInExpr(const NameExpr* n1, const NameExpr* n2, + const ConstExpr* c) + { return CompileInExpr(n1, n2, nullptr, nullptr, c); } + + 
const ZAMStmt CompileInExpr(const NameExpr* n1, const ConstExpr* c, + const NameExpr* n3) + { return CompileInExpr(n1, nullptr, c, n3, nullptr); } + + // In the following, one of n2 or c2 (likewise, n3/c3) will be nil. + const ZAMStmt CompileInExpr(const NameExpr* n1, const NameExpr* n2, + const ConstExpr* c2, const NameExpr* n3, + const ConstExpr* c3); + + const ZAMStmt CompileInExpr(const NameExpr* n1, const ListExpr* l, + const NameExpr* n2) + { return CompileInExpr(n1, l, n2, nullptr); } + + const ZAMStmt CompileInExpr(const NameExpr* n, const ListExpr* l, + const ConstExpr* c) + { return CompileInExpr(n, l, nullptr, c); } + + const ZAMStmt CompileInExpr(const NameExpr* n1, const ListExpr* l, + const NameExpr* n2, const ConstExpr* c); + + + const ZAMStmt CompileIndex(const NameExpr* n1, const NameExpr* n2, + const ListExpr* l); + const ZAMStmt CompileIndex(const NameExpr* n1, const ConstExpr* c, + const ListExpr* l); + const ZAMStmt CompileIndex(const NameExpr* n1, int n2_slot, + const TypePtr& n2_type, const ListExpr* l); + + // Second argument is which instruction slot holds the branch target. 
+ const ZAMStmt GenCond(const Expr* e, int& branch_v); + + const ZAMStmt Call(const ExprStmt* e); + const ZAMStmt AssignToCall(const ExprStmt* e); + const ZAMStmt DoCall(const CallExpr* c, const NameExpr* n); + + const ZAMStmt AssignVecElems(const Expr* e); + const ZAMStmt AssignTableElem(const Expr* e); + + const ZAMStmt AppendToField(const NameExpr* n1, const NameExpr* n2, + const ConstExpr* c, int offset); + + const ZAMStmt ConstructTable(const NameExpr* n, const Expr* e); + const ZAMStmt ConstructSet(const NameExpr* n, const Expr* e); + const ZAMStmt ConstructRecord(const NameExpr* n, const Expr* e); + const ZAMStmt ConstructVector(const NameExpr* n, const Expr* e); + + const ZAMStmt ArithCoerce(const NameExpr* n, const Expr* e); + const ZAMStmt RecordCoerce(const NameExpr* n, const Expr* e); + const ZAMStmt TableCoerce(const NameExpr* n, const Expr* e); + const ZAMStmt VectorCoerce(const NameExpr* n, const Expr* e); + + const ZAMStmt Is(const NameExpr* n, const Expr* e); + + +#include "zeek/script_opt/ZAM/Inst-Gen.h" +#include "zeek/script_opt/ZAM/BuiltIn.h" + + // A bit weird, but handy for switch statements used in built-in + // operations: returns a bit mask of which of the arguments in the + // given list correspond to constants, with the high-ordered bit + // being the first argument (argument "0" in the list) and the + // low-ordered bit being the last. Second parameter is the number + // of arguments that should be present. 
+ bro_uint_t ConstArgsMask(const ExprPList& args, int nargs) const; + + int ConvertToInt(const Expr* e) + { + if ( e->Tag() == EXPR_NAME ) + return FrameSlot(e->AsNameExpr()->Id()); + else + return e->AsConstExpr()->Value()->AsInt(); + } + + int ConvertToCount(const Expr* e) + { + if ( e->Tag() == EXPR_NAME ) + return FrameSlot(e->AsNameExpr()->Id()); + else + return e->AsConstExpr()->Value()->AsCount(); + } + + + typedef std::vector GoToSet; + typedef std::vector GoToSets; + + void PushGoTos(GoToSets& gotos); + void ResolveGoTos(GoToSets& gotos, const InstLabel l); + + ZAMStmt GenGoTo(GoToSet& v); + ZAMStmt GoToStub(); + ZAMStmt GoTo(const InstLabel l); + InstLabel GoToTarget(const ZAMStmt s); + InstLabel GoToTargetBeyond(const ZAMStmt s); + + void SetTarget(ZInstI* inst, const InstLabel l, int slot); + + // Given a GoTo target, find its live equivalent (first instruction + // at that location or beyond that's live). + ZInstI* FindLiveTarget(ZInstI* goto_target); + + // Given an instruction that has a slot associated with the + // given target, updates the slot to correspond with the current + // instruction number of the target. + void ConcretizeBranch(ZInstI* inst, ZInstI* target, int target_slot); + + void SetV(ZAMStmt s, const InstLabel l, int v) + { + if ( v == 1 ) + SetV1(s, l); + else if ( v == 2 ) + SetV2(s, l); + else if ( v == 3 ) + SetV3(s, l); + else + SetV4(s, l); + } + + void SetV1(ZAMStmt s, const InstLabel l); + void SetV2(ZAMStmt s, const InstLabel l); + void SetV3(ZAMStmt s, const InstLabel l); + void SetV4(ZAMStmt s, const InstLabel l); + void SetGoTo(ZAMStmt s, const InstLabel targ) + { SetV1(s, targ); } + + + const ZAMStmt StartingBlock(); + const ZAMStmt FinishBlock(const ZAMStmt start); + + bool NullStmtOK() const; + + const ZAMStmt EmptyStmt(); + const ZAMStmt ErrorStmt(); + const ZAMStmt LastInst(); + + // Returns a handle to state associated with building + // up a list of values. 
+ OpaqueVals* BuildVals(const ListExprPtr&); + + // "stride" is how many slots each element of l will consume. + ZInstAux* InternalBuildVals(const ListExpr* l, int stride = 1); + + // Returns how many values were added. + int InternalAddVal(ZInstAux* zi, int i, Expr* e); + + const ZAMStmt AddInst(const ZInstI& inst); + + // Returns the statement just before the given one. + ZAMStmt PrevStmt(const ZAMStmt s); + + // Returns the last (interpreter) statement in the body. + const Stmt* LastStmt(const Stmt* s) const; + + // Returns the most recent added instruction *other* than those + // added for bookkeeping. + ZInstI* TopMainInst() { return insts1[top_main_inst]; } + + + bool IsUnused(const IDPtr& id, const Stmt* where) const; + + void LoadParam(ID* id); + const ZAMStmt LoadGlobal(ID* id); + + int AddToFrame(ID*); + + int FrameSlot(const IDPtr& id) { return FrameSlot(id.get()); } + int FrameSlot(const ID* id); + int FrameSlotIfName(const Expr* e) + { + auto n = e->Tag() == EXPR_NAME ? e->AsNameExpr() : nullptr; + return n ? FrameSlot(n->Id()) : 0; + } + + int FrameSlot(const NameExpr* id) + { return FrameSlot(id->AsNameExpr()->Id()); } + int Frame1Slot(const NameExpr* id, ZOp op) + { return Frame1Slot(id->AsNameExpr()->Id(), op); } + + int Frame1Slot(const ID* id, ZOp op) + { return Frame1Slot(id, op1_flavor[op]); } + int Frame1Slot(const NameExpr* n, ZAMOp1Flavor fl) + { return Frame1Slot(n->Id(), fl); } + int Frame1Slot(const ID* id, ZAMOp1Flavor fl); + + // The slot without doing any global-related checking. 
+ int RawSlot(const NameExpr* n) { return RawSlot(n->Id()); } + int RawSlot(const ID* id); + + bool HasFrameSlot(const ID* id) const; + + int NewSlot(const TypePtr& t) + { return NewSlot(ZVal::IsManagedType(t)); } + int NewSlot(bool is_managed); + + int TempForConst(const ConstExpr* c); + + //////////////////////////////////////////////////////////// + // The following methods relate to optimizing the low-level + // ZAM function body after it is initially generated. They're + // factored out into ZOpt.cc since they're structurally quite + // different from the methods above that relate to the initial + // compilation. + + // Optimizing the low-level compiled instructions. + void OptimizeInsts(); + + // Tracks which instructions can be branched to via the given + // set of switches. + template + void TallySwitchTargets(const CaseMapsI& switches); + + // Remove code that can't be reached. True if some removal happened. + bool RemoveDeadCode(); + + // Collapse chains of gotos. True if some something changed. + bool CollapseGoTos(); + + // Prune statements that are unnecessary. True if something got + // pruned. + bool PruneUnused(); + + // For the current state of insts1, compute lifetimes of frame + // denizens (variable(s) using a given frame slot) in terms of + // first-instruction-to-last-instruction during which they're + // relevant, including consideration for loops. + void ComputeFrameLifetimes(); + + // Given final frame lifetime information, remaps frame members + // with non-overlapping lifetimes to share slots. + void ReMapFrame(); + + // Given final frame lifetime information, remaps slots in the + // interpreter frame. (No longer strictly necessary.) + void ReMapInterpreterFrame(); + + // Computes the remapping for a variable currently in the given slot, + // whose scope begins at the given instruction. + void ReMapVar(ID* id, int slot, int inst); + + // Look to initialize the beginning of local lifetime based on slot + // assignment at instruction inst. 
+ void CheckSlotAssignment(int slot, const ZInstI* inst); + + // Track that a local's lifetime begins at the given statement. + void SetLifetimeStart(int slot, const ZInstI* inst); + + // Look for extension of local lifetime based on slot usage + // at instruction inst. + void CheckSlotUse(int slot, const ZInstI* inst); + + // Extend (or create) the end of a local's lifetime. + void ExtendLifetime(int slot, const ZInstI* inst); + + // Returns the (live) instruction at the beginning/end of the loop(s) + // within which the given instruction lies; or that instruction + // itself if it's not inside a loop. The second argument specifies + // the loop depth. For example, a value of '2' means "extend to + // the beginning/end of any loop(s) of depth >= 2". + const ZInstI* BeginningOfLoop(const ZInstI* inst, int depth) const; + const ZInstI* EndOfLoop(const ZInstI* inst, int depth) const; + + // True if any statement other than a frame sync assigns to the + // given slot. + bool VarIsAssigned(int slot) const; + + // True if the given statement assigns to the given slot, and + // it's not a frame sync. + bool VarIsAssigned(int slot, const ZInstI* i) const; + + // True if any statement other than a frame sync uses the given slot. + bool VarIsUsed(int slot) const; + + // Find the first non-dead instruction after i (inclusive). + // If follow_gotos is true, then if that instruction is + // an unconditional branch, continues the process until + // a different instruction is found (and report if there + // are infinite loops). + // + // First form returns nil if there's nothing live after i. + // Second form returns insts1.size() in that case. + ZInstI* FirstLiveInst(ZInstI* i, bool follow_gotos = false); + int FirstLiveInst(int i, bool follow_gotos = false); + + // Same, but not including i. 
+ ZInstI* NextLiveInst(ZInstI* i, bool follow_gotos = false) + { + if ( i->inst_num == insts1.size() - 1 ) + return nullptr; + return FirstLiveInst(insts1[i->inst_num + 1], follow_gotos); + } + int NextLiveInst(int i, bool follow_gotos = false) + { return FirstLiveInst(i + 1, follow_gotos); } + + // Mark an instruction as unnecessary and remove its influence on + // other statements. The instruction is indicated as an offset + // into insts1; any labels associated with it are transferred + // to its next live successor, if any. + void KillInst(ZInstI* i) { KillInst(i->inst_num); } + void KillInst(int i); + + // The same, but kills any successor instructions until finding + // one that's labeled. + void KillInsts(ZInstI* i) { KillInsts(i->inst_num); } + void KillInsts(int i); + + // The first of these is used as we compile down to ZInstI's. + // The second is the final intermediary code. They're separate + // to make it easy to remove dead code. + std::vector insts1; + std::vector insts2; + + // Used as a placeholder when we have to generate a GoTo target + // beyond the end of what we've compiled so far. + ZInstI* pending_inst = nullptr; + + // Indices of break/next/fallthrough/catch-return goto's, so they + // can be patched up post-facto. These are vectors-of-vectors + // so that nesting works properly. + GoToSets breaks; + GoToSets nexts; + GoToSets fallthroughs; + GoToSets catches; + + // The following tracks return variables for catch-returns. + // Can be nil if the usage doesn't include using the return value + // (and/or no return value generated). + std::vector retvars; + + ScriptFunc* func; + std::shared_ptr pf; + ScopePtr scope; + StmtPtr body; + std::shared_ptr ud; + std::shared_ptr reducer; + + // Maps identifiers to their (unique) frame location. + std::unordered_map frame_layout1; + + // Inverse mapping, used for tracking frame usage (and for dumping + // statements). 
+ FrameMap frame_denizens; + + // The same, but for remapping identifiers to shared frame slots. + FrameReMap shared_frame_denizens; + + // The same, but renumbered to take into account removal of + // dead statements. + FrameReMap shared_frame_denizens_final; + + // Maps frame1 slots to frame2 slots. A value < 0 means the + // variable doesn't exist in frame2 - it's an error to encounter + // one of these when remapping instructions! + std::vector frame1_to_frame2; + + // A type for mapping an instruction to a set of locals associated + // with it. + typedef std::unordered_map> + AssociatedLocals; + + // Maps (live) instructions to which frame denizens begin their + // lifetime via an initialization at that instruction, if any ... + // (it can be more than one local due to extending lifetimes to + // span loop bodies) + AssociatedLocals inst_beginnings; + + // ... and which frame denizens had their last usage at the + // given instruction. (These are insts1 instructions, prior to + // removing dead instructions, compressing the frames, etc.) + AssociatedLocals inst_endings; + + // A type for inverse mappings. + typedef std::unordered_map AssociatedInsts; + + // Inverse mappings: for a given frame denizen's slot, where its + // lifetime begins and ends. + AssociatedInsts denizen_beginning; + AssociatedInsts denizen_ending; + + // In the following, member variables ending in 'I' are intermediary + // values that get finalized when constructing the corresponding + // ZBody. + std::vector globalsI; + std::unordered_map global_id_to_info; // inverse + + // Intermediary switch tables (branching to ZInst's rather + // than concrete instruction offsets). + CaseMapsI int_casesI; + CaseMapsI uint_casesI; + CaseMapsI double_casesI; + + // Note, we use this not only for strings but for addresses + // and prefixes. + CaseMapsI str_casesI; + + // Same, but for the concretized versions. 
+	CaseMaps int_cases;
+	CaseMaps uint_cases;
+	CaseMaps double_cases;
+	CaseMaps str_cases;
+
+	// Frame slots whose identifiers have a managed type; filled in by
+	// TrackMemoryManagement().
+	std::vector managed_slotsI;
+
+	// Set to 0 by the ZAMCompiler constructor.
+	int frame_sizeI;
+
+	TableIterVec table_iters;
+	int num_step_iters = 0;
+
+	// Set by Init(): whether "func" is listed in non_recursive_funcs.
+	bool non_recursive = false;
+
+	// Most recent instruction, other than for housekeeping.
+	// NOTE(review): has no default initializer, and the constructor
+	// in Driver.cc does not assign it - confirm it is always written
+	// before it is first read.
+	int top_main_inst;
+
+	// Used for communication between Frame1Slot and a subsequent
+	// AddInst. If >= 0, then upon adding the next instruction,
+	// it should be followed by Store-Global for the given slot.
+	int pending_global_store = -1;
+};
+
+
+// Invoked after compiling all of the function bodies.
+class FuncInfo;
+extern void finalize_functions(const std::vector& funcs);
+
+} // namespace zeek::detail
diff --git a/src/script_opt/ZAM/Driver.cc b/src/script_opt/ZAM/Driver.cc
new file mode 100644
index 0000000000..9b61927e1b
--- /dev/null
+++ b/src/script_opt/ZAM/Driver.cc
@@ -0,0 +1,503 @@
+// See the file "COPYING" in the main distribution directory for copyright.
+
+// Driver (and other high-level) methods for ZAM compilation.
+
+#include "zeek/CompHash.h"
+#include "zeek/RE.h"
+#include "zeek/Frame.h"
+#include "zeek/module_util.h"
+#include "zeek/Scope.h"
+#include "zeek/Reporter.h"
+#include "zeek/script_opt/ScriptOpt.h"
+#include "zeek/script_opt/ProfileFunc.h"
+#include "zeek/script_opt/ZAM/Compile.h"
+
+
+namespace zeek::detail {
+
+
+// Stores the function being compiled together with its profile, scope,
+// body, use-defs and reducer, zeroes the intermediary frame size, and
+// then runs Init() to lay out the frame.
+// NOTE(review): the std::shared_ptr parameters carry no template
+// arguments as shown here (they look lost in extraction) - restore them
+// from the class declaration.
+ZAMCompiler::ZAMCompiler(ScriptFunc* f, std::shared_ptr _pf,
+				ScopePtr _scope, StmtPtr _body,
+				std::shared_ptr _ud,
+				std::shared_ptr _rd)
+	{
+	func = f;
+	pf = std::move(_pf);
+	scope = std::move(_scope);
+	body = std::move(_body);
+	ud = std::move(_ud);
+	reducer = std::move(_rd);
+	frame_sizeI = 0;
+
+	Init();
+	}
+
+// Prepares the compiler's frame: globals first, then parameters, then
+// locals.  Afterwards records which slots need memory management and
+// whether the function is known to be non-recursive.
+void ZAMCompiler::Init()
+	{
+	InitGlobals();
+	InitArgs();
+	InitLocals();
+
+#if 0
+	// Complain about unused aggregates ... but not if we're inlining,
+	// as that can lead to optimizations where they wind up being unused
+	// but the original logic for using them was sound.
+	if ( ! analysis_options.inliner )
+		for ( auto a : pf->Inits() )
+			{
+			if ( pf->Locals().find(a) == pf->Locals().end() )
+				reporter->Warning("%s unused", a->Name());
+			}
+#endif
+
+	TrackMemoryManagement();
+
+	non_recursive = non_recursive_funcs.count(func) > 0;
+	}
+
+// Gives every global in the profile a frame slot and a GlobalInfo entry.
+// global_id_to_info maps the identifier to that entry's index in globalsI.
+void ZAMCompiler::InitGlobals()
+	{
+	for ( auto g : pf->Globals() )
+		{
+		// NOTE(review): the cast has no template argument as shown
+		// here (it looks lost in extraction) - confirm the target
+		// type against the original source.
+		auto non_const_g = const_cast(g);
+
+		GlobalInfo info;
+		info.id = {NewRef{}, non_const_g};
+		info.slot = AddToFrame(non_const_g);
+		// Index is taken before the push_back, so it refers to the
+		// entry appended on the next line.
+		global_id_to_info[non_const_g] = globalsI.size();
+		globalsI.push_back(info);
+		}
+	}
+
+// Calls LoadParam() for each parameter that the body's use-defs (if any
+// are recorded for the body) report as used.  Runs with the function's
+// scope pushed, and pops it again before returning.
+void ZAMCompiler::InitArgs()
+	{
+	auto uds = ud->HasUsage(body.get()) ? ud->GetUsage(body.get()) :
+						nullptr;
+
+	auto args = scope->OrderedVars();
+	int nparam = func->GetType()->Params()->NumFields();
+
+	push_existing_scope(scope);
+
+	for ( auto a : args )
+		{
+		// Only the first "nparam" ordered variables are visited;
+		// stop once the declared parameter count is used up.
+		if ( --nparam < 0 )
+			break;
+
+		auto arg_id = a.get();
+		if ( uds && uds->HasID(arg_id) )
+			LoadParam(arg_id);
+		else
+			{
+			// printf("param %s unused\n", obj_desc(arg_id.get()));
+			}
+		}
+
+	pop_scope();
+	}
+
+// Adds a frame slot for every profiled local that doesn't already have one.
+void ZAMCompiler::InitLocals()
+	{
+	// Assign slots for locals (which includes temporaries).
+	for ( auto l : pf->Locals() )
+		{
+		// NOTE(review): as in InitGlobals(), the cast's template
+		// argument is missing as shown here - confirm.
+		auto non_const_l = const_cast(l);
+		// ### should check for unused variables.
+		// Don't add locals that were already added because they're
+		// parameters.
+		if ( ! HasFrameSlot(non_const_l) )
+			(void) AddToFrame(non_const_l);
+		}
+	}
+
+void ZAMCompiler::TrackMemoryManagement()
+	{
+	for ( auto& slot : frame_layout1 )
+		{
+		// Look for locals with values of types for which
+		// we do explicit memory management on (re)assignment.
+		auto t = slot.first->GetType();
+		if ( ZVal::IsManagedType(t) )
+			managed_slotsI.push_back(slot.second);
+		}
+	}
+
+// Compiles "body" into ZAM instructions and returns the resulting
+// statement, or nullptr if the reporter has recorded errors after
+// compiling the body.
+//
+// The work proceeds in phases: compile into insts1, resolve leftover
+// break/next/fallthrough/catch gotos, number the instructions, compute
+// loop levels, optionally optimize, then copy the live instructions into
+// insts2 and resolve branches, frame denizens and switch tables.
+StmtPtr ZAMCompiler::CompileBody()
+	{
+	curr_stmt = nullptr;
+
+	if ( func->Flavor() == FUNC_FLAVOR_HOOK )
+		PushBreaks();
+
+	(void) CompileStmt(body);
+
+	if ( reporter->Errors() > 0 )
+		return nullptr;
+
+	ResolveHookBreaks();
+
+	if ( nexts.size() > 0 )
+		reporter->Error("\"next\" used without an enclosing \"for\"");
+
+	if ( fallthroughs.size() > 0 )
+		reporter->Error("\"fallthrough\" used without an enclosing \"switch\"");
+
+	if ( catches.size() > 0 )
+		reporter->InternalError("untargeted inline return");
+
+	// Make sure we have a (pseudo-)instruction at the end so we
+	// can use it as a branch label.
+	if ( ! pending_inst )
+		pending_inst = new ZInstI();
+
+	// Concretize instruction numbers in insts1 so we can
+	// easily move through the code.
+	for ( auto i = 0U; i < insts1.size(); ++i )
+		insts1[i]->inst_num = i;
+
+	ComputeLoopLevels();
+
+	if ( ! analysis_options.no_ZAM_opt )
+		OptimizeInsts();
+
+	AdjustBranches();
+
+	// Construct the final program with the dead code eliminated
+	// and branches resolved.
+
+	// Make sure we don't include the empty pending-instruction, if any.
+	if ( pending_inst )
+		pending_inst->live = false;
+
+	// Maps insts1 instructions to where they are in insts2.
+	// Dead instructions map to -1.
+	std::vector<int> inst1_to_inst2;
+	inst1_to_inst2.reserve(insts1.size());
+
+	for ( auto i = 0U; i < insts1.size(); ++i )
+		{
+		if ( insts1[i]->live )
+			{
+			inst1_to_inst2.push_back(insts2.size());
+			insts2.push_back(insts1[i]);
+			}
+		else
+			inst1_to_inst2.push_back(-1);
+		}
+
+	// Re-concretize instruction numbers, and concretize GoTo's.
+	for ( auto i = 0U; i < insts2.size(); ++i )
+		insts2[i]->inst_num = i;
+
+	RetargetBranches();
+
+	// If we have remapped frame denizens, update them. If not,
+	// create them.
+	if ( shared_frame_denizens.size() > 0 )
+		RemapFrameDenizens(inst1_to_inst2);
+
+	else
+		CreateSharedFrameDenizens();
+
+	// NOTE(review): the member is left pointing at freed memory.  It
+	// is deliberately not reset here because ComputeLoopLevels()
+	// compares branch targets against it and other users are not
+	// visible from this file section - confirm nothing dereferences
+	// it (or a target equal to it) after this point.
+	delete pending_inst;
+
+	ConcretizeSwitches();
+
+	// Could erase insts1 here to recover memory, but it's handy
+	// for debugging.
+
+#if 0
+	if ( non_recursive )
+		func->UseStaticFrame();
+#endif
+
+	auto zb = make_intrusive<ZBody>(func->Name(), this);
+	zb->SetInsts(insts2);
+
+	return zb;
+	}
+
+// Deals with "break" gotos still unresolved at the top level of the body.
+// In a hook they become OP_HOOK_BREAK_X instructions; in any other
+// function flavor they are reported as an error.
+void ZAMCompiler::ResolveHookBreaks()
+	{
+	if ( breaks.size() == 0 )
+		return;
+
+	ASSERT(breaks.size() == 1);
+
+	if ( func->Flavor() != FUNC_FLAVOR_HOOK )
+		{
+		reporter->Error("\"break\" used without an enclosing \"for\" or \"switch\"");
+		return;
+		}
+
+	// Rewrite the breaks.  This is done in place rather than by
+	// deleting and re-allocating the instruction: other instructions
+	// refer to their branch targets by raw ZInstI pointer, so the
+	// object's address has to stay valid.
+	for ( auto& b : breaks[0] )
+		{
+		auto* inst = insts1[b.stmt_num];
+
+		// Any branches already pointing here still do so after
+		// the rewrite, so keep the label count.
+		auto labels = inst->num_labels;
+		*inst = ZInstI(OP_HOOK_BREAK_X);
+		inst->num_labels = labels;
+		}
+	}
+
+// Marks loop starts and sets each instruction's loop_depth.  A loop is
+// recognized by a backward branch, i.e. a target whose instruction
+// number is lower than that of the branching instruction.
+void ZAMCompiler::ComputeLoopLevels()
+	{
+	// Compute which instructions are inside loops.
+	for ( auto i = 0; i < int(insts1.size()); ++i )
+		{
+		auto inst = insts1[i];
+
+		auto t = inst->target;
+		if ( ! t || t == pending_inst )
+			continue;
+
+		if ( t->inst_num < i )
+			{
+			auto j = t->inst_num;
+
+			if ( ! t->loop_start )
+				{
+				// Loop is newly discovered.
+				t->loop_start = true;
+				}
+			else
+				{
+				// We're extending an existing loop. Find
+				// its current end.
+				auto depth = t->loop_depth;
+				while ( j < i &&
+					insts1[j]->loop_depth == depth )
+					++j;
+
+				ASSERT(insts1[j]->loop_depth == depth - 1);
+				}
+
+			// Run from j's current position to i, bumping
+			// the loop depth.
+			while ( j <= i )
+				{
+				++insts1[j]->loop_depth;
+				++j;
+				}
+			}
+		}
+	}
+
+void ZAMCompiler::AdjustBranches()
+	{
+	// Move branches to dead code forward to their successor live code.
+	for ( auto i = 0U; i < insts1.size(); ++i )
+		{
+		auto inst = insts1[i];
+		if ( ! inst->live )
+			continue;
+
+		auto t = inst->target;
+
+		if ( !
t )
+			continue;
+
+		inst->target = FindLiveTarget(t);
+		}
+	}
+
+// Concretizes the branch of every instruction in the final program
+// (insts2) that has a target.
+void ZAMCompiler::RetargetBranches()
+	{
+	for ( auto i = 0U; i < insts2.size(); ++i )
+		{
+		auto inst = insts2[i];
+		if ( ! inst->target )
+			continue;
+
+		ConcretizeBranch(inst, inst->target, inst->target_slot);
+		}
+	}
+
+// Translates each shared frame denizen's starting instruction from an
+// insts1 index to the corresponding insts2 index, and appends the
+// updated entry to shared_frame_denizens_final.
+//
+// inst1_to_inst2 maps insts1 indices to insts2 indices, with -1 for
+// instructions that were removed.  Note that the entries of
+// shared_frame_denizens are themselves modified.
+void ZAMCompiler::RemapFrameDenizens(const std::vector<int>& inst1_to_inst2)
+	{
+	// Signed, to match the "start" values and the -1 sentinel.
+	const int num_insts1 = int(insts1.size());
+
+	for ( auto& info : shared_frame_denizens )
+		{
+		for ( auto& start : info.id_start )
+			{
+			// It can happen that the identifier's
+			// origination instruction was optimized
+			// away, if due to slot sharing it's of
+			// the form "slotX = slotX". In that
+			// case, look forward for the next viable
+			// instruction.
+			while ( start < num_insts1 &&
+				inst1_to_inst2[start] == -1 )
+				++start;
+
+			ASSERT(start < num_insts1);
+			start = inst1_to_inst2[start];
+			}
+
+		shared_frame_denizens_final.push_back(info);
+		}
+	}
+
+// Used when no frame remapping was done: gives every frame denizen its
+// own entry in shared_frame_denizens_final, starting at instruction 0
+// and with its scope ending at the end of the final program.
+void ZAMCompiler::CreateSharedFrameDenizens()
+	{
+	for ( const auto& denizen : frame_denizens )
+		{
+		FrameSharingInfo info;
+		info.ids.push_back(denizen);
+		info.id_start.push_back(0);
+		info.scope_end = insts2.size();
+
+		// The following doesn't matter since the value
+		// is only used during compiling, not during
+		// execution.
+		info.is_managed = false;
+
+		shared_frame_denizens_final.push_back(info);
+		}
+	}
+
+void ZAMCompiler::ConcretizeSwitches()
+	{
+	// Create concretized versions of any case tables.
+	ConcretizeSwitchTables(int_casesI, int_cases);
+	ConcretizeSwitchTables(uint_casesI, uint_cases);
+	ConcretizeSwitchTables(double_casesI, double_cases);
+	ConcretizeSwitchTables(str_casesI, str_cases);
+	}
+
+// Converts one family of intermediary switch tables, whose case values
+// map to instructions, into concrete tables mapping the same values to
+// instruction numbers.  Results are appended to concrete_cases in order.
+template <typename T>
+void ZAMCompiler::ConcretizeSwitchTables(const CaseMapsI<T>& abstract_cases,
+						CaseMaps<T>& concrete_cases)
+	{
+	for ( auto& targs : abstract_cases )
+		{
+		CaseMap<T> cm;
+		for ( auto& targ : targs )
+			cm[targ.first] = targ.second->inst_num;
+		concrete_cases.push_back(std::move(cm));
+		}
+	}
+
+
+#include "ZAM-MethodDefs.h"
+
+
+// Prints the compiler's state to stdout: the original frame layout, the
+// shared ("final") frame if optimization is enabled, the insts1 code,
+// the insts2 code (twice: once with liveness/loop annotations and once
+// plain), and finally the intermediary switch tables.
+void ZAMCompiler::Dump()
+	{
+	bool remapped_frame = ! analysis_options.no_ZAM_opt;
+
+	if ( remapped_frame )
+		printf("Original frame for %s:\n", func->Name());
+
+	for ( const auto& elem : frame_layout1 )
+		printf("frame[%d] = %s\n", elem.second, elem.first->Name());
+
+	if ( remapped_frame )
+		{
+		printf("Final frame for %s:\n", func->Name());
+
+		for ( auto i = 0U; i < shared_frame_denizens.size(); ++i )
+			{
+			// "i" is unsigned, hence %u.
+			printf("frame2[%u] =", i);
+			for ( auto& id : shared_frame_denizens[i].ids )
+				printf(" %s", id->Name());
+			printf("\n");
+			}
+		}
+
+	if ( insts2.size() > 0 )
+		printf("Pre-removal of dead code for %s:\n", func->Name());
+
+	auto remappings = remapped_frame ? &shared_frame_denizens : nullptr;
+
+	DumpInsts1(remappings);
+
+	if ( insts2.size() > 0 )
+		printf("Final intermediary code for %s:\n", func->Name());
+
+	remappings = remapped_frame ? &shared_frame_denizens_final : nullptr;
+
+	for ( auto i = 0U; i < insts2.size(); ++i )
+		{
+		auto& inst = insts2[i];
+		std::string liveness, depth;
+
+		if ( inst->live )
+			liveness = util::fmt("(labels %d)", inst->num_labels);
+		else
+			liveness = "(dead)";
+
+		if ( inst->loop_depth )
+			depth = util::fmt(" (loop %d)", inst->loop_depth);
+
+		printf("%u %s%s: ", i, liveness.c_str(), depth.c_str());
+
+		inst->Dump(&frame_denizens, remappings);
+		}
+
+	if ( insts2.size() > 0 )
+		printf("Final code for %s:\n", func->Name());
+
+	for ( auto i = 0U; i < insts2.size(); ++i )
+		{
+		auto& inst = insts2[i];
+		printf("%u: ", i);
+		inst->Dump(&frame_denizens, remappings);
+		}
+
+	DumpCases(int_casesI, "int");
+	DumpCases(uint_casesI, "uint");
+	DumpCases(double_casesI, "double");
+	DumpCases(str_casesI, "str");
+	}
+
+// Prints one family of intermediary switch tables, one table per line,
+// as a list of "value->instruction number" pairs.  "type_name" labels
+// the family in the output.
+template <typename T>
+void ZAMCompiler::DumpCases(const T& cases, const char* type_name) const
+	{
+	for ( auto i = 0U; i < cases.size(); ++i )
+		{
+		printf("%s switch table #%u:", type_name, i);
+		for ( auto& m : cases[i] )
+			{
+			// T is the type of the whole collection of tables,
+			// so the rendering is chosen from the type of the
+			// case value itself.
+			using KeyT = std::decay_t<decltype(m.first)>;
+
+			std::string case_val;
+			if constexpr ( std::is_same_v<KeyT, std::string> )
+				case_val = m.first;
+			else if constexpr ( std::is_arithmetic_v<KeyT> )
+				case_val = std::to_string(m.first);
+
+			printf(" %s->%d", case_val.c_str(), m.second->inst_num);
+			}
+		printf("\n");
+		}
+	}
+
+// Prints every insts1 instruction, dead ones included, annotated with
+// its label count (or "(dead)") and loop depth.  "remappings" may be
+// nullptr when frame slots were not remapped.
+void ZAMCompiler::DumpInsts1(const FrameReMap* remappings)
+	{
+	for ( auto i = 0U; i < insts1.size(); ++i )
+		{
+		auto& inst = insts1[i];
+
+		if ( inst->target )
+			// To get meaningful branch information in the dump,
+			// we need to concretize the branch slots
+			ConcretizeBranch(inst, inst->target, inst->target_slot);
+
+		std::string liveness, depth;
+
+		if ( inst->live )
+			liveness = util::fmt("(labels %d)", inst->num_labels);
+		else
+			liveness = "(dead)";
+
+		if ( inst->loop_depth )
+			depth = util::fmt(" (loop %d)", inst->loop_depth);
+
+		printf("%u %s%s: ", i, liveness.c_str(), depth.c_str());
+
+		inst->Dump(&frame_denizens, remappings);
+		}
+	}
+
+
+} //
zeek::detail
diff --git a/src/script_opt/ZAM/Expr.cc b/src/script_opt/ZAM/Expr.cc
new file mode 100644
index 0000000000..400aa88bd9
--- /dev/null
+++ b/src/script_opt/ZAM/Expr.cc
@@ -0,0 +1,1221 @@
+// See the file "COPYING" in the main distribution directory for copyright.
+
+// Methods for traversing Expr AST nodes to generate ZAM code.
+
+#include "zeek/script_opt/ZAM/Compile.h"
+#include "zeek/Reporter.h"
+#include "zeek/Desc.h"
+
+namespace zeek::detail {
+
+// Compiles an expression that is used as a statement, dispatching on its
+// tag to the matching specialized routine.  Any tag not listed below is
+// treated as an internal error.
+const ZAMStmt ZAMCompiler::CompileExpr(const Expr* e)
+	{
+	switch ( e->Tag() ) {
+	case EXPR_INCR:
+	case EXPR_DECR:
+		return CompileIncrExpr(static_cast<const IncrExpr*>(e));
+
+	case EXPR_APPEND_TO:
+		return CompileAppendToExpr(static_cast<const AppendToExpr*>(e));
+
+	case EXPR_ASSIGN:
+		return CompileAssignExpr(static_cast<const AssignExpr*>(e));
+
+	case EXPR_INDEX_ASSIGN:
+		{
+		// The aggregate being assigned into is either a vector
+		// or a table.
+		auto iae = static_cast<const IndexAssignExpr*>(e);
+		auto t = iae->GetOp1()->GetType()->Tag();
+		if ( t == TYPE_VECTOR )
+			return AssignVecElems(iae);
+
+		ASSERT(t == TYPE_TABLE);
+		return AssignTableElem(iae);
+		}
+
+	case EXPR_FIELD_LHS_ASSIGN:
+		{
+		auto flhs = static_cast<const FieldLHSAssignExpr*>(e);
+		return CompileFieldLHSAssignExpr(flhs);
+		}
+
+	case EXPR_SCHEDULE:
+		return CompileScheduleExpr(static_cast<const ScheduleExpr*>(e));
+
+	case EXPR_EVENT:
+		{
+		auto ee = static_cast<const EventExpr*>(e);
+		auto h = ee->Handler().Ptr();
+		auto args = ee->Args();
+		return EventHL(h, args);
+		}
+
+	default:
+		reporter->InternalError("bad statement type in ZAMCompiler::CompileExpr");
+	}
+	}
+
+// Compiles "++x" / "--x".  The signed (IV) instruction variants are used
+// when the target's type is TYPE_INT, and the unsigned (UV) variants
+// otherwise.
+const ZAMStmt ZAMCompiler::CompileIncrExpr(const IncrExpr* e)
+	{
+	auto target = e->Op()->AsRefExpr()->GetOp1()->AsNameExpr();
+
+	// NOTE(review): "s" is never used.  The call is kept because
+	// EmptyStmt() is not visible from here - remove it if that
+	// function turns out to have no side effects.
+	auto s = EmptyStmt();
+
+	const bool is_incr = e->Tag() == EXPR_INCR;
+
+	if ( target->GetType()->Tag() == TYPE_INT )
+		{
+		if ( is_incr )
+			return IncrIV(target);
+		else
+			return DecrIV(target);
+		}
+
+	if ( is_incr )
+		return IncrUV(target);
+	else
+		return DecrUV(target);
+	}
+
+const ZAMStmt ZAMCompiler::CompileAppendToExpr(const AppendToExpr* e)
+	{
+	auto op1 = e->GetOp1();
+	auto op2 = e->GetOp2();
+
+	auto n2 = op2->Tag() == EXPR_NAME ?
op2->AsNameExpr() : nullptr; + auto cc = op2->Tag() != EXPR_NAME ? op2->AsConstExpr() : nullptr; + + if ( op1->Tag() == EXPR_FIELD ) + { + auto f = op1->AsFieldExpr()->Field(); + auto n1 = op1->GetOp1()->AsNameExpr(); + return AppendToField(n1, n2, cc, f); + } + + auto n1 = op1->AsNameExpr(); + + return n2 ? AppendToVV(n1, n2) : AppendToVC(n1, cc); + } + +const ZAMStmt ZAMCompiler::AppendToField(const NameExpr* n1, const NameExpr* n2, + const ConstExpr* c, int offset) + { + ZInstI z; + + if ( n2 ) + { + z = ZInstI(OP_APPENDTOFIELD_VVi, FrameSlot(n1), FrameSlot(n2), + offset); + z.op_type = OP_VVV_I3; + } + else + { + z = ZInstI(OP_APPENDTOFIELD_VCi, FrameSlot(n1), offset, c); + z.op_type = OP_VVC_I2; + } + + z.SetType(n2 ? n2->GetType() : c->GetType()); + + return AddInst(z); + } + +const ZAMStmt ZAMCompiler::CompileAssignExpr(const AssignExpr* e) + { + auto op1 = e->GetOp1(); + auto op2 = e->GetOp2(); + + auto lhs = op1->AsRefExpr()->GetOp1()->AsNameExpr(); + auto lt = lhs->GetType().get(); + auto rhs = op2.get(); + auto r1 = rhs->GetOp1(); + + if ( rhs->Tag() == EXPR_INDEX && + (r1->Tag() == EXPR_NAME || r1->Tag() == EXPR_CONST) ) + return CompileAssignToIndex(lhs, rhs->AsIndexExpr()); + + switch ( rhs->Tag() ) { +#include "ZAM-DirectDefs.h" + + default: + break; + } + + auto rt = rhs->GetType(); + + auto r2 = rhs->GetOp2(); + auto r3 = rhs->GetOp3(); + + if ( rhs->Tag() == EXPR_LAMBDA ) + { + // reporter->Error("lambda expressions not supported for compiling"); + return ErrorStmt(); + } + + if ( rhs->Tag() == EXPR_NAME ) + return AssignVV(lhs, rhs->AsNameExpr()); + + if ( rhs->Tag() == EXPR_CONST ) + return AssignVC(lhs, rhs->AsConstExpr()); + + if ( rhs->Tag() == EXPR_IN && r1->Tag() == EXPR_LIST ) + { + // r2 can be a constant due to propagating "const" + // globals, for example. 
+ if ( r2->Tag() == EXPR_NAME ) + { + auto r2n = r2->AsNameExpr(); + + if ( r2->GetType()->Tag() == TYPE_TABLE ) + return L_In_TVLV(lhs, r1->AsListExpr(), r2n); + + return L_In_VecVLV(lhs, r1->AsListExpr(), r2n); + } + + auto r2c = r2->AsConstExpr(); + + if ( r2->GetType()->Tag() == TYPE_TABLE ) + return L_In_TVLC(lhs, r1->AsListExpr(), r2c); + + return L_In_VecVLC(lhs, r1->AsListExpr(), r2c); + } + + if ( rhs->Tag() == EXPR_ANY_INDEX ) + return AnyIndexVVi(lhs, r1->AsNameExpr(), + rhs->AsAnyIndexExpr()->Index()); + + if ( rhs->Tag() == EXPR_COND && r1->GetType()->Tag() == TYPE_VECTOR ) + return Bool_Vec_CondVVVV(lhs, r1->AsNameExpr(), + r2->AsNameExpr(), + r3->AsNameExpr()); + + if ( rhs->Tag() == EXPR_COND && r2->IsConst() && r3->IsConst() ) + { + // Split into two statement, given we don't support + // two constants in a single statement. + auto n1 = r1->AsNameExpr(); + auto c2 = r2->AsConstExpr(); + auto c3 = r3->AsConstExpr(); + (void) CondC1VVC(lhs, n1, c2); + return CondC2VVC(lhs, n1, c3); + } + + if ( r1 && r2 ) + { + auto v1 = IsVector(r1->GetType()->Tag()); + auto v2 = IsVector(r2->GetType()->Tag()); + + if ( v1 != v2 && rhs->Tag() != EXPR_IN ) + { + reporter->Error("deprecated mixed vector/scalar operation not supported for ZAM compiling"); + return ErrorStmt(); + } + } + + if ( r1 && r1->IsConst() ) +#include "ZAM-GenExprsDefsC1.h" + + else if ( r2 && r2->IsConst() ) +#include "ZAM-GenExprsDefsC2.h" + + else if ( r3 && r3->IsConst() ) +#include "ZAM-GenExprsDefsC3.h" + + else +#include "ZAM-GenExprsDefsV.h" + } + +const ZAMStmt ZAMCompiler::CompileAssignToIndex(const NameExpr* lhs, + const IndexExpr* rhs) + { + auto aggr = rhs->GetOp1(); + auto const_aggr = aggr->Tag() == EXPR_CONST; + + auto indexes_expr = rhs->GetOp2()->AsListExpr(); + auto indexes = indexes_expr->Exprs(); + + auto n = const_aggr ? nullptr : aggr->AsNameExpr(); + auto con = const_aggr ? 
aggr->AsConstExpr() : nullptr; + + if ( indexes.length() == 1 && + indexes[0]->GetType()->Tag() == TYPE_VECTOR ) + { + auto index1 = indexes[0]; + if ( index1->Tag() == EXPR_CONST ) + { + reporter->Error("constant vector indexes not supported for ZAM compiling"); + return ErrorStmt(); + } + + auto index = index1->AsNameExpr(); + auto ind_t = index->GetType()->AsVectorType(); + + if ( IsBool(ind_t->Yield()->Tag()) ) + return const_aggr ? + IndexVecBoolSelectVCV(lhs, con, index) : + IndexVecBoolSelectVVV(lhs, n, index); + + return const_aggr ? IndexVecIntSelectVCV(lhs, con, index) : + IndexVecIntSelectVVV(lhs, n, index); + } + + return const_aggr ? IndexVCL(lhs, con, indexes_expr) : + IndexVVL(lhs, n, indexes_expr); + } + +const ZAMStmt ZAMCompiler::CompileFieldLHSAssignExpr(const FieldLHSAssignExpr* e) + { + auto lhs = e->Op1()->AsNameExpr(); + auto rhs = e->Op2(); + auto field = e->Field(); + + if ( rhs->Tag() == EXPR_NAME ) + return Field_LHS_AssignFV(e, rhs->AsNameExpr()); + + if ( rhs->Tag() == EXPR_CONST ) + return Field_LHS_AssignFC(e, rhs->AsConstExpr()); + + auto r1 = rhs->GetOp1(); + auto r2 = rhs->GetOp2(); + + if ( rhs->Tag() == EXPR_FIELD ) + { + auto rhs_f = rhs->AsFieldExpr(); + if ( r1->Tag() == EXPR_NAME ) + return Field_LHS_AssignFVi(e, r1->AsNameExpr(), + rhs_f->Field()); + + return Field_LHS_AssignFCi(e, r1->AsConstExpr(), + rhs_f->Field()); + } + + if ( r1 && r1->IsConst() ) +#include "ZAM-GenFieldsDefsC1.h" + + else if ( r2 && r2->IsConst() ) +#include "ZAM-GenFieldsDefsC2.h" + + else +#include "ZAM-GenFieldsDefsV.h" + } + +const ZAMStmt ZAMCompiler::CompileScheduleExpr(const ScheduleExpr* e) + { + auto event = e->Event(); + auto when = e->When(); + + auto event_args = event->Args(); + auto handler = event->Handler(); + + bool is_interval = when->GetType()->Tag() == TYPE_INTERVAL; + + if ( when->Tag() == EXPR_NAME ) + return ScheduleViHL(when->AsNameExpr(), is_interval, + handler.Ptr(), event_args); + else + return 
ScheduleCiHL(when->AsConstExpr(), is_interval, + handler.Ptr(), event_args); + } + +const ZAMStmt ZAMCompiler::CompileSchedule(const NameExpr* n, + const ConstExpr* c, int is_interval, + EventHandler* h, const ListExpr* l) + { + int len = l->Exprs().length(); + ZInstI z; + + if ( len == 0 ) + { + z = n ? ZInstI(OP_SCHEDULE0_ViH, FrameSlot(n), is_interval) : + ZInstI(OP_SCHEDULE0_CiH, is_interval, c); + z.op_type = n ? OP_VV_I2 : OP_VC_I1; + } + + else + { + if ( n ) + { + z = ZInstI(OP_SCHEDULE_ViHL, FrameSlot(n), is_interval); + z.op_type = OP_VV_I2; + } + else + { + z = ZInstI(OP_SCHEDULE_CiHL, is_interval, c); + z.op_type = OP_VC_I1; + } + + z.aux = InternalBuildVals(l); + } + + z.event_handler = h; + + return AddInst(z); + } + +const ZAMStmt ZAMCompiler::CompileEvent(EventHandler* h, const ListExpr* l) + { + auto exprs = l->Exprs(); + unsigned int n = exprs.length(); + + bool all_vars = true; + for ( auto i = 0U; i < n; ++i ) + if ( exprs[i]->Tag() == EXPR_CONST ) + { + all_vars = false; + break; + } + + if ( n > 4 || ! 
all_vars ) + { // do generic form + ZInstI z(OP_EVENT_HL); + z.aux = InternalBuildVals(l); + z.event_handler = h; + return AddInst(z); + } + + ZInstI z; + z.event_handler = h; + + if ( n == 0 ) + { + z.op = OP_EVENT0_X; + z.op_type = OP_X; + } + + else + { + auto n0 = exprs[0]->AsNameExpr(); + z.v1 = FrameSlot(n0); + z.t = n0->GetType(); + + if ( n == 1 ) + { + z.op = OP_EVENT1_V; + z.op_type = OP_V; + } + + else + { + auto n1 = exprs[1]->AsNameExpr(); + z.v2 = FrameSlot(n1); + z.t2 = n1->GetType(); + + if ( n == 2 ) + { + z.op = OP_EVENT2_VV; + z.op_type = OP_VV; + } + + else + { + z.aux = InternalBuildVals(l); + + auto n2 = exprs[2]->AsNameExpr(); + z.v3 = FrameSlot(n2); + + if ( n == 3 ) + { + z.op = OP_EVENT3_VVV; + z.op_type = OP_VVV; + } + + else + { + z.op = OP_EVENT4_VVVV; + z.op_type = OP_VVVV; + + auto n3 = exprs[3]->AsNameExpr(); + z.v4 = FrameSlot(n3); + } + } + } + } + + return AddInst(z); + } + +const ZAMStmt ZAMCompiler::CompileInExpr(const NameExpr* n1, + const NameExpr* n2, + const ConstExpr* c2, + const NameExpr* n3, + const ConstExpr* c3) + { + const Expr* op2 = n2; + const Expr* op3 = n3; + + if ( ! op2 ) op2 = c2; + if ( ! op3 ) op3 = c3; + + ZOp a; + + if ( op2->GetType()->Tag() == TYPE_PATTERN ) + a = n2 ? (n3 ? OP_P_IN_S_VVV : OP_P_IN_S_VVC) : OP_P_IN_S_VCV; + + else if ( op2->GetType()->Tag() == TYPE_STRING ) + a = n2 ? (n3 ? OP_S_IN_S_VVV : OP_S_IN_S_VVC) : OP_S_IN_S_VCV; + + else if ( op2->GetType()->Tag() == TYPE_ADDR && + op3->GetType()->Tag() == TYPE_SUBNET ) + a = n2 ? (n3 ? OP_A_IN_S_VVV : OP_A_IN_S_VVC) : OP_A_IN_S_VCV; + + else if ( op3->GetType()->Tag() == TYPE_TABLE ) + a = n2 ? OP_VAL_IS_IN_TABLE_VVV : OP_CONST_IS_IN_TABLE_VCV; + + else + reporter->InternalError("bad types when compiling \"in\""); + + auto s2 = n2 ? FrameSlot(n2) : 0; + auto s3 = n3 ? 
FrameSlot(n3) : 0; + auto s1 = Frame1Slot(n1, a); + + ZInstI z; + + if ( n2 ) + { + if ( n3 ) + z = ZInstI(a, s1, s2, s3); + else + z = ZInstI(a, s1, s2, c3); + } + else + z = ZInstI(a, s1, s3, c2); + + TypePtr zt; + + if ( c2 ) + zt = c2->GetType(); + else if ( c3 ) + zt = c3->GetType(); + else + zt = n2->GetType(); + + z.SetType(zt); + + return AddInst(z); + } + +const ZAMStmt ZAMCompiler::CompileInExpr(const NameExpr* n1, const ListExpr* l, + const NameExpr* n2, const ConstExpr* c) + { + auto& l_e = l->Exprs(); + int n = l_e.length(); + + // Look for a very common special case: l is a single-element list, + // and n2 is present rather than c. + if ( n == 1 && n2 ) + { + ZInstI z; + bool is_vec = n2->GetType()->Tag() == TYPE_VECTOR; + + if ( l_e[0]->Tag() == EXPR_NAME ) + { + auto l_e0_n = l_e[0]->AsNameExpr(); + ZOp op = is_vec ? OP_VAL_IS_IN_VECTOR_VVV : + OP_VAL_IS_IN_TABLE_VVV; + z = GenInst(op, n1, l_e0_n, n2); + } + + else + { + auto l_e0_c = l_e[0]->AsConstExpr(); + ZOp op = is_vec ? OP_CONST_IS_IN_VECTOR_VCV : + OP_CONST_IS_IN_TABLE_VCV; + z = GenInst(op, n1, l_e0_c, n2); + } + + z.t = l_e[0]->GetType(); + return AddInst(z); + } + + // Also somewhat common is a 2-element index. Here, one or both of + // the elements might be a constant, which makes things messier. + + if ( n == 2 && n2 && + (l_e[0]->Tag() == EXPR_NAME || l_e[1]->Tag() == EXPR_NAME) ) + { + auto is_name0 = l_e[0]->Tag() == EXPR_NAME; + auto is_name1 = l_e[1]->Tag() == EXPR_NAME; + + auto l_e0_n = is_name0 ? l_e[0]->AsNameExpr() : nullptr; + auto l_e1_n = is_name1 ? l_e[1]->AsNameExpr() : nullptr; + + auto l_e0_c = is_name0 ? nullptr : l_e[0]->AsConstExpr(); + auto l_e1_c = is_name1 ? 
nullptr : l_e[1]->AsConstExpr(); + + ZInstI z; + + if ( l_e0_n && l_e1_n ) + { + z = GenInst(OP_VAL2_IS_IN_TABLE_VVVV, + n1, l_e0_n, l_e1_n, n2); + z.t2 = l_e0_n->GetType(); + } + + else if ( l_e0_n ) + { + z = GenInst(OP_VAL2_IS_IN_TABLE_VVVC, + n1, l_e0_n, n2, l_e1_c); + z.t2 = l_e0_n->GetType(); + } + + else if ( l_e1_n ) + { + z = GenInst(OP_VAL2_IS_IN_TABLE_VVCV, + n1, l_e1_n, n2, l_e0_c); + z.t2 = l_e1_n->GetType(); + } + + else + { + // Ugh, both are constants. Assign first to + // a temporary. + auto slot = TempForConst(l_e0_c); + z = ZInstI(OP_VAL2_IS_IN_TABLE_VVVC, FrameSlot(n1), + slot, FrameSlot(n2), l_e1_c); + z.op_type = OP_VVVC; + z.t2 = l_e0_c->GetType(); + } + + return AddInst(z); + } + + auto aggr = n2 ? (Expr*) n2 : (Expr*) c; + + ASSERT(aggr->GetType()->Tag() != TYPE_VECTOR); + + ZOp op = n2 ? OP_LIST_IS_IN_TABLE_VV : OP_LIST_IS_IN_TABLE_VC; + + ZInstI z; + + if ( n2 ) + z = ZInstI(op, Frame1Slot(n1, op), FrameSlot(n2)); + else + z = ZInstI(op, Frame1Slot(n1, op), c); + + z.aux = InternalBuildVals(l); + + return AddInst(z); + } + +const ZAMStmt ZAMCompiler::CompileIndex(const NameExpr* n1, const NameExpr* n2, + const ListExpr* l) + { + return CompileIndex(n1, FrameSlot(n2), n2->GetType(), l); + } + +const ZAMStmt ZAMCompiler::CompileIndex(const NameExpr* n, const ConstExpr* c, + const ListExpr* l) + { + auto tmp = TempForConst(c); + return CompileIndex(n, tmp, c->GetType(), l); + } + +const ZAMStmt ZAMCompiler::CompileIndex(const NameExpr* n1, int n2_slot, + const TypePtr& n2t, const ListExpr* l) + { + ZInstI z; + + int n = l->Exprs().length(); + auto n2tag = n2t->Tag(); + + if ( n == 1 ) + { + auto ind = l->Exprs()[0]; + auto var_ind = ind->Tag() == EXPR_NAME; + auto n3 = var_ind ? ind->AsNameExpr() : nullptr; + auto c3 = var_ind ? nullptr : ind->AsConstExpr(); + bro_uint_t c = 0; + + if ( ! 
var_ind ) + { + if ( ind->GetType()->Tag() == TYPE_COUNT ) + c = c3->Value()->AsCount(); + else if ( ind->GetType()->Tag() == TYPE_INT ) + c = c3->Value()->AsInt(); + } + + if ( n2tag == TYPE_STRING ) + { + if ( n3 ) + { + int n3_slot = FrameSlot(n3); + auto zop = OP_INDEX_STRING_VVV; + z = ZInstI(zop, Frame1Slot(n1, zop), + n2_slot, n3_slot); + } + else + { + auto zop = OP_INDEX_STRINGC_VVV; + z = ZInstI(zop, Frame1Slot(n1, zop), + n2_slot, c); + z.op_type = OP_VVV_I3; + } + + return AddInst(z); + } + + if ( n2tag == TYPE_VECTOR ) + { + auto n2_yt = n2t->AsVectorType()->Yield(); + bool is_any = n2_yt->Tag() == TYPE_ANY; + + if ( n3 ) + { + int n3_slot = FrameSlot(n3); + auto zop = is_any ? OP_INDEX_ANY_VEC_VVV : + OP_INDEX_VEC_VVV; + z = ZInstI(zop, Frame1Slot(n1, zop), + n2_slot, n3_slot); + } + else + { + auto zop = is_any ? OP_INDEX_ANY_VECC_VVV : + OP_INDEX_VECC_VVV; + z = ZInstI(zop, Frame1Slot(n1, zop), n2_slot, c); + z.op_type = OP_VVV_I3; + } + + z.SetType(n1->GetType()); + return AddInst(z); + } + + if ( n2tag == TYPE_TABLE ) + { + if ( n3 ) + { + int n3_slot = FrameSlot(n3); + auto zop = AssignmentFlavor(OP_TABLE_INDEX1_VVV, + n1->GetType()->Tag()); + z = ZInstI(zop, Frame1Slot(n1, zop), n2_slot, + n3_slot); + z.SetType(n3->GetType()); + } + + else + { + auto zop = AssignmentFlavor(OP_TABLE_INDEX1_VVC, + n1->GetType()->Tag()); + z = ZInstI(zop, Frame1Slot(n1, zop), + n2_slot, c3); + } + + return AddInst(z); + } + } + + auto indexes = l->Exprs(); + + ZOp op; + + switch ( n2tag ) { + case TYPE_VECTOR: + op = OP_INDEX_VEC_SLICE_VV; + z = ZInstI(op, Frame1Slot(n1, op), n2_slot); + z.SetType(n2t); + break; + + case TYPE_TABLE: + op = OP_TABLE_INDEX_VV; + z = ZInstI(op, Frame1Slot(n1, op), n2_slot); + z.SetType(n1->GetType()); + break; + + case TYPE_STRING: + op = OP_INDEX_STRING_SLICE_VV; + z = ZInstI(op, Frame1Slot(n1, op), n2_slot); + z.SetType(n1->GetType()); + break; + + default: + reporter->InternalError("bad aggregate type when compiling index"); + } + 
+ z.aux = InternalBuildVals(l); + z.CheckIfManaged(n1->GetType()); + + return AddInst(z); + } + +const ZAMStmt ZAMCompiler::AssignVecElems(const Expr* e) + { + auto index_assign = e->AsIndexAssignExpr(); + + auto op1 = index_assign->GetOp1(); + const auto& t1 = op1->GetType(); + + auto op3 = index_assign->GetOp3(); + const auto& t3 = op3->GetType(); + + auto lhs = op1->AsNameExpr(); + auto lt = lhs->GetType(); + + auto indexes_expr = index_assign->GetOp2()->AsListExpr(); + auto indexes = indexes_expr->Exprs(); + + if ( indexes.length() > 1 ) + { // Vector slice assignment. + ASSERT(op1->Tag() == EXPR_NAME); + ASSERT(op3->Tag() == EXPR_NAME); + ASSERT(t1->Tag() == TYPE_VECTOR); + ASSERT(t3->Tag() == TYPE_VECTOR); + + auto z = GenInst(OP_VECTOR_SLICE_ASSIGN_VV, + lhs, op3->AsNameExpr()); + + z.aux = InternalBuildVals(indexes_expr); + + return AddInst(z); + } + + const auto& yt1 = t1->Yield(); + auto any_vec = yt1->Tag() == TYPE_VOID || yt1->Tag() == TYPE_ANY; + auto any_val = IsAny(t3); + + auto op2 = indexes[0]; + + if ( op2->Tag() == EXPR_CONST && op3->Tag() == EXPR_CONST ) + { + // Turn into a VVC assignment by assigning the index to + // a temporary. + auto c = op2->AsConstExpr(); + auto tmp = TempForConst(c); + + auto zop = any_vec ? 
OP_ANY_VECTOR_ELEM_ASSIGN_VVC : + OP_VECTOR_ELEM_ASSIGN_VVC; + + return AddInst(ZInstI(zop, Frame1Slot(lhs, zop), tmp, + op3->AsConstExpr())); + } + + if ( op2->Tag() == EXPR_NAME ) + { + auto n2 = op2->AsNameExpr(); + ZAMStmt inst(0); + + if ( op3->Tag() == EXPR_NAME ) + { + auto n3 = op3->AsNameExpr(); + + if ( any_vec ) + inst = Any_Vector_Elem_AssignVVV(lhs, n2, n3); + else if ( any_val ) + inst = Vector_Elem_Assign_AnyVVV(lhs, n2, n3); + else + inst = Vector_Elem_AssignVVV(lhs, n2, n3); + } + + else + { + auto c3 = op3->AsConstExpr(); + + if ( any_vec ) + inst = Any_Vector_Elem_AssignVVC(lhs, n2, c3); + else + inst = Vector_Elem_AssignVVC(lhs, n2, c3); + } + + TopMainInst()->t = t3; + return inst; + } + + auto c2 = op2->AsConstExpr(); + auto n3 = op3->AsNameExpr(); + auto index = c2->Value()->AsCount(); + + ZAMStmt inst; + + if ( any_vec ) + inst = Any_Vector_Elem_AssignVVi(lhs, n3, index); + else if ( any_val ) + inst = Vector_Elem_Assign_AnyVVi(lhs, n3, index); + else + inst = Vector_Elem_AssignVVi(lhs, n3, index); + + TopMainInst()->t = t3; + return inst; + } + +const ZAMStmt ZAMCompiler::AssignTableElem(const Expr* e) + { + auto index_assign = e->AsIndexAssignExpr(); + + auto op1 = index_assign->GetOp1()->AsNameExpr(); + auto op2 = index_assign->GetOp2()->AsListExpr(); + auto op3 = index_assign->GetOp3(); + + ZInstI z; + + if ( op3->Tag() == EXPR_NAME ) + z = GenInst(OP_TABLE_ELEM_ASSIGN_VV, op1, op3->AsNameExpr()); + else + z = GenInst(OP_TABLE_ELEM_ASSIGN_VC, op1, op3->AsConstExpr()); + + z.aux = InternalBuildVals(op2); + z.t = op3->GetType(); + + return AddInst(z); + } + +const ZAMStmt ZAMCompiler::Call(const ExprStmt* e) + { + if ( IsZAM_BuiltIn(e->StmtExpr()) ) + return LastInst(); + + return DoCall(e->StmtExpr()->AsCallExpr(), nullptr); + } + +const ZAMStmt ZAMCompiler::AssignToCall(const ExprStmt* e) + { + if ( IsZAM_BuiltIn(e->StmtExpr()) ) + return LastInst(); + + auto assign = e->StmtExpr()->AsAssignExpr(); + auto n = 
assign->GetOp1()->AsRefExpr()->GetOp1()->AsNameExpr();
+	auto call = assign->GetOp2()->AsCallExpr();
+
+	return DoCall(call, n);
+	}
+
+// Generates the instruction(s) for the call "c".  If "n" is non-nil then
+// the call's return value is assigned to it, otherwise no value is kept.
+//
+// Calls with 0 or 1 arguments use dedicated operations.  All other calls
+// pass their arguments via the instruction's auxiliary information, using
+// CALL2..CALL5 for 2-5 arguments and the N-argument forms for everything
+// else.  Indirect calls (the function is a local, or a global without a
+// value at compile time) always use the N-argument "INDCALLN" forms.
+//
+// If "n" is a global, an explicit OP_STORE_GLOBAL_V follows the call.
+const ZAMStmt ZAMCompiler::DoCall(const CallExpr* c, const NameExpr* n)
+	{
+	auto func = c->Func()->AsNameExpr();
+	auto func_id = func->Id();
+	auto& args = c->Args()->Exprs();
+
+	int nargs = args.length();
+	int call_case = nargs;
+
+	bool indirect = ! func_id->IsGlobal() || ! func_id->GetVal();
+
+	if ( indirect )
+		call_case = -1;	// force default of CallN
+
+	// Computed once up front and shared by every branch below; only
+	// meaningful when "n" is non-nil.
+	auto nt = n ? n->GetType()->Tag() : TYPE_VOID;
+	auto n_slot = n ? Frame1Slot(n, OP1_WRITE) : -1;
+
+	ZInstI z;
+
+	if ( call_case == 0 )
+		{
+		if ( n )
+			z = ZInstI(AssignmentFlavor(OP_CALL0_V, nt), n_slot);
+		else
+			z = ZInstI(OP_CALL0_X);
+		}
+
+	else if ( call_case == 1 )
+		{
+		auto arg0 = args[0];
+		auto n0 = arg0->Tag() == EXPR_NAME ?
+				arg0->AsNameExpr() : nullptr;
+		auto c0 = arg0->Tag() == EXPR_CONST ?
+				arg0->AsConstExpr() : nullptr;
+
+		if ( n )
+			{
+			if ( n0 )
+				z = ZInstI(AssignmentFlavor(OP_CALL1_VV, nt),
+						n_slot, FrameSlot(n0));
+			else
+				z = ZInstI(AssignmentFlavor(OP_CALL1_VC, nt),
+						n_slot, c0);
+			}
+		else
+			{
+			if ( n0 )
+				z = ZInstI(OP_CALL1_V, FrameSlot(n0));
+			else
+				z = ZInstI(OP_CALL1_C, c0);
+			}
+
+		z.t = arg0->GetType();
+		}
+
+	else
+		{
+		// Arguments are passed via the auxiliary information: frame
+		// slots for names, values for constants.
+		auto aux = new ZInstAux(nargs);
+
+		for ( int i = 0; i < nargs; ++i )
+			{
+			auto ai = args[i];
+			auto ai_t = ai->GetType();
+			if ( ai->Tag() == EXPR_NAME )
+				aux->Add(i, FrameSlot(ai->AsNameExpr()), ai_t);
+			else
+				aux->Add(i, ai->AsConstExpr()->ValuePtr());
+			}
+
+		ZOp op;
+
+		switch ( call_case ) {
+		case 2: op = n ? OP_CALL2_V : OP_CALL2_X; break;
+		case 3: op = n ? OP_CALL3_V : OP_CALL3_X; break;
+		case 4: op = n ? OP_CALL4_V : OP_CALL4_X; break;
+		case 5: op = n ? OP_CALL5_V : OP_CALL5_X; break;
+
+		default:
+			if ( indirect )
+				op = n ? OP_INDCALLN_VV : OP_INDCALLN_V;
+			else
+				op = n ? OP_CALLN_V : OP_CALLN_X;
+			break;
+		}
+
+		if ( n )
+			{
+			// Reuse the "n_slot" computed above rather than
+			// shadowing it with a second Frame1Slot() lookup.
+			op = AssignmentFlavor(op, nt);
+
+			if ( indirect )
+				{
+				// A slot of -1 marks a global function, which
+				// is instead located via aux->id_val below.
+				if ( func_id->IsGlobal() )
+					z = ZInstI(op, n_slot, -1);
+				else
+					z = ZInstI(op, n_slot, FrameSlot(func));
+				z.op_type = OP_VV;
+				}
+
+			else
+				{
+				z = ZInstI(op, n_slot);
+				z.op_type = OP_V;
+				}
+			}
+		else
+			{
+			if ( indirect )
+				{
+				if ( func_id->IsGlobal() )
+					z = ZInstI(op, -1);
+				else
+					z = ZInstI(op, FrameSlot(func));
+				z.op_type = OP_V;
+				}
+			else
+				{
+				z = ZInstI(op);
+				z.op_type = OP_X;
+				}
+			}
+
+		z.aux = aux;
+		}
+
+	if ( ! z.aux )
+		z.aux = new ZInstAux(0);
+
+	z.aux->can_change_globals = true;
+
+	if ( ! indirect || func_id->IsGlobal() )
+		{
+		z.aux->id_val = func_id;
+
+		if ( ! indirect )
+			z.func = func_id->GetVal()->AsFunc();
+		}
+
+	if ( n )
+		{
+		auto id = n->Id();
+		if ( id->IsGlobal() )
+			{
+			// Emit the call, then follow it with a store of
+			// the result into the global.
+			AddInst(z);
+			auto global_slot = global_id_to_info[id];
+			z = ZInstI(OP_STORE_GLOBAL_V, global_slot);
+			z.op_type = OP_V_I1;
+			z.t = globalsI[global_slot].id->GetType();
+			}
+		}
+
+	return AddInst(z);
+	}
+
+const ZAMStmt ZAMCompiler::ConstructTable(const NameExpr* n, const Expr* e)
+	{
+	auto con = e->GetOp1()->AsListExpr();
+	auto tt = cast_intrusive<TableType>(n->GetType());
+	auto width = tt->GetIndices()->GetTypes().size();
+
+	auto z = GenInst(OP_CONSTRUCT_TABLE_VV, n, width);
+	z.aux = InternalBuildVals(con, width + 1);
+	z.t = tt;
+	z.attrs = e->AsTableConstructorExpr()->GetAttrs();
+
+	return AddInst(z);
+	}
+
+const ZAMStmt ZAMCompiler::ConstructSet(const NameExpr* n, const Expr* e)
+	{
+	auto con = e->GetOp1()->AsListExpr();
+	auto tt = n->GetType()->AsTableType();
+	auto width = tt->GetIndices()->GetTypes().size();
+
+	auto z = GenInst(OP_CONSTRUCT_SET_VV, n, width);
+	z.aux = InternalBuildVals(con, width);
+	z.t = e->GetType();
+	z.attrs = e->AsSetConstructorExpr()->GetAttrs();
+
+	return AddInst(z);
+	}
+
+const ZAMStmt ZAMCompiler::ConstructRecord(const NameExpr* n, const Expr* e)
+	{
+	auto rc =
e->AsRecordConstructorExpr(); + + ZInstI z; + + if ( rc->Map() ) + { + z = GenInst(OP_CONSTRUCT_KNOWN_RECORD_V, n); + z.aux = InternalBuildVals(rc->Op().get()); + z.aux->map = *rc->Map(); + } + else + { + z = GenInst(OP_CONSTRUCT_RECORD_V, n); + z.aux = InternalBuildVals(rc->Op().get()); + } + + z.t = e->GetType(); + + return AddInst(z); + } + +const ZAMStmt ZAMCompiler::ConstructVector(const NameExpr* n, const Expr* e) + { + auto con = e->GetOp1()->AsListExpr(); + + auto z = GenInst(OP_CONSTRUCT_VECTOR_V, n); + z.aux = InternalBuildVals(con); + z.t = e->GetType(); + + return AddInst(z); + } + +const ZAMStmt ZAMCompiler::ArithCoerce(const NameExpr* n, const Expr* e) + { + auto nt = n->GetType(); + auto nt_is_vec = nt->Tag() == TYPE_VECTOR; + + auto op = e->GetOp1(); + auto op_t = op->GetType(); + auto op_is_vec = op_t->Tag() == TYPE_VECTOR; + + auto e_t = e->GetType(); + auto et_is_vec = e_t->Tag() == TYPE_VECTOR; + + if ( nt_is_vec || op_is_vec || et_is_vec ) + { + if ( ! (nt_is_vec && op_is_vec && et_is_vec) ) + reporter->InternalError("vector confusion compiling coercion"); + + op_t = op_t->AsVectorType()->Yield(); + e_t = e_t->AsVectorType()->Yield(); + } + + auto targ_it = e_t->InternalType(); + auto op_it = op_t->InternalType(); + + if ( op_it == targ_it ) + reporter->InternalError("coercion wasn't folded"); + + if ( op->Tag() != EXPR_NAME ) + reporter->InternalError("coercion wasn't folded"); + + ZOp a; + + switch ( targ_it ) { + case TYPE_INTERNAL_DOUBLE: + { + if ( op_it == TYPE_INTERNAL_INT ) + a = nt_is_vec ? OP_COERCE_DI_VEC_VV : OP_COERCE_DI_VV; + else + a = nt_is_vec ? OP_COERCE_DU_VEC_VV : OP_COERCE_DU_VV; + break; + } + + case TYPE_INTERNAL_INT: + { + if ( op_it == TYPE_INTERNAL_UNSIGNED ) + a = nt_is_vec ? OP_COERCE_IU_VEC_VV : OP_COERCE_IU_VV; + else + a = nt_is_vec ? OP_COERCE_ID_VEC_VV : OP_COERCE_ID_VV; + break; + } + + case TYPE_INTERNAL_UNSIGNED: + { + if ( op_it == TYPE_INTERNAL_INT ) + a = nt_is_vec ? 
OP_COERCE_UI_VEC_VV : OP_COERCE_UI_VV; + else + a = nt_is_vec ? OP_COERCE_UD_VEC_VV : OP_COERCE_UD_VV; + break; + } + + default: + reporter->InternalError("bad target internal type in coercion"); + } + + return AddInst(GenInst(a, n, op->AsNameExpr())); + } + +const ZAMStmt ZAMCompiler::RecordCoerce(const NameExpr* n, const Expr* e) + { + auto r = e->AsRecordCoerceExpr(); + auto op = r->GetOp1()->AsNameExpr(); + + int op_slot = FrameSlot(op); + auto zop = OP_RECORD_COERCE_VV; + ZInstI z(zop, Frame1Slot(n, zop), op_slot); + + z.SetType(e->GetType()); + z.op_type = OP_VV; + + auto map = r->Map(); + auto map_size = map.size(); + z.aux = new ZInstAux(map_size); + z.aux->map = map; + + for ( auto i = 0; i < map_size; ++i ) + z.aux->Add(i, map[i], nullptr); + + // Mark the integer entries in z.aux as not being frame slots as usual. + z.aux->slots = nullptr; + + return AddInst(z); + } + +const ZAMStmt ZAMCompiler::TableCoerce(const NameExpr* n, const Expr* e) + { + auto op = e->GetOp1()->AsNameExpr(); + + int op_slot = FrameSlot(op); + auto zop = OP_TABLE_COERCE_VV; + ZInstI z(zop, Frame1Slot(n, zop), op_slot); + z.SetType(e->GetType()); + + return AddInst(z); + } + +const ZAMStmt ZAMCompiler::VectorCoerce(const NameExpr* n, const Expr* e) + { + auto op = e->GetOp1()->AsNameExpr(); + int op_slot = FrameSlot(op); + + auto zop = OP_VECTOR_COERCE_VV; + ZInstI z(zop, Frame1Slot(n, zop), op_slot); + z.SetType(e->GetType()); + + return AddInst(z); + } + +const ZAMStmt ZAMCompiler::Is(const NameExpr* n, const Expr* e) + { + auto is = e->AsIsExpr(); + auto op = e->GetOp1()->AsNameExpr(); + int op_slot = FrameSlot(op); + + ZInstI z(OP_IS_VV, Frame1Slot(n, OP_IS_VV), op_slot); + z.t2 = op->GetType(); + z.SetType(is->TestType()); + + return AddInst(z); + } + +} // zeek::detail diff --git a/src/script_opt/ZAM/Inst-Gen.cc b/src/script_opt/ZAM/Inst-Gen.cc new file mode 100644 index 0000000000..43b49c66be --- /dev/null +++ b/src/script_opt/ZAM/Inst-Gen.cc @@ -0,0 +1,167 @@ +// See the 
file "COPYING" in the main distribution directory for copyright. + +// Helper functions for generating ZAM code. + +#include "zeek/script_opt/ZAM/Compile.h" + + +namespace zeek::detail { + +ZInstI ZAMCompiler::GenInst(ZOp op) + { + return ZInstI(op); + } + +ZInstI ZAMCompiler::GenInst(ZOp op, const NameExpr* v1) + { + return ZInstI(op, Frame1Slot(v1, op)); + } + +ZInstI ZAMCompiler::GenInst(ZOp op, const NameExpr* v1, int i) + { + auto z = ZInstI(op, Frame1Slot(v1, op), i); + z.op_type = OP_VV_I2; + return z; + } + +ZInstI ZAMCompiler::GenInst(ZOp op, const ConstExpr* c, const NameExpr* v1, + int i) + { + auto z = ZInstI(op, Frame1Slot(v1, op), i, c); + z.op_type = OP_VVC_I2; + return z; + } + +ZInstI ZAMCompiler::GenInst(ZOp op, const NameExpr* v1, const NameExpr* v2) + { + int nv2 = FrameSlot(v2); + return ZInstI(op, Frame1Slot(v1, op), nv2); + } + +ZInstI ZAMCompiler::GenInst(ZOp op, const NameExpr* v1, const NameExpr* v2, + const NameExpr* v3) + { + int nv2 = FrameSlot(v2); + int nv3 = FrameSlot(v3); + return ZInstI(op, Frame1Slot(v1, op), nv2, nv3); + } + +ZInstI ZAMCompiler::GenInst(ZOp op, const NameExpr* v1, const NameExpr* v2, + const NameExpr* v3, const NameExpr* v4) + { + int nv2 = FrameSlot(v2); + int nv3 = FrameSlot(v3); + int nv4 = FrameSlot(v4); + return ZInstI(op, Frame1Slot(v1, op), nv2, nv3, nv4); + } + +ZInstI ZAMCompiler::GenInst(ZOp op, const ConstExpr* ce) + { + return ZInstI(op, ce); + } + +ZInstI ZAMCompiler::GenInst(ZOp op, const NameExpr* v1, const ConstExpr* ce) + { + return ZInstI(op, Frame1Slot(v1, op), ce); + } + +ZInstI ZAMCompiler::GenInst(ZOp op, const ConstExpr* ce, const NameExpr* v1) + { + return ZInstI(op, Frame1Slot(v1, op), ce); + } + +ZInstI ZAMCompiler::GenInst(ZOp op, const NameExpr* v1, const ConstExpr* ce, + const NameExpr* v2) + { + int nv2 = FrameSlot(v2); + return ZInstI(op, Frame1Slot(v1, op), nv2, ce); + } + +ZInstI ZAMCompiler::GenInst(ZOp op, const NameExpr* v1, const NameExpr* v2, + const ConstExpr* ce) + { + int 
nv2 = FrameSlot(v2); + return ZInstI(op, Frame1Slot(v1, op), nv2, ce); + } + +ZInstI ZAMCompiler::GenInst(ZOp op, const NameExpr* v1, const NameExpr* v2, + const NameExpr* v3, const ConstExpr* ce) + { + int nv2 = FrameSlot(v2); + int nv3 = FrameSlot(v3); + return ZInstI(op, Frame1Slot(v1, op), nv2, nv3, ce); + } + +ZInstI ZAMCompiler::GenInst(ZOp op, const NameExpr* v1, const NameExpr* v2, + const ConstExpr* ce, const NameExpr* v3) + { + // Note that here we reverse the order of the arguments; saves + // us from needing to implement a redundant constructor. + int nv2 = FrameSlot(v2); + int nv3 = FrameSlot(v3); + return ZInstI(op, Frame1Slot(v1, op), nv2, nv3, ce); + } + +ZInstI ZAMCompiler::GenInst(ZOp op, const NameExpr* v1, const ConstExpr* c, + int i) + { + auto z = ZInstI(op, Frame1Slot(v1, op), i, c); + z.op_type = OP_VVC_I2; + return z; + } + +ZInstI ZAMCompiler::GenInst(ZOp op, const NameExpr* v1, const NameExpr* v2, + int i) + { + int nv2 = FrameSlot(v2); + auto z = ZInstI(op, Frame1Slot(v1, op), nv2, i); + z.op_type = OP_VVV_I3; + return z; + } + +ZInstI ZAMCompiler::GenInst(ZOp op, const NameExpr* v1, const NameExpr* v2, + int i1, int i2) + { + int nv2 = FrameSlot(v2); + auto z = ZInstI(op, Frame1Slot(v1, op), nv2, i1, i2); + z.op_type = OP_VVVV_I3_I4; + return z; + } + +ZInstI ZAMCompiler::GenInst(ZOp op, const NameExpr* v, const ConstExpr* c, + int i1, int i2) + { + auto z = ZInstI(op, Frame1Slot(v, op), i1, i2, c); + z.op_type = OP_VVVC_I2_I3; + return z; + } + +ZInstI ZAMCompiler::GenInst(ZOp op, const NameExpr* v1, const NameExpr* v2, + const NameExpr* v3, int i) + { + int nv2 = FrameSlot(v2); + int nv3 = FrameSlot(v3); + auto z = ZInstI(op, Frame1Slot(v1, op), nv2, nv3, i); + z.op_type = OP_VVVV_I4; + return z; + } + +ZInstI ZAMCompiler::GenInst(ZOp op, const NameExpr* v1, const NameExpr* v2, + const ConstExpr* c, int i) + { + int nv2 = FrameSlot(v2); + auto z = ZInstI(op, Frame1Slot(v1, op), nv2, i, c); + z.op_type = OP_VVVC_I3; + return z; + } + 
+ZInstI ZAMCompiler::GenInst(ZOp op, const NameExpr* v1, const ConstExpr* c, + const NameExpr* v2, int i) + { + int nv2 = FrameSlot(v2); + auto z = ZInstI(op, Frame1Slot(v1, op), nv2, i, c); + z.op_type = OP_VVVC_I3; + return z; + } + +} // zeek::detail diff --git a/src/script_opt/ZAM/Inst-Gen.h b/src/script_opt/ZAM/Inst-Gen.h new file mode 100644 index 0000000000..a90619456e --- /dev/null +++ b/src/script_opt/ZAM/Inst-Gen.h @@ -0,0 +1,39 @@ +// See the file "COPYING" in the main distribution directory for copyright. + +// Methods for generating ZAM instructions, mainly to aid in translating +// NameExpr*'s to slots. Some aren't needed, but we provide a complete +// set mirroring the ZInstI constructors for consistency. +// +// Maintained separately from Compile.h to make it conceptually simple to +// add new helpers. + +ZInstI GenInst(ZOp op); +ZInstI GenInst(ZOp op, const NameExpr* v1); +ZInstI GenInst(ZOp op, const NameExpr* v1, int i); +ZInstI GenInst(ZOp op, const ConstExpr* c, const NameExpr* v1, int i); +ZInstI GenInst(ZOp op, const NameExpr* v1, const NameExpr* v2); +ZInstI GenInst(ZOp op, const NameExpr* v1, const NameExpr* v2, + const NameExpr* v3); +ZInstI GenInst(ZOp op, const NameExpr* v1, const NameExpr* v2, + const NameExpr* v3, const NameExpr* v4); +ZInstI GenInst(ZOp op, const ConstExpr* ce); +ZInstI GenInst(ZOp op, const NameExpr* v1, const ConstExpr* ce); +ZInstI GenInst(ZOp op, const ConstExpr* ce, const NameExpr* v1); +ZInstI GenInst(ZOp op, const NameExpr* v1, const ConstExpr* ce, + const NameExpr* v2); +ZInstI GenInst(ZOp op, const NameExpr* v1, const NameExpr* v2, + const ConstExpr* ce); +ZInstI GenInst(ZOp op, const NameExpr* v1, const NameExpr* v2, + const NameExpr* v3, const ConstExpr* ce); +ZInstI GenInst(ZOp op, const NameExpr* v1, const NameExpr* v2, + const ConstExpr* ce, const NameExpr* v3); +ZInstI GenInst(ZOp op, const NameExpr* v1, const ConstExpr* c, int i); +ZInstI GenInst(ZOp op, const NameExpr* v1, const NameExpr* v2, int i); 
+ZInstI GenInst(ZOp op, const NameExpr* v1, const NameExpr* v2, int i1, int i2); +ZInstI GenInst(ZOp op, const NameExpr* v, const ConstExpr* c, int i1, int i2); +ZInstI GenInst(ZOp op, const NameExpr* v1, const NameExpr* v2, + const NameExpr* v3, int i); +ZInstI GenInst(ZOp op, const NameExpr* v1, const NameExpr* v2, + const ConstExpr* c, int i); +ZInstI GenInst(ZOp op, const NameExpr* v1, const ConstExpr* c, + const NameExpr* v2, int i); diff --git a/src/script_opt/ZAM/IterInfo.h b/src/script_opt/ZAM/IterInfo.h new file mode 100644 index 0000000000..2af43dbc8a --- /dev/null +++ b/src/script_opt/ZAM/IterInfo.h @@ -0,0 +1,146 @@ +// See the file "COPYING" in the main distribution directory for copyright. + +// Classes to support ZAM for-loop iterations. + +#pragma once + +#include "zeek/Val.h" +#include "zeek/ZeekString.h" +#include "zeek/script_opt/ZAM/ZInst.h" + +namespace zeek::detail { + +// Class for iterating over the elements of a table. Requires some care +// because the dictionary iterators need to be destructed when done. + +class TableIterInfo { +public: + // No constructor needed, as all of our member variables are + // instead instantiated via BeginLoop(). This allows us to + // reuse TableIterInfo objects to lower the overhead associated + // with executing ZBody::DoExec for non-recursive functions. + + // We do, however, want to make sure that when we go out of scope, + // if we have any pending iterators we clear them. + ~TableIterInfo() { Clear(); } + + // Start looping over the elements of the given table. "_aux" + // provides information about the index variables, their types, + // and the type of the value variable (if any). + void BeginLoop(const TableVal* _tv, ZInstAux* _aux) + { + tv = _tv; + aux = _aux; + auto tvd = tv->AsTable(); + tbl_iter = tvd->begin(); + tbl_end = tvd->end(); + } + + // True if we're done iterating, false if not. 
+ bool IsDoneIterating() const + { + return *tbl_iter == *tbl_end; + } + + // Indicates that the current iteration is finished. + void IterFinished() + { + ++*tbl_iter; + } + + // Performs the next iteration (assuming IsDoneIterating() returned + // false), assigning to the index variables. + void NextIter(ZVal* frame) + { + auto ind_lv = tv->RecreateIndex(*(*tbl_iter)->GetHashKey()); + for ( int i = 0; i < ind_lv->Length(); ++i ) + { + ValPtr ind_lv_p = ind_lv->Idx(i); + auto& var = frame[aux->loop_vars[i]]; + auto& t = aux->loop_var_types[i]; + if ( ZVal::IsManagedType(t) ) + ZVal::DeleteManagedType(var); + var = ZVal(ind_lv_p, t); + } + + IterFinished(); + } + + // For the current iteration, returns the corresponding value. + ZVal IterValue() + { + auto tev = (*tbl_iter)->GetValue(); + return ZVal(tev->GetVal(), aux->value_var_type); + } + + // Called upon finishing the iteration. + void EndIter() { Clear(); } + + // Called to explicitly clear any iteration state. + void Clear() + { + tbl_iter = std::nullopt; + tbl_end = std::nullopt; + } + +private: + // The table we're looping over. If we want to allow for the table + // going away before we're able to clear our iterators then we + // could change this to non-const and use Ref/Unref. + const TableVal* tv = nullptr; + + // Associated auxiliary information. + ZInstAux* aux; + + std::optional tbl_iter; + std::optional tbl_end; +}; + +// Class for simple step-wise iteration across an integer range. +// Suitable for iterating over vectors or strings. + +class StepIterInfo { +public: + // We do some cycle-squeezing by not having a constructor to + // initialize our member variables, since we impose a discipline + // that any use of the object starts with InitLoop(). That lets + // us use quasi-static objects for non-recursive functions. + + // Initializes for looping over the elements of a raw vector. 
+ void InitLoop(const std::vector>* _vv) + { + vv = _vv; + n = vv->size(); + iter = 0; + } + + // Initializes for looping over the elements of a raw string. + void InitLoop(const String* _s) + { + s = _s; + n = s->Len(); + iter = 0; + } + + // True if we're done iterating, false if not. + bool IsDoneIterating() const + { + return iter >= n; + } + + // Indicates that the current iteration is finished. + void IterFinished() + { + ++iter; + } + + // Counter of where we are in the iteration. + bro_uint_t iter; // initialized to 0 at start of loop + bro_uint_t n; // we loop from 0 ... n-1 + + // The low-level value we're iterating over. + const std::vector>* vv; + const String* s; +}; + +} // namespace zeek::detail diff --git a/src/script_opt/ZAM/Low-Level.cc b/src/script_opt/ZAM/Low-Level.cc new file mode 100644 index 0000000000..3b8e177809 --- /dev/null +++ b/src/script_opt/ZAM/Low-Level.cc @@ -0,0 +1,172 @@ +// See the file "COPYING" in the main distribution directory for copyright. + +// Methods relating to low-level ZAM instruction manipulation. + +#include "zeek/Reporter.h" +#include "zeek/Desc.h" +#include "zeek/script_opt/ZAM/Compile.h" +#include "zeek/script_opt/ScriptOpt.h" + +namespace zeek::detail { + + +const ZAMStmt ZAMCompiler::StartingBlock() + { + return ZAMStmt(insts1.size()); + } + +const ZAMStmt ZAMCompiler::FinishBlock(const ZAMStmt /* start */) + { + return ZAMStmt(insts1.size() - 1); + } + +bool ZAMCompiler::NullStmtOK() const + { + // They're okay iff they're the entire statement body. 
+	return insts1.size() == 0;
+	}
+
+const ZAMStmt ZAMCompiler::EmptyStmt()
+	{
+	return ZAMStmt(insts1.size() - 1);
+	}
+
+const ZAMStmt ZAMCompiler::LastInst()
+	{
+	return ZAMStmt(insts1.size() - 1);
+	}
+
+const ZAMStmt ZAMCompiler::ErrorStmt()
+	{
+	return ZAMStmt(0);
+	}
+
+OpaqueVals* ZAMCompiler::BuildVals(const ListExprPtr& l)
+	{
+	return new OpaqueVals(InternalBuildVals(l.get()));
+	}
+
+// Builds auxiliary information holding the values of the expressions
+// in "l".  Each expression is expected to contribute exactly "stride"
+// entries.
+ZInstAux* ZAMCompiler::InternalBuildVals(const ListExpr* l, int stride)
+	{
+	auto exprs = l->Exprs();
+	int n = exprs.length();
+
+	auto aux = new ZInstAux(n * stride);
+
+	int offset = 0;	// offset into aux info
+	for ( int i = 0; i < n; ++i )
+		{
+		auto& e = exprs[i];
+		int num_vals = InternalAddVal(aux, offset, e);
+		ASSERT(num_vals == stride);
+		offset += num_vals;
+		}
+
+	return aux;
+	}
+
+// Adds the value(s) corresponding to "e" to "zi", starting at offset "i".
+// Returns the number of entries added: 1 for a simple name or constant,
+// the number of indices for a set-constructor element, and the number
+// of indices plus one (for the yield) for a table-constructor element.
+int ZAMCompiler::InternalAddVal(ZInstAux* zi, int i, Expr* e)
+	{
+	if ( e->Tag() == EXPR_ASSIGN )
+		{ // We're building up a table constructor
+		auto& indices = e->GetOp1()->AsListExpr()->Exprs();
+		auto val = e->GetOp2();
+		int width = indices.length();
+
+		// The recursive calls do the actual work of populating "zi",
+		// so they must not live inside ASSERT(): an assertion macro
+		// that compiles to nothing would silently drop them.
+		for ( int j = 0; j < width; ++j )
+			{
+			[[maybe_unused]] int num_vals =
+				InternalAddVal(zi, i + j, indices[j]);
+			ASSERT(num_vals == 1);
+			}
+
+		[[maybe_unused]] int num_vals =
+			InternalAddVal(zi, i + width, val.get());
+		ASSERT(num_vals == 1);
+
+		return width + 1;
+		}
+
+	if ( e->Tag() == EXPR_LIST )
+		{ // We're building up a set constructor
+		auto& indices = e->AsListExpr()->Exprs();
+		int width = indices.length();
+
+		// As above, keep the side-effecting call out of ASSERT().
+		for ( int j = 0; j < width; ++j )
+			{
+			[[maybe_unused]] int num_vals =
+				InternalAddVal(zi, i + j, indices[j]);
+			ASSERT(num_vals == 1);
+			}
+
+		return width;
+		}
+
+	if ( e->Tag() == EXPR_FIELD_ASSIGN )
+		{
+		// These can appear when we're processing the expression
+		// list for a record constructor.
+		auto fa = e->AsFieldAssignExpr();
+		e = fa->GetOp1().get();
+
+		if ( e->GetType()->Tag() == TYPE_TYPE )
+			{
+			// Ugh - we actually need a "type" constant.
+			auto v = e->Eval(nullptr);
+			ASSERT(v);
+			zi->Add(i, v);
+			return 1;
+			}
+
+		// Now that we've adjusted, fall through.
+		}
+
+	if ( e->Tag() == EXPR_NAME )
+		zi->Add(i, FrameSlot(e->AsNameExpr()), e->GetType());
+
+	else
+		zi->Add(i, e->AsConstExpr()->ValuePtr());
+
+	return 1;
+	}
+
+const ZAMStmt ZAMCompiler::AddInst(const ZInstI& inst)
+	{
+	ZInstI* i;
+
+	if ( pending_inst )
+		{
+		i = pending_inst;
+		pending_inst = nullptr;
+		}
+	else
+		i = new ZInstI();
+
+	*i = inst;
+
+	insts1.push_back(i);
+
+	top_main_inst = insts1.size() - 1;
+
+	if ( pending_global_store < 0 )
+		return ZAMStmt(top_main_inst);
+
+	auto global_slot = pending_global_store;
+	pending_global_store = -1;
+
+	auto store_inst = ZInstI(OP_STORE_GLOBAL_V, global_slot);
+	store_inst.op_type = OP_V_I1;
+	store_inst.t = globalsI[global_slot].id->GetType();
+
+	return AddInst(store_inst);
+	}
+
+const Stmt* ZAMCompiler::LastStmt(const Stmt* s) const
+	{
+	if ( s->Tag() == STMT_LIST )
+		{
+		auto sl = s->AsStmtList()->Stmts();
+		return sl[sl.length() - 1];
+		}
+
+	else
+		return s;
+	}
+
+ZAMStmt ZAMCompiler::PrevStmt(const ZAMStmt s)
+	{
+	return ZAMStmt(s.stmt_num - 1);
+	}
+
+
+} // zeek::detail
diff --git a/src/script_opt/ZAM/Stmt.cc b/src/script_opt/ZAM/Stmt.cc
new file mode 100644
index 0000000000..f49b84cdeb
--- /dev/null
+++ b/src/script_opt/ZAM/Stmt.cc
@@ -0,0 +1,1154 @@
+// See the file "COPYING" in the main distribution directory for copyright.
+
+// Methods for traversing Stmt AST nodes to generate ZAM code.
+ +#include "zeek/IPAddr.h" +#include "zeek/Reporter.h" +#include "zeek/ZeekString.h" +#include "zeek/script_opt/ProfileFunc.h" +#include "zeek/script_opt/ZAM/Compile.h" + +namespace zeek::detail { + +const ZAMStmt ZAMCompiler::CompileStmt(const Stmt* s) + { + SetCurrStmt(s); + + switch ( s->Tag() ) { + case STMT_PRINT: + return CompilePrint(static_cast(s)); + + case STMT_EXPR: + return CompileExpr(static_cast(s)); + + case STMT_IF: + return CompileIf(static_cast(s)); + + case STMT_SWITCH: + return CompileSwitch(static_cast(s)); + + case STMT_ADD: + return CompileAdd(static_cast(s)); + + case STMT_DELETE: + return CompileDel(static_cast(s)); + + case STMT_EVENT: + { + auto es = static_cast(s); + auto e = static_cast(es->StmtExpr()); + return CompileExpr(e); + } + + case STMT_WHILE: + return CompileWhile(static_cast(s)); + + case STMT_FOR: + return CompileFor(static_cast(s)); + + case STMT_RETURN: + return CompileReturn(static_cast(s)); + + case STMT_CATCH_RETURN: + return CompileCatchReturn(static_cast(s)); + + case STMT_LIST: + return CompileStmts(static_cast(s)); + + case STMT_INIT: + return CompileInit(static_cast(s)); + + case STMT_NULL: + return EmptyStmt(); + + case STMT_WHEN: + return CompileWhen(static_cast(s)); + + case STMT_CHECK_ANY_LEN: + { + auto cs = static_cast(s); + auto n = cs->StmtExpr()->AsNameExpr(); + auto expected_len = cs->ExpectedLen(); + return CheckAnyLenVi(n, expected_len); + } + + case STMT_NEXT: + return CompileNext(); + + case STMT_BREAK: + return CompileBreak(); + + case STMT_FALLTHROUGH: + return CompileFallThrough(); + + default: + reporter->InternalError("bad statement type in ZAMCompile::CompileStmt"); + } + } + +const ZAMStmt ZAMCompiler::CompilePrint(const PrintStmt* ps) + { + auto& l = ps->ExprListPtr(); + + if ( l->Exprs().length() == 1 ) + { // special-case the common situation of printing just 1 item + auto e0 = l->Exprs()[0]; + if ( e0->Tag() == EXPR_NAME ) + return Print1V(e0->AsNameExpr()); + else + return 
Print1C(e0->AsConstExpr()); + } + + return PrintO(BuildVals(l)); + } + +const ZAMStmt ZAMCompiler::CompileExpr(const ExprStmt* es) + { + auto e = es->StmtExprPtr(); + + if ( e->Tag() == EXPR_CALL ) + return Call(es); + + if ( e->Tag() == EXPR_ASSIGN && e->GetOp2()->Tag() == EXPR_CALL ) + return AssignToCall(es); + + return CompileExpr(e); + } + +const ZAMStmt ZAMCompiler::CompileIf(const IfStmt* is) + { + auto e = is->StmtExprPtr(); + auto block1 = is->TrueBranch(); + auto block2 = is->FalseBranch(); + + if ( block1->Tag() == STMT_NULL ) + block1 = nullptr; + + if ( block2->Tag() == STMT_NULL ) + block2 = nullptr; + + if ( ! block1 && ! block2 ) + // No need to evaluate conditional as it ought to be + // side-effect free in reduced form. + return EmptyStmt(); + + if ( ! block1 ) + { + // See if we're able to invert the conditional. If not, + // then IfElse() will need to deal with inverting the test. + // But we try here first, since some conditionals blow + // up into zillions of different operators depending + // on the type of their operands, so it's much simpler to + // deal with them now. + if ( e->InvertSense() ) + { + block1 = block2; + block2 = nullptr; + } + } + + return IfElse(e.get(), block1, block2); + } + +const ZAMStmt ZAMCompiler::IfElse(const Expr* e, const Stmt* s1, const Stmt* s2) + { + ZAMStmt cond_stmt = EmptyStmt(); + int branch_v; + + if ( e->Tag() == EXPR_NAME ) + { + auto n = e->AsNameExpr(); + + ZOp op = (s1 && s2) ? OP_IF_ELSE_VV : + (s1 ? 
OP_IF_VV : OP_IF_NOT_VV); + + ZInstI cond(op, FrameSlot(n), 0); + cond_stmt = AddInst(cond); + branch_v = 2; + } + else + cond_stmt = GenCond(e, branch_v); + + if ( s1 ) + { + auto s1_end = CompileStmt(s1); + if ( s2 ) + { + auto branch_after_s1 = GoToStub(); + auto s2_end = CompileStmt(s2); + SetV(cond_stmt, GoToTargetBeyond(branch_after_s1), + branch_v); + SetGoTo(branch_after_s1, GoToTargetBeyond(s2_end)); + + return s2_end; + } + + else + { + SetV(cond_stmt, GoToTargetBeyond(s1_end), branch_v); + return s1_end; + } + } + + // Only the else clause is non-empty. + auto s2_end = CompileStmt(s2); + + // For complex conditionals, we need to invert their sense since + // we're switching to "if ( ! cond ) s2". + auto z = insts1[cond_stmt.stmt_num]; + + switch ( z->op ) { + case OP_IF_ELSE_VV: + case OP_IF_VV: + case OP_IF_NOT_VV: + // These are generated correctly above, no need + // to fix up. + break; + + case OP_HAS_FIELD_COND_VVV: + z->op = OP_NOT_HAS_FIELD_COND_VVV; + break; + case OP_NOT_HAS_FIELD_COND_VVV: + z->op = OP_HAS_FIELD_COND_VVV; + break; + + case OP_VAL_IS_IN_TABLE_COND_VVV: + z->op = OP_VAL_IS_NOT_IN_TABLE_COND_VVV; + break; + case OP_VAL_IS_NOT_IN_TABLE_COND_VVV: + z->op = OP_VAL_IS_IN_TABLE_COND_VVV; + break; + + case OP_CONST_IS_IN_TABLE_COND_VVC: + z->op = OP_CONST_IS_NOT_IN_TABLE_COND_VVC; + break; + case OP_CONST_IS_NOT_IN_TABLE_COND_VVC: + z->op = OP_CONST_IS_IN_TABLE_COND_VVC; + break; + + case OP_VAL2_IS_IN_TABLE_COND_VVVV: + z->op = OP_VAL2_IS_NOT_IN_TABLE_COND_VVVV; + break; + case OP_VAL2_IS_NOT_IN_TABLE_COND_VVVV: + z->op = OP_VAL2_IS_IN_TABLE_COND_VVVV; + break; + + case OP_VAL2_IS_IN_TABLE_COND_VVVC: + z->op = OP_VAL2_IS_NOT_IN_TABLE_COND_VVVC; + break; + case OP_VAL2_IS_NOT_IN_TABLE_COND_VVVC: + z->op = OP_VAL2_IS_IN_TABLE_COND_VVVC; + break; + + case OP_VAL2_IS_IN_TABLE_COND_VVCV: + z->op = OP_VAL2_IS_NOT_IN_TABLE_COND_VVCV; + break; + case OP_VAL2_IS_NOT_IN_TABLE_COND_VVCV: + z->op = OP_VAL2_IS_IN_TABLE_COND_VVCV; + break; + + 
default: + reporter->InternalError("inconsistency in ZAMCompiler::IfElse"); + } + + SetV(cond_stmt, GoToTargetBeyond(s2_end), branch_v); + return s2_end; + } + +const ZAMStmt ZAMCompiler::GenCond(const Expr* e, int& branch_v) + { + auto op1 = e->GetOp1(); + auto op2 = e->GetOp2(); + + NameExpr* n1 = nullptr; + NameExpr* n2 = nullptr; + ConstExpr* c = nullptr; + + if ( e->Tag() == EXPR_HAS_FIELD ) + { + auto hf = e->AsHasFieldExpr(); + auto z = GenInst(OP_HAS_FIELD_COND_VVV, op1->AsNameExpr(), + hf->Field()); + z.op_type = OP_VVV_I2_I3; + branch_v = 3; + return AddInst(z); + } + + if ( e->Tag() == EXPR_IN ) + { + auto op1 = e->GetOp1(); + auto op2 = e->GetOp2()->AsNameExpr(); + + // First, deal with the easy cases: it's a single index. + if ( op1->Tag() == EXPR_LIST ) + { + auto& ind = op1->AsListExpr()->Exprs(); + if ( ind.length() == 1 ) + op1 = {NewRef{}, ind[0]}; + } + + if ( op1->Tag() == EXPR_NAME ) + { + auto z = GenInst(OP_VAL_IS_IN_TABLE_COND_VVV, + op1->AsNameExpr(), op2, 0); + z.t = op1->GetType(); + branch_v = 3; + return AddInst(z); + } + + if ( op1->Tag() == EXPR_CONST ) + { + auto z = GenInst(OP_CONST_IS_IN_TABLE_COND_VVC, + op2, op1->AsConstExpr(), 0); + z.t = op1->GetType(); + branch_v = 2; + return AddInst(z); + } + + // Now the harder case: 2 indexes. (Any number here other + // than two should have been disallowed due to how we reduce + // conditional expressions.) + + auto& ind = op1->AsListExpr()->Exprs(); + ASSERT(ind.length() == 2); + + auto ind0 = ind[0]; + auto ind1 = ind[1]; + + auto name0 = ind0->Tag() == EXPR_NAME; + auto name1 = ind1->Tag() == EXPR_NAME; + + auto n0 = name0 ? ind0->AsNameExpr() : nullptr; + auto n1 = name1 ? ind1->AsNameExpr() : nullptr; + + auto c0 = name0 ? nullptr : ind0->AsConstExpr(); + auto c1 = name1 ? 
nullptr : ind1->AsConstExpr();
+
+		ZInstI z;
+
+		if ( name0 && name1 )
+			{
+			z = GenInst(OP_VAL2_IS_IN_TABLE_COND_VVVV,
+					n0, n1, op2, 0);
+			branch_v = 4;
+			z.t2 = n0->GetType();
+			}
+
+		else if ( name0 )
+			{
+			z = GenInst(OP_VAL2_IS_IN_TABLE_COND_VVVC,
+					n0, op2, c1, 0);
+			branch_v = 3;
+			z.t2 = n0->GetType();
+			}
+
+		else if ( name1 )
+			{
+			z = GenInst(OP_VAL2_IS_IN_TABLE_COND_VVCV,
+					n1, op2, c0, 0);
+			branch_v = 3;
+			z.t2 = n1->GetType();
+			}
+
+		else
+			{ // Both are constants, assign first to temporary.
+			auto slot = TempForConst(c0);
+
+			z = ZInstI(OP_VAL2_IS_IN_TABLE_COND_VVVC,
+					slot, FrameSlot(op2), 0, c1);
+			z.op_type = OP_VVVC_I3;
+			branch_v = 3;
+			z.t2 = c0->GetType();
+			}
+
+		return AddInst(z);
+		}
+
+	if ( op1->Tag() == EXPR_NAME )
+		{
+		n1 = op1->AsNameExpr();
+
+		if ( op2->Tag() == EXPR_NAME )
+			n2 = op2->AsNameExpr();
+		else
+			c = op2->AsConstExpr();
+		}
+
+	else
+		{
+		c = op1->AsConstExpr();
+		n2 = op2->AsNameExpr();
+		}
+
+	if ( n1 && n2 )
+		branch_v = 3;
+	else
+		branch_v = 2;
+
+	switch ( e->Tag() ) {
+#include "ZAM-Conds.h"
+
+	default:
+		reporter->InternalError("bad expression type in ZAMCompiler::GenCond");
+	}
+
+	// Not reached.
+	}
+
+const ZAMStmt ZAMCompiler::CompileSwitch(const SwitchStmt* sw)
+	{
+	auto e = sw->StmtExpr();
+
+	auto n = e->Tag() == EXPR_NAME ? e->AsNameExpr() : nullptr;
+	auto c = e->Tag() == EXPR_CONST ? e->AsConstExpr() : nullptr;
+
+	auto t = e->GetType()->Tag();
+
+	// Need to track a new set of contexts for "break" statements.
+	PushBreaks();
+
+	auto& cases = *sw->Cases();
+
+	if ( cases.length() > 0 && cases[0]->TypeCases() )
+		return TypeSwitch(sw, n, c);
+	else
+		return ValueSwitch(sw, n, c);
+	}
+
+// Compiles a switch over values (as opposed to types).  The switch
+// expression is either the variable "v" or the constant "c".  Emits
+// the jump-table "head" instruction followed by the bodies of each of
+// the cases, and then records a new jump table mapping case values to
+// the start of the corresponding body.  Returns the statement for the
+// end of the last case body.
+const ZAMStmt ZAMCompiler::ValueSwitch(const SwitchStmt* sw, const NameExpr* v,
+					const ConstExpr* c)
+	{
+	int slot = v ? FrameSlot(v) : -1;
+
+	if ( c )
+		// Weird to have a constant switch expression, enough
+		// so that it doesn't seem worth optimizing.
+		slot = TempForConst(c);
+
+	ASSERT(slot >= 0);
+
+	// Figure out which jump table we're using.
+	auto t = v ? v->GetType() : c->GetType();
+	int tbl = 0;
+	ZOp op;
+
+	switch ( t->InternalType() ) {
+	case TYPE_INTERNAL_INT:
+		op = OP_SWITCHI_VVV;
+		tbl = int_casesI.size();
+		break;
+
+	case TYPE_INTERNAL_UNSIGNED:
+		op = OP_SWITCHU_VVV;
+		tbl = uint_casesI.size();
+		break;
+
+	case TYPE_INTERNAL_DOUBLE:
+		op = OP_SWITCHD_VVV;
+		tbl = double_casesI.size();
+		break;
+
+	case TYPE_INTERNAL_STRING:
+		op = OP_SWITCHS_VVV;
+		tbl = str_casesI.size();
+		break;
+
+	case TYPE_INTERNAL_ADDR:
+		op = OP_SWITCHA_VVV;
+		tbl = str_casesI.size();
+		break;
+
+	case TYPE_INTERNAL_SUBNET:
+		op = OP_SWITCHN_VVV;
+		tbl = str_casesI.size();
+		break;
+
+	default:
+		reporter->InternalError("bad switch type");
+	}
+
+	// Add the "head", i.e., the execution of the jump table.
+	auto sw_head_op = ZInstI(op, slot, tbl, 0);
+	sw_head_op.op_type = OP_VVV_I2_I3;
+
+	auto sw_head = AddInst(sw_head_op);
+	auto body_end = sw_head;
+
+	// Generate each of the cases.
+	auto cases = sw->Cases();
+	std::vector<InstLabel> case_start;
+
+	PushFallThroughs();
+	// (Loop variable named so as to not shadow the "c" parameter.)
+	for ( auto sw_case : *cases )
+		{
+		auto start = GoToTargetBeyond(body_end);
+		ResolveFallThroughs(start);
+		case_start.push_back(start);
+		PushFallThroughs();
+		body_end = CompileStmt(sw_case->Body());
+		}
+
+	auto sw_end = GoToTargetBeyond(body_end);
+	ResolveFallThroughs(sw_end);
+	ResolveBreaks(sw_end);
+
+	// The head's third operand is where to go if no case matches:
+	// the default case if there is one, otherwise past the switch.
+	int def_ind = sw->DefaultCaseIndex();
+	if ( def_ind >= 0 )
+		SetV3(sw_head, case_start[def_ind]);
+	else
+		SetV3(sw_head, sw_end);
+
+	// Now fill out the corresponding jump table.
+	//
+	// We will only use one of these.
+	CaseMapI<bro_int_t> new_int_cases;
+	CaseMapI<bro_uint_t> new_uint_cases;
+	CaseMapI<double> new_double_cases;
+	CaseMapI<std::string> new_str_cases;
+
+	for ( auto [cv, index] : sw->ValueMap() )
+		{
+		auto case_body_start = case_start[index];
+
+		switch ( cv->GetType()->InternalType() ) {
+		case TYPE_INTERNAL_INT:
+			new_int_cases[cv->InternalInt()] = case_body_start;
+			break;
+
+		case TYPE_INTERNAL_UNSIGNED:
+			new_uint_cases[cv->InternalUnsigned()] = case_body_start;
+			break;
+
+		case TYPE_INTERNAL_DOUBLE:
+			new_double_cases[cv->InternalDouble()] = case_body_start;
+			break;
+
+		case TYPE_INTERNAL_STRING:
+			{
+			// The rendered buffer is copied into the std::string
+			// key, so we can release it right away rather than
+			// leaking it.
+			auto sv = cv->AsString()->Render();
+			std::string s(sv);
+			new_str_cases[s] = case_body_start;
+			delete[] sv;
+			break;
+			}
+
+		case TYPE_INTERNAL_ADDR:
+			{
+			auto a = cv->AsAddr().AsString();
+			new_str_cases[a] = case_body_start;
+			break;
+			}
+
+		case TYPE_INTERNAL_SUBNET:
+			{
+			auto n = cv->AsSubNet().AsString();
+			new_str_cases[n] = case_body_start;
+			break;
+			}
+
+		default:
+			reporter->InternalError("bad recovered type when compiling switch");
+		}
+		}
+
+	// Now add the jump table to the set we're keeping for the
+	// corresponding type.
+
+	switch ( t->InternalType() ) {
+	case TYPE_INTERNAL_INT:
+		int_casesI.push_back(new_int_cases);
+		break;
+
+	case TYPE_INTERNAL_UNSIGNED:
+		uint_casesI.push_back(new_uint_cases);
+		break;
+
+	case TYPE_INTERNAL_DOUBLE:
+		double_casesI.push_back(new_double_cases);
+		break;
+
+	case TYPE_INTERNAL_STRING:
+	case TYPE_INTERNAL_ADDR:
+	case TYPE_INTERNAL_SUBNET:
+		str_casesI.push_back(new_str_cases);
+		break;
+
+	default:
+		reporter->InternalError("bad switch type");
+	}
+
+	return body_end;
+	}
+
+const ZAMStmt ZAMCompiler::TypeSwitch(const SwitchStmt* sw, const NameExpr* v,
+					const ConstExpr* c)
+	{
+	auto cases = sw->Cases();
+	auto type_map = sw->TypeMap();
+
+	auto body_end = EmptyStmt();
+
+	auto tmp = NewSlot(true);	// true since we know "any" is managed
+
+	int slot = v ?
FrameSlot(v) : 0;
+
+	// A variable operand that isn't already of type "any" is first
+	// assigned into the temporary, which the type tests then use.
+	if ( v && v->GetType()->Tag() != TYPE_ANY )
+		{
+		auto z = ZInstI(OP_ASSIGN_ANY_VV, tmp, slot);
+		body_end = AddInst(z);
+		slot = tmp;
+		}
+
+	// A constant operand likewise goes through the temporary.
+	if ( c )
+		{
+		auto z = ZInstI(OP_ASSIGN_ANY_VC, tmp, c);
+		body_end = AddInst(z);
+		slot = tmp;
+		}
+
+	int def_ind = sw->DefaultCaseIndex();
+	ZAMStmt def_succ(0);	// successor to default, if any
+	bool saw_def_succ = false;	// whether def_succ is meaningful
+
+	PushFallThroughs();
+	for ( auto& i : *type_map )
+		{
+		auto id = i.first;
+		auto type = id->GetType();
+
+		ZInstI z;
+
+		// Emit the test for this case's type.  Its branch target (v2)
+		// is filled in below, once the end of the case's body is known.
+		z = ZInstI(OP_BRANCH_IF_NOT_TYPE_VV, slot, 0);
+		z.SetType(type);
+		auto case_test = AddInst(z);
+
+		// Type cases that don't use "as" create a placeholder
+		// ID with a null name.
+		if ( id->Name() )
+			{
+			int id_slot = Frame1Slot(id, OP_CAST_ANY_VV);
+			z = ZInstI(OP_CAST_ANY_VV, id_slot, slot);
+			z.SetType(type);
+			body_end = AddInst(z);
+			}
+		else
+			body_end = case_test;
+
+		ResolveFallThroughs(GoToTargetBeyond(body_end));
+		body_end = CompileStmt((*cases)[i.second]->Body());
+		SetV2(case_test, GoToTargetBeyond(body_end));
+
+		// Remember the test of the case whose index is one past
+		// the default's; it is used below when resolving the
+		// default's fall-throughs.
+		if ( def_ind >= 0 && i.second == def_ind + 1 )
+			{
+			def_succ = case_test;
+			saw_def_succ = true;
+			}
+
+		PushFallThroughs();
+		}
+
+	ResolveFallThroughs(GoToTargetBeyond(body_end));
+
+	// The default's body, if any, is emitted after all the typed cases.
+	if ( def_ind >= 0 )
+		{
+		PushFallThroughs();
+
+		body_end = CompileStmt((*sw->Cases())[def_ind]->Body());
+
+		// Now resolve any fallthrough's in the default.
+		if ( saw_def_succ )
+			ResolveFallThroughs(GoToTargetBeyond(def_succ));
+		else
+			ResolveFallThroughs(GoToTargetBeyond(body_end));
+		}
+
+	ResolveBreaks(GoToTargetBeyond(body_end));
+
+	return body_end;
+	}
+
+// Compiles an "add" statement.  A single index gets a specialized
+// instruction (name or constant flavor); multiple indices are passed
+// via BuildVals().
+const ZAMStmt ZAMCompiler::CompileAdd(const AddStmt* as)
+	{
+	auto e = as->StmtExprPtr();
+	auto aggr = e->GetOp1()->AsNameExpr();
+	auto index_list = e->GetOp2();
+
+	if ( index_list->Tag() != EXPR_LIST )
+		reporter->InternalError("non-list in \"add\"");
+
+	auto indices = index_list->AsListExprPtr();
+	auto& exprs = indices->Exprs();
+
+	if ( exprs.length() == 1 )
+		{
+		auto e1 = exprs[0];
+		// Anything that isn't a name is treated as a constant.
+		if ( e1->Tag() == EXPR_NAME )
+			return AddStmt1VV(aggr, e1->AsNameExpr());
+		else
+			return AddStmt1VC(aggr, e1->AsConstExpr());
+		}
+
+	return AddStmtVO(aggr, BuildVals(indices));
+	}
+
+// Compiles a "delete" statement: either removal of a record field
+// (when the expression is an EXPR_FIELD) or removal of an indexed
+// element, with the indices passed via BuildVals().
+const ZAMStmt ZAMCompiler::CompileDel(const DelStmt* ds)
+	{
+	auto e = ds->StmtExprPtr();
+	auto aggr = e->GetOp1()->AsNameExpr();
+
+	if ( e->Tag() == EXPR_FIELD )
+		{
+		int field = e->AsFieldExpr()->Field();
+		return DelFieldVi(aggr, field);
+		}
+
+	auto index_list = e->GetOp2();
+
+	if ( index_list->Tag() != EXPR_LIST )
+		reporter->InternalError("non-list in \"delete\"");
+
+	auto internal_ind = BuildVals(index_list->AsListExprPtr());
+
+	return DelTableVO(aggr, internal_ind);
+	}
+
+// Compiles a "while" statement.  A constant condition is resolved
+// here: zero yields an empty statement, anything else an unconditional
+// Loop().  Otherwise the work is done by While().
+const ZAMStmt ZAMCompiler::CompileWhile(const WhileStmt* ws)
+	{
+	auto loop_condition = ws->Condition();
+
+	if ( loop_condition->Tag() == EXPR_CONST )
+		{
+		if ( loop_condition->IsZero() )
+			return EmptyStmt();
+		else
+			return Loop(ws->Body().get());
+		}
+
+	auto cond_pred = ws->CondPredStmt();
+
+	return While(cond_pred.get(), loop_condition.get(), ws->Body().get());
+	}
+
+// Generates a conditional loop.  "cond_stmt", if non-nil, is compiled
+// at the head of the loop, ahead of the test of "cond"; "body" may be
+// nil.  Returns the jump back to the loop head.
+const ZAMStmt ZAMCompiler::While(const Stmt* cond_stmt, const Expr* cond,
+					const Stmt* body)
+	{
+	auto head = StartingBlock();
+
+	if ( cond_stmt )
+		(void) CompileStmt(cond_stmt);
+
+	ZAMStmt cond_IF = EmptyStmt();
+	int branch_v;	// which operand of cond_IF holds its branch target
+
+	if ( cond->Tag() == EXPR_NAME )
+		{
+		auto n = cond->AsNameExpr();
+		cond_IF =
AddInst(ZInstI(OP_IF_VV, FrameSlot(n), 0));
+		branch_v = 2;
+		}
+	else
+		cond_IF = GenCond(cond, branch_v);
+
+	PushNexts();
+	PushBreaks();
+
+	if ( body && body->Tag() != STMT_NULL )
+		(void) CompileStmt(body);
+
+	auto tail = GoTo(GoToTarget(head));
+
+	// A failed condition branches to just past the jump back to the head.
+	auto beyond_tail = GoToTargetBeyond(tail);
+	SetV(cond_IF, beyond_tail, branch_v);
+
+	ResolveNexts(GoToTarget(head));
+	ResolveBreaks(beyond_tail);
+
+	return tail;
+	}
+
+// Compiles a "for" statement by dispatching on the type of the value
+// being looped over (table, vector or string).  The "next"/"break"
+// scopes are pushed here; the per-type helpers end in FinishLoop().
+const ZAMStmt ZAMCompiler::CompileFor(const ForStmt* f)
+	{
+	auto e = f->LoopExpr();
+	auto val = e->Tag() == EXPR_NAME ? e->AsNameExpr() : nullptr;
+	auto et = e->GetType()->Tag();
+
+	PushNexts();
+	PushBreaks();
+
+	if ( et == TYPE_TABLE )
+		return LoopOverTable(f, val);
+
+	else if ( et == TYPE_VECTOR )
+		return LoopOverVector(f, val);
+
+	else if ( et == TYPE_STRING )
+		return LoopOverString(f, e);
+
+	else
+		reporter->InternalError("bad \"for\" loop-over value when compiling");
+	}
+
+// Generates the iteration over a table.  "val" names the table.
+const ZAMStmt ZAMCompiler::LoopOverTable(const ForStmt* f, const NameExpr* val)
+	{
+	auto loop_vars = f->LoopVars();
+	auto value_var = f->ValueVar();
+	auto body = f->LoopBody();
+
+	// Check whether the loop variables are actually used in the body.
+	// This is motivated by an idiom where there's both loop_vars and
+	// a value_var, but the script only actually needs the value_var;
+	// and also some weird cases where the script is managing a
+	// separate iteration process manually.
+	ProfileFunc body_pf(body);
+
+	int num_unused = 0;
+
+	auto aux = new ZInstAux(0);
+
+	for ( int i = 0; i < loop_vars->length(); ++i )
+		{
+		auto id = (*loop_vars)[i];
+
+		if ( body_pf.Locals().count(id) == 0 )
+			++num_unused;
+
+		// Every loop variable is recorded, whether used or not.
+		aux->loop_vars.push_back(FrameSlot(id));
+		aux->loop_var_types.push_back(id->GetType());
+		}
+
+	bool no_loop_vars = (num_unused == loop_vars->length());
+
+	if ( value_var && body_pf.Locals().count(value_var.get()) == 0 )
+		// This is more clearly a coding botch - someone left in
+		// an unnecessary value_var variable.
But might as
+		// well not do the work.
+		value_var = nullptr;
+
+	if ( value_var )
+		aux->value_var_type = value_var->GetType();
+
+	auto iter_slot = table_iters.size();
+	table_iters.emplace_back(TableIterInfo());
+
+	auto z = ZInstI(OP_INIT_TABLE_LOOP_VV, FrameSlot(val), iter_slot);
+	z.op_type = OP_VV_I2;
+	z.SetType(value_var ? value_var->GetType() : nullptr);
+	z.aux = aux;
+
+	// The instruction handle isn't needed; the loop is anchored at
+	// iter_head instead.
+	(void) AddInst(z);
+	auto iter_head = StartingBlock();
+
+	// Pick the iteration instruction according to whether there's a
+	// value variable and whether any loop variable is actually used.
+	if ( value_var )
+		{
+		ZOp op = no_loop_vars ? OP_NEXT_TABLE_ITER_VAL_VAR_NO_VARS_VVV :
+					OP_NEXT_TABLE_ITER_VAL_VAR_VVV;
+		z = ZInstI(op, FrameSlot(value_var), iter_slot, 0);
+		z.CheckIfManaged(value_var->GetType());
+		z.op_type = OP_VVV_I2_I3;
+		}
+	else
+		{
+		ZOp op = no_loop_vars ? OP_NEXT_TABLE_ITER_NO_VARS_VV :
+					OP_NEXT_TABLE_ITER_VV;
+		z = ZInstI(op, iter_slot, 0);
+		z.op_type = OP_VV_I1_I2;
+		}
+
+	z.aux = aux;	// so ZOpt.cc can get to it
+
+	return FinishLoop(iter_head, z, body, iter_slot, true);
+	}
+
+// Generates the iteration over a vector named by "val".  Only the
+// first loop variable is used.
+const ZAMStmt ZAMCompiler::LoopOverVector(const ForStmt* f, const NameExpr* val)
+	{
+	auto loop_vars = f->LoopVars();
+	auto loop_var = (*loop_vars)[0];
+
+	int iter_slot = num_step_iters++;
+
+	auto z = ZInstI(OP_INIT_VECTOR_LOOP_VV, FrameSlot(val), iter_slot);
+	z.op_type = OP_VV_I2;
+
+	(void) AddInst(z);
+	auto iter_head = StartingBlock();
+
+	z = ZInstI(OP_NEXT_VECTOR_ITER_VVV, FrameSlot(loop_var), iter_slot, 0);
+	z.op_type = OP_VVV_I2_I3;
+
+	return FinishLoop(iter_head, z, f->LoopBody(), iter_slot, false);
+	}
+
+// Generates the iteration over a string, which is given either by a
+// name or by a constant.
+const ZAMStmt ZAMCompiler::LoopOverString(const ForStmt* f, const Expr* e)
+	{
+	auto n = e->Tag() == EXPR_NAME ? e->AsNameExpr() : nullptr;
+	auto c = e->Tag() == EXPR_CONST ?
e->AsConstExpr() : nullptr;
+	auto loop_vars = f->LoopVars();
+	auto loop_var = (*loop_vars)[0];
+
+	int iter_slot = num_step_iters++;
+
+	ZInstI z;
+
+	if ( n )
+		{
+		z = ZInstI(OP_INIT_STRING_LOOP_VV, FrameSlot(n), iter_slot);
+		z.op_type = OP_VV_I2;
+		}
+	else
+		{
+		z = ZInstI(OP_INIT_STRING_LOOP_VC, iter_slot, c);
+		z.op_type = OP_VC_I1;
+		}
+
+	// The instruction handle isn't needed; the loop is anchored at
+	// iter_head instead.
+	(void) AddInst(z);
+	auto iter_head = StartingBlock();
+
+	z = ZInstI(OP_NEXT_STRING_ITER_VVV, FrameSlot(loop_var), iter_slot, 0);
+	z.is_managed = true;
+	z.op_type = OP_VVV_I2_I3;
+
+	return FinishLoop(iter_head, z, f->LoopBody(), iter_slot, false);
+	}
+
+// Generates an unconditional loop around "body".  "next" resolves to
+// the head of the loop and "break" to just past the jump back to it.
+const ZAMStmt ZAMCompiler::Loop(const Stmt* body)
+	{
+	PushNexts();
+	PushBreaks();
+
+	auto head = StartingBlock();
+	(void) CompileStmt(body);
+	auto tail = GoTo(GoToTarget(head));
+
+	ResolveNexts(GoToTarget(head));
+	ResolveBreaks(GoToTargetBeyond(tail));
+
+	return tail;
+	}
+
+// Common tail of the "for" loop generators: adds the iteration
+// instruction and the body, jumps back to "iter_head", and appends a
+// final instruction to which loop exits are resolved.  Returns that
+// final instruction.
+const ZAMStmt ZAMCompiler::FinishLoop(const ZAMStmt iter_head, ZInstI iter_stmt,
+					const Stmt* body, int iter_slot,
+					bool is_table)
+	{
+	auto loop_iter = AddInst(iter_stmt);
+	(void) CompileStmt(body);
+
+	// We only need cleanup for looping over tables, but for now we
+	// need some sort of placeholder instruction (until the optimizer
+	// can elide it) to resolve loop exits.
+	ZOp op = is_table ? OP_END_TABLE_LOOP_V : OP_NOP;
+
+	(void) GoTo(GoToTarget(iter_head));
+	auto z = ZInstI(op, iter_slot);
+	z.op_type = is_table ?
OP_V_I1 : OP_X;
+	auto final_stmt = AddInst(z);
+
+	// The exit target of the iteration instruction lives in v3 for the
+	// three-operand forms and in v2 otherwise.
+	auto ot = iter_stmt.op_type;
+	if ( ot == OP_VVV_I3 || ot == OP_VVV_I2_I3)
+		SetV3(loop_iter, GoToTarget(final_stmt));
+	else
+		SetV2(loop_iter, GoToTarget(final_stmt));
+
+	ResolveNexts(GoToTarget(iter_head));
+	ResolveBreaks(GoToTarget(final_stmt));
+
+	return final_stmt;
+	}
+
+// Compiles a "return" statement.  With no entry in "retvars" this is
+// a genuine return from the function.  Otherwise the returned value
+// (if any) is assigned to the most recent entry of "retvars" and the
+// result of CompileCatchReturn() is returned.
+const ZAMStmt ZAMCompiler::CompileReturn(const ReturnStmt* r)
+	{
+	auto e = r->StmtExpr();
+
+	if ( retvars.size() == 0 )
+		{ // a "true" return
+		if ( e )
+			{
+			if ( e->Tag() == EXPR_NAME )
+				return ReturnV(e->AsNameExpr());
+			else
+				return ReturnC(e->AsConstExpr());
+			}
+
+		else
+			return ReturnX();
+		}
+
+	// The presence of a return value has to agree with whether the
+	// enclosing block has a return variable.
+	auto rv = retvars.back();
+	if ( e && ! rv )
+		reporter->InternalError("unexpected returned value inside inlined block");
+	if ( ! e && rv )
+		reporter->InternalError("expected returned value inside inlined block but none provided");
+
+	if ( e )
+		{
+		if ( e->Tag() == EXPR_NAME )
+			(void) AssignVV(rv, e->AsNameExpr());
+		else
+			(void) AssignVC(rv, e->AsConstExpr());
+		}
+
+	return CompileCatchReturn();
+	}
+
+// Compiles a catch-return block.  The block's return variable is
+// pushed onto "retvars" while the block is compiled, which makes
+// "return" statements inside it assign to that variable (see
+// CompileReturn()).
+const ZAMStmt ZAMCompiler::CompileCatchReturn(const CatchReturnStmt* cr)
+	{
+	retvars.push_back(cr->RetVar());
+
+	PushCatchReturns();
+
+	auto block = cr->Block();
+	auto block_end = CompileStmt(block);
+	retvars.pop_back();
+
+	ResolveCatchReturns(GoToTargetBeyond(block_end));
+
+	// If control flow runs off the end of the block, then we need
+	// to consider sync'ing globals at that point.
+	auto block_last = LastStmt(block.get());
+
+	if ( block_last->Tag() == STMT_RETURN )
+		return block_end;
+
+	return top_main_inst;
+	}
+
+// Compiles the statements of a list in order, bracketed by
+// StartingBlock() / FinishBlock().
+const ZAMStmt ZAMCompiler::CompileStmts(const StmtList* ws)
+	{
+	auto start = StartingBlock();
+
+	for ( const auto& stmt : ws->Stmts() )
+		CompileStmt(stmt);
+
+	return FinishBlock(start);
+	}
+
+// Emits initialization instructions for the records, vectors and
+// tables named by an "init" statement, skipping any that IsUnused()
+// reports as unused at that statement.  Other types generate nothing.
+// Returns the last instruction added, or an empty statement if there
+// was none.
+const ZAMStmt ZAMCompiler::CompileInit(const InitStmt* is)
+	{
+	auto last = EmptyStmt();
+
+	for ( const auto& aggr : is->Inits() )
+		{
+		if ( IsUnused(aggr, is) )
+			continue;
+
+		auto& t = aggr->GetType();
+
+		switch ( t->Tag() ) {
+		case TYPE_RECORD:
+			last = InitRecord(aggr, t->AsRecordType());
+			break;
+
+		case TYPE_VECTOR:
+			last = InitVector(aggr, t->AsVectorType());
+			break;
+
+		case TYPE_TABLE:
+			last = InitTable(aggr, t->AsTableType(),
+						aggr->GetAttrs().get());
+			break;
+
+		default:
+			break;
+		}
+		}
+
+	return last;
+	}
+
+// Emits the instruction that initializes the record variable "id"
+// to type "rt".
+const ZAMStmt ZAMCompiler::InitRecord(IDPtr id, RecordType* rt)
+	{
+	auto z = ZInstI(OP_INIT_RECORD_V, FrameSlot(id));
+	z.SetType({NewRef{}, rt});
+	return AddInst(z);
+	}
+
+// Emits the instruction that initializes the vector variable "id"
+// to type "vt".
+const ZAMStmt ZAMCompiler::InitVector(IDPtr id, VectorType* vt)
+	{
+	auto z = ZInstI(OP_INIT_VECTOR_V, FrameSlot(id));
+	z.SetType({NewRef{}, vt});
+	return AddInst(z);
+	}
+
+// Emits the instruction that initializes the table variable "id"
+// to type "tt", recording "attrs" on the instruction.
+const ZAMStmt ZAMCompiler::InitTable(IDPtr id, TableType* tt, Attributes* attrs)
+	{
+	auto z = ZInstI(OP_INIT_TABLE_V, FrameSlot(id));
+	z.SetType({NewRef{}, tt});
+	z.attrs = {NewRef{}, attrs};
+	return AddInst(z);
+	}
+
+// Compiles a "when" statement.  The instruction used depends on
+// whether there's a timeout and, if so, whether it's a constant or
+// a variable.
+const ZAMStmt ZAMCompiler::CompileWhen(const WhenStmt* ws)
+	{
+	auto cond = ws->Cond();
+	auto body = ws->Body();
+	auto timeout = ws->TimeoutExpr();
+	auto timeout_body = ws->TimeoutBody();
+	auto is_return = ws->IsReturn();
+
+	ZInstI z;
+
+	if ( timeout )
+		{
+		// Note, we fill in is_return by hand since it's already
+		// an int_val, doesn't need translation.
+		if ( timeout->Tag() == EXPR_CONST )
+			{
+			z = GenInst(OP_WHEN_VVVC, timeout->AsConstExpr());
+			z.op_type = OP_VVVC_I1_I2_I3;
+			z.v3 = is_return;
+			}
+		else
+			{
+			z = GenInst(OP_WHEN_VVVV, timeout->AsNameExpr());
+			z.op_type = OP_VVVV_I2_I3_I4;
+			z.v4 = is_return;
+			}
+		}
+
+	else
+		{
+		z = GenInst(OP_WHEN_VV);
+		z.op_type = OP_VV_I1_I2;
+		z.v1 = is_return;
+		}
+
+	z.e = cond;
+
+	auto when_eval = AddInst(z);
+
+	// A goto stub follows the "when" instruction; it is pointed past
+	// the body (and the timeout body, if any) further below.
+	auto branch_past_blocks = GoToStub();
+
+	// Each block is followed by its own return; only the returns
+	// are needed as branch anchors.
+	(void) CompileStmt(body);
+	auto when_done = ReturnX();
+
+	if ( timeout )
+		{
+		(void) CompileStmt(timeout_body);
+		auto t_done = ReturnX();
+
+		// The operands holding the branch targets differ between
+		// the constant-timeout and variable-timeout instructions.
+		if ( timeout->Tag() == EXPR_CONST )
+			{
+			SetV1(when_eval, GoToTargetBeyond(branch_past_blocks));
+			SetV2(when_eval, GoToTargetBeyond(when_done));
+			}
+		else
+			{
+			SetV2(when_eval, GoToTargetBeyond(branch_past_blocks));
+			SetV3(when_eval, GoToTargetBeyond(when_done));
+			}
+
+		SetGoTo(branch_past_blocks, GoToTargetBeyond(t_done));
+
+		return t_done;
+		}
+
+	else
+		{
+		SetV2(when_eval, GoToTargetBeyond(branch_past_blocks));
+		SetGoTo(branch_past_blocks, GoToTargetBeyond(when_done));
+
+		return when_done;
+		}
+	}
+
+} // zeek::detail
diff --git a/src/script_opt/ZAM/Support.cc b/src/script_opt/ZAM/Support.cc
new file mode 100644
index 0000000000..69f7b36282
--- /dev/null
+++ b/src/script_opt/ZAM/Support.cc
@@ -0,0 +1,106 @@
+// See the file "COPYING" in the main distribution directory for copyright.
+
+// Low-level support utilities/globals for ZAM compilation.
+
+#include "zeek/Reporter.h"
+#include "zeek/Desc.h"
+#include "zeek/ZeekString.h"
+#include "zeek/script_opt/ProfileFunc.h"
+#include "zeek/script_opt/ZAM/Support.h"
+
+namespace zeek::detail {
+
+const Stmt* curr_stmt;
+TypePtr log_ID_enum_type;
+TypePtr any_base_type;
+bool ZAM_error = false;
+
+// Returns false for functions that use lambdas or "when" statements,
+// true otherwise.  On a false return, *reason (if "reason" is non-nil)
+// is set to a static string naming the construct.
+bool is_ZAM_compilable(const ProfileFunc* pf, const char** reason)
+	{
+	if ( pf->NumLambdas() > 0 )
+		{
+		if ( reason )
+			*reason = "use of lambda";
+		return false;
+		}
+
+	if ( pf->NumWhenStmts() > 0 )
+		{
+		if ( reason )
+			*reason = "use of \"when\"";
+		return false;
+		}
+
+	return true;
+	}
+
+// True if the type's tag is TYPE_ANY.
+bool IsAny(const Type* t)
+	{
+	return t->Tag() == TYPE_ANY;
+	}
+
+
+// Returns a newly allocated StringVal holding a copy of "sv" in which
+// ASCII upper-case bytes have been converted to lower case; all other
+// bytes are copied unchanged.
+StringVal* ZAM_to_lower(const StringVal* sv)
+	{
+	auto bs = sv->AsString();
+	const u_char* s = bs->Bytes();
+	int n = bs->Len();
+	u_char* lower_s = new u_char[n + 1];	// +1 for the terminating NUL
+	u_char* ls = lower_s;
+
+	for ( int i = 0; i < n; ++i )
+		{
+		if ( isascii(s[i]) && isupper(s[i]) )
+			*ls++ = tolower(s[i]);
+		else
+			*ls++ = s[i];
+		}
+
+	*ls++ = '\0';
+
+	return new StringVal(new String(1, lower_s, n));
+	}
+
+// Returns a newly allocated StringVal for the substring of "s" given
+// by "start" and "n".  A non-zero "start" is treated as 1-based.  If
+// GetSubstring() yields nil, an empty string is returned instead.
+StringVal* ZAM_sub_bytes(const StringVal* s, bro_uint_t start, bro_int_t n)
+	{
+	if ( start > 0 )
+		--start;	// make it 0-based
+
+	auto ss = s->AsString()->GetSubstring(start, n);
+
+	return new StringVal(ss ?
ss : new String(""));
+	}
+
+// The following all flag a run-time error by setting ZAM_error.  The
+// variants without a location print the message to stderr; those with
+// a location report it via reporter->RuntimeError().
+void ZAM_run_time_error(const char* msg)
+	{
+	fprintf(stderr, "%s\n", msg);
+	ZAM_error = true;
+	}
+
+void ZAM_run_time_error(const Location* loc, const char* msg)
+	{
+	reporter->RuntimeError(loc, "%s", msg);
+	ZAM_error = true;
+	}
+
+// As above, with a description of "o" appended to the message.
+void ZAM_run_time_error(const char* msg, const Obj* o)
+	{
+	fprintf(stderr, "%s: %s\n", msg, obj_desc(o).c_str());
+	ZAM_error = true;
+	}
+
+void ZAM_run_time_error(const Location* loc, const char* msg, const Obj* o)
+	{
+	reporter->RuntimeError(loc, "%s (%s)", msg, obj_desc(o).c_str());
+	ZAM_error = true;
+	}
+
+// Reports a warning prefixed with a description of "loc".  Unlike the
+// error functions, this does not set ZAM_error.
+void ZAM_run_time_warning(const Location* loc, const char* msg)
+	{
+	ODesc d;
+	loc->Describe(&d);
+
+	reporter->Warning("%s: %s", d.Description(), msg);
+	}
+
+} // namespace zeek::detail
diff --git a/src/script_opt/ZAM/Support.h b/src/script_opt/ZAM/Support.h
new file mode 100644
index 0000000000..50f3981057
--- /dev/null
+++ b/src/script_opt/ZAM/Support.h
@@ -0,0 +1,53 @@
+// See the file "COPYING" in the main distribution directory for copyright.
+
+// Low-level support utilities/globals for ZAM compilation.
+
+#pragma once
+
+#include "zeek/Expr.h"
+#include "zeek/Stmt.h"
+
+namespace zeek::detail {
+
+typedef std::vector val_vec;
+
+// The (reduced) statement currently being compiled.  Used for both
+// tracking "use" and "reaching" definitions, and for error messages.
+extern const Stmt* curr_stmt;
+
+// True if a function with the given profile can be compiled to ZAM.
+// If not, returns the reason in *reason, if non-nil.
+class ProfileFunc;
+extern bool is_ZAM_compilable(const ProfileFunc* pf,
+				const char** reason = nullptr);
+
+// True if a given type is one that we treat internally as an "any" type.
+extern bool IsAny(const Type* t);
+
+// Convenience functions for getting to these.
+inline bool IsAny(const TypePtr& t)	{ return IsAny(t.get()); }
+inline bool IsAny(const Expr* e)	{ return IsAny(e->GetType()); }
+
+
+// Needed for the logging built-in.
Exported so that ZAM can make sure it's
+// defined when compiling.
+extern TypePtr log_ID_enum_type;
+
+// Needed for a slight performance gain when dealing with "any" types.
+extern TypePtr any_base_type;
+
+// Each of these flags a run-time error by setting ZAM_error.
+// NOTE(review): the (const Stmt*, const char*) overload has no
+// definition in the Support.cc added by this same patch - confirm it
+// is defined elsewhere, or drop the declaration.
+extern void ZAM_run_time_error(const char* msg);
+extern void ZAM_run_time_error(const Location* loc, const char* msg);
+extern void ZAM_run_time_error(const Location* loc, const char* msg,
+				const Obj* o);
+extern void ZAM_run_time_error(const Stmt* stmt, const char* msg);
+extern void ZAM_run_time_error(const char* msg, const Obj* o);
+
+// Set by the ZAM_run_time_error() functions.
+extern bool ZAM_error;
+
+extern void ZAM_run_time_warning(const Location* loc, const char* msg);
+
+// Both of these return newly allocated values.
+extern StringVal* ZAM_to_lower(const StringVal* sv);
+extern StringVal* ZAM_sub_bytes(const StringVal* s, bro_uint_t start, bro_int_t n);
+
+} // namespace zeek::detail
diff --git a/src/script_opt/ZAM/Vars.cc b/src/script_opt/ZAM/Vars.cc
new file mode 100644
index 0000000000..e91d3feccc
--- /dev/null
+++ b/src/script_opt/ZAM/Vars.cc
@@ -0,0 +1,160 @@
+// See the file "COPYING" in the main distribution directory for copyright.
+
+// Methods for dealing with variables (both ZAM and script-level).
+
+#include "zeek/Reporter.h"
+#include "zeek/Desc.h"
+#include "zeek/script_opt/ProfileFunc.h"
+#include "zeek/script_opt/Reduce.h"
+#include "zeek/script_opt/ZAM/Compile.h"
+
+namespace zeek::detail {
+
+
+// True if the use-defs show no usage of "id" at the statement "where".
+bool ZAMCompiler::IsUnused(const IDPtr& id, const Stmt* where) const
+	{
+	if ( ! ud->HasUsage(where) )
+		return true;
+
+	auto usage = ud->GetUsage(where);
+
+	// "usage" can be nil if due to constant propagation we've pruned
+	// all of the uses of the given identifier.
+
+	return ! usage || !
usage->HasID(id.get());
+	}
+
+// Adds the parameter "id" to the frame and emits the instruction that
+// loads its value, using the parameter's Offset() as the source.
+void ZAMCompiler::LoadParam(ID* id)
+	{
+	if ( id->IsType() )
+		reporter->InternalError("don't know how to compile local variable that's a type not a value");
+
+	ZOp op = AssignmentFlavor(OP_LOAD_VAL_VV, id->GetType()->Tag());
+
+	int slot = AddToFrame(id);
+
+	ZInstI z(op, slot, id->Offset());
+	z.SetType(id->GetType());
+	z.op_type = OP_VV_FRAME;
+
+	(void) AddInst(z);
+	}
+
+// Emits the instruction that loads the global "id" into its frame
+// slot, and returns that instruction.
+const ZAMStmt ZAMCompiler::LoadGlobal(ID* id)
+	{
+	ZOp op;
+
+	if ( id->IsType() )
+		// Need a special load for these, as they don't fit
+		// with the usual template.
+		op = OP_LOAD_GLOBAL_TYPE_VV;
+	else
+		op = AssignmentFlavor(OP_LOAD_GLOBAL_VV, id->GetType()->Tag());
+
+	auto slot = RawSlot(id);
+
+	ZInstI z(op, slot, global_id_to_info[id]);
+	z.SetType(id->GetType());
+	z.op_type = OP_VV_I2;
+
+	// We use the id_val for reporting used-but-not-set errors.
+	z.aux = new ZInstAux(0);
+	z.aux->id_val = id;
+
+	return AddInst(z);
+	}
+
+// Appends "id" to the frame and returns the slot assigned to it.
+int ZAMCompiler::AddToFrame(ID* id)
+	{
+	frame_layout1[id] = frame_sizeI;
+	frame_denizens.push_back(id);
+	return frame_sizeI++;
+	}
+
+// Returns the frame slot of "id" for reading.  For a global this
+// first emits a load of its current value.
+int ZAMCompiler::FrameSlot(const ID* id)
+	{
+	auto slot = RawSlot(id);
+
+	if ( id->IsGlobal() )
+		(void) LoadGlobal(frame_denizens[slot]);
+
+	return slot;
+	}
+
+// Returns the frame slot of "id" when it's used as an instruction's
+// first operand.  "fl" says how the instruction treats that operand:
+// a global that is read gets loaded first, and one that is written
+// is recorded in pending_global_store.
+int ZAMCompiler::Frame1Slot(const ID* id, ZAMOp1Flavor fl)
+	{
+	auto slot = RawSlot(id);
+
+	switch ( fl ) {
+	case OP1_READ:
+		if ( id->IsGlobal() )
+			(void) LoadGlobal(frame_denizens[slot]);
+		break;
+
+	case OP1_WRITE:
+		if ( id->IsGlobal() )
+			pending_global_store = global_id_to_info[id];
+		break;
+
+	case OP1_READ_WRITE:
+		if ( id->IsGlobal() )
+			{
+			(void) LoadGlobal(frame_denizens[slot]);
+			pending_global_store = global_id_to_info[id];
+			}
+		break;
+
+	case OP1_INTERNAL:
+		break;
+	}
+
+	return slot;
+	}
+
+// Returns the frame slot of "id" without generating any code.  It's
+// an internal error for "id" not to be in the frame.
+int ZAMCompiler::RawSlot(const ID* id)
+	{
+	auto id_slot = frame_layout1.find(id);
+
+	if ( id_slot == frame_layout1.end() )
+		reporter->InternalError("ID %s missing from 
frame layout", id->Name());
+
+	return id_slot->second;
+	}
+
+// True if "id" has been assigned a frame slot.
+bool ZAMCompiler::HasFrameSlot(const ID* id) const
+	{
+	return frame_layout1.find(id) != frame_layout1.end();
+	}
+
+// Creates a new internal frame variable, named after the current
+// frame size, and returns its slot.
+int ZAMCompiler::NewSlot(bool is_managed)
+	{
+	char buf[8192];
+	snprintf(buf, sizeof buf, "#internal-%d#", frame_sizeI);
+
+	// In the following, all that matters is that for managed types
+	// we pick a tag that will be viewed as managed, and vice versa.
+
+	auto tag = is_managed ? TYPE_TABLE : TYPE_VOID;
+
+	auto internal_reg = new ID(buf, SCOPE_FUNCTION, false);
+	internal_reg->SetType(base_type(tag));
+
+	return AddToFrame(internal_reg);
+	}
+
+// Allocates a new slot, emits an assignment of the constant "c" to
+// it, and returns the slot.
+int ZAMCompiler::TempForConst(const ConstExpr* c)
+	{
+	auto slot = NewSlot(c->GetType());
+
+	auto z = ZInstI(OP_ASSIGN_CONST_VC, slot, c);
+	z.CheckIfManaged(c->GetType());
+	(void) AddInst(z);
+
+	return slot;
+	}
+
+} // zeek::detail
diff --git a/src/script_opt/ZAM/ZBody.cc b/src/script_opt/ZAM/ZBody.cc
new file mode 100644
index 0000000000..af1e16ba00
--- /dev/null
+++ b/src/script_opt/ZAM/ZBody.cc
@@ -0,0 +1,539 @@
+// See the file "COPYING" in the main distribution directory for copyright.
+
+#include "zeek/Desc.h"
+#include "zeek/RE.h"
+#include "zeek/Frame.h"
+#include "zeek/EventHandler.h"
+#include "zeek/Trigger.h"
+#include "zeek/Traverse.h"
+#include "zeek/Reporter.h"
+#include "zeek/script_opt/ScriptOpt.h"
+#include "zeek/script_opt/ZAM/Compile.h"
+
+// Needed for managing the corresponding values.
+#include "zeek/File.h"
+#include "zeek/Func.h"
+#include "zeek/OpaqueVal.h"
+
+// Just needed for BiFs.
+#include "zeek/analyzer/Manager.h"
+#include "zeek/broker/Manager.h"
+#include "zeek/file_analysis/Manager.h"
+#include "zeek/logging/Manager.h"
+
+
+namespace zeek::detail {
+
+using std::vector;
+
+// Guards the one-time initialization done in the ZBody constructor.
+static bool did_init = false;
+
+// Count of how often each type of ZOP executed, and how much CPU it
+// cumulatively took.
+int ZOP_count[OP_NOP+1];
+double ZOP_CPU[OP_NOP+1];
+
+
+// Prints, for each opcode from 1 through OP_NOP that executed at least
+// once, a tab-separated line giving its name, execution count and
+// cumulative CPU time.
+void report_ZOP_profile()
+	{
+	for ( int i = 1; i <= OP_NOP; ++i )
+		if ( ZOP_count[i] > 0 )
+			printf("%s\t%d\t%.06f\n", ZOP_name(ZOp(i)),
+				ZOP_count[i], ZOP_CPU[i]);
+	}
+
+
+// Sets the given element to a copy of an existing (not newly constructed)
+// ZVal, including underlying memory management.  Returns false if the
+// assigned value was missing (which we can only tell for managed types),
+// true otherwise.
+
+static bool copy_vec_elem(VectorVal* vv, int ind, ZVal zv, const TypePtr& t)
+	{
+	// Grow the vector if needed so that "ind" is a valid index.
+	if ( vv->Size() <= ind )
+		vv->Resize(ind + 1);
+
+	auto& elem = (*vv->RawVec())[ind];
+
+	if ( ! ZVal::IsManagedType(t) )
+		{
+		elem = zv;
+		return true;
+		}
+
+	// Release whatever the element previously held.
+	if ( elem )
+		ZVal::DeleteManagedType(*elem);
+
+	elem = zv;
+	auto managed_elem = elem->ManagedVal();
+
+	if ( ! managed_elem )
+		{
+		elem = std::nullopt;
+		return false;
+		}
+
+	// The vector now holds its own reference to the value.
+	zeek::Ref(managed_elem);
+	return true;
+	}
+
+// Unary vector operations never work on managed types, so no need
+// to pass in the type ...  However, the RHS, which normally would
+// be const, needs to be non-const so we can use its Type() method
+// to get at a shareable VectorType.
+static void vec_exec(ZOp op, VectorVal*& v1, VectorVal* v2, const ZInst& z);
+
+// Binary operations *can* have managed types (strings).
+static void vec_exec(ZOp op, TypePtr t, VectorVal*& v1, VectorVal* v2,
+			const VectorVal* v3, const ZInst& z);
+
+// Vector coercion.
+//
+// ### Should check for underflow/overflow.
+// Defines vec_coerce_<tag>(), which returns a newly allocated vector
+// with element type "lhs_type".  Each present element of "vec" is read
+// with "rhs_accessor" and converted using "cast"; missing elements
+// stay missing.
+#define VEC_COERCE(tag, lhs_type, cast, rhs_accessor) \
+	static VectorVal* vec_coerce_##tag(VectorVal* vec) \
+		{ \
+		auto& v = *vec->RawVec(); \
+		auto yt = make_intrusive(base_type(lhs_type)); \
+		auto res_zv = new VectorVal(yt); \
+		auto n = v.size(); \
+		res_zv->Resize(n); \
+		auto& res = *res_zv->RawVec(); \
+		for ( auto i = 0U; i < n; ++i ) \
+			if ( v[i] ) \
+				res[i] = ZVal(cast((*v[i]).rhs_accessor)); \
+			else \
+				res[i] = std::nullopt; \
+		return res_zv; \
+		}
+
+// The two letters of the tag give the target and then the source type:
+// I = int, U = count (unsigned), D = double.  The cast is always to the
+// C++ type matching the target, so that the ZVal is built as a value
+// of the element type the new vector was declared with.
+VEC_COERCE(IU, TYPE_INT, bro_int_t, AsCount())
+VEC_COERCE(ID, TYPE_INT, bro_int_t, AsDouble())
+VEC_COERCE(UI, TYPE_COUNT, bro_uint_t, AsInt())
+VEC_COERCE(UD, TYPE_COUNT, bro_uint_t, AsDouble())
+VEC_COERCE(DI, TYPE_DOUBLE, double, AsInt())
+VEC_COERCE(DU, TYPE_DOUBLE, double, AsCount())
+
+// Returns the CPU time consumed so far by this process, in seconds.
+double curr_CPU_time()
+	{
+	struct timespec ts;
+	clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts);
+	return double(ts.tv_sec) + double(ts.tv_nsec) / 1e9;
+	}
+
+
+// Captures from the compiler "zc" everything needed to execute the
+// function: frame layout, managed slots, globals, switch tables and
+// iteration information.  The instructions themselves are supplied
+// separately, via SetInsts().
+ZBody::ZBody(const char* _func_name, const ZAMCompiler* zc)
+: Stmt(STMT_ZAM)
+	{
+	func_name = _func_name;
+
+	frame_denizens = zc->FrameDenizens();
+	frame_size = frame_denizens.size();
+
+	// Concretize the names of the frame denizens.
+	for ( auto& f : frame_denizens )
+		for ( auto i = 0U; i < f.ids.size(); ++i )
+			f.names.push_back(f.ids[i]->Name());
+
+	managed_slots = zc->ManagedSlots();
+
+	globals = zc->Globals();
+	num_globals = globals.size();
+
+	int_cases = zc->GetCases();
+	uint_cases = zc->GetCases();
+	double_cases = zc->GetCases();
+	str_cases = zc->GetCases();
+
+	// Non-recursive functions get a single pre-allocated frame that
+	// is reused across invocations.
+	if ( zc->NonRecursive() )
+		{
+		fixed_frame = new ZVal[frame_size];
+
+		for ( auto i = 0U; i < managed_slots.size(); ++i )
+			fixed_frame[managed_slots[i]].ClearManagedVal();
+		}
+
+	table_iters = zc->GetTableIters();
+	num_step_iters = zc->NumStepIters();
+
+	// It's a little weird doing this in the constructor, but unless
+	// we add a general "initialize for ZAM" function, this is as good
+	// a place as any.
+	if ( ! 
did_init )
+		{
+		auto log_ID_type = lookup_ID("ID", "Log");
+		ASSERT(log_ID_type);
+		log_ID_enum_type = log_ID_type->GetType();
+
+		any_base_type = base_type(TYPE_ANY);
+
+		ZVal::SetZValNilStatusAddr(&ZAM_error);
+
+		// Latch the flag so that this only runs for the first
+		// ZBody constructed.
+		did_init = true;
+		}
+	}
+
+ZBody::~ZBody()
+	{
+	delete[] fixed_frame;
+	delete[] insts;
+
+	// The profiling state is allocated as a group by InitProfile(),
+	// so it's released as a group too.
+	delete inst_count;
+	delete inst_CPU;
+	delete CPU_time;
+	}
+
+// Installs a copy of the given instructions as the body's code and
+// then sets up profiling, if enabled.
+void ZBody::SetInsts(vector& _insts)
+	{
+	ninst = _insts.size();
+	auto insts_copy = new ZInst[ninst];
+
+	for ( auto i = 0U; i < ninst; ++i )
+		insts_copy[i] = *_insts[i];
+
+	insts = insts_copy;
+
+	InitProfile();
+	}
+
+// As above, but for intermediary instructions.  For those that have
+// an associated statement, the copy's location is set from that
+// statement's original.
+void ZBody::SetInsts(vector& instsI)
+	{
+	ninst = instsI.size();
+	auto insts_copy = new ZInst[ninst];
+
+	for ( auto i = 0U; i < ninst; ++i )
+		{
+		auto& iI = *instsI[i];
+		insts_copy[i] = iI;
+		if ( iI.stmt )
+			insts_copy[i].loc = iI.stmt->Original()->GetLocationInfo();
+		}
+
+	insts = insts_copy;
+
+	InitProfile();
+	}
+
+// If ZAM profiling is enabled, allocates and zeroes the per-instruction
+// counters and the cumulative CPU time.  Otherwise does nothing.
+void ZBody::InitProfile()
+	{
+	if ( analysis_options.profile_ZAM )
+		{
+		inst_count = new vector;
+		inst_CPU = new vector;
+		for ( auto i = 0U; i < ninst; ++i )
+			{
+			inst_count->push_back(0);
+			inst_CPU->push_back(0.0);
+			}
+
+		CPU_time = new double;
+		*CPU_time = 0.0;
+		}
+	}
+
+// Executes the body from its first instruction.  In DEBUG builds with
+// profiling enabled, the CPU time taken is added to *CPU_time.
+ValPtr ZBody::Exec(Frame* f, StmtFlowType& flow)
+	{
+#ifdef DEBUG
+	double t = analysis_options.profile_ZAM ? curr_CPU_time() : 0.0;
+#endif
+
+	auto val = DoExec(f, 0, flow);
+
+#ifdef DEBUG
+	if ( analysis_options.profile_ZAM )
+		*CPU_time += curr_CPU_time() - t;
+#endif
+
+	return val;
+	}
+
+// Executes the body starting at instruction "start_pc", until running
+// off the end of the code or a run-time error is flagged.
+ValPtr ZBody::DoExec(Frame* f, int start_pc, StmtFlowType& flow)
+	{
+	int pc = start_pc;
+	const int end_pc = ninst;
+
+	// Return value, or nil if none.
+	const ZVal* ret_u = nullptr;
+
+	// Type of the return value.  If nil, then we don't have a value.
+	TypePtr ret_type;
+
+#ifdef DEBUG
+	bool do_profile = analysis_options.profile_ZAM;
+#endif
+
+	ZVal* frame;
+	std::unique_ptr local_table_iters;
+	std::vector step_iters(num_step_iters);
+
+	if ( fixed_frame )
+		frame = fixed_frame;
+	else
+		{
+		frame = new ZVal[frame_size];
+		// Clear slots for which we do explicit memory management.
+		for ( auto s : managed_slots )
+			frame[s].ClearManagedVal();
+
+		// Give this invocation its own copy of the table iterators.
+		// NOTE(review): tiv_ptr is pointed at the local copy and is
+		// not restored before returning - confirm that a nested
+		// invocation of this same body can't leave it referring to
+		// a copy that has already been destroyed.
+		if ( table_iters.size() > 0 )
+			{
+			local_table_iters =
+				std::make_unique(table_iters.size());
+			*local_table_iters = table_iters;
+			tiv_ptr = &(*local_table_iters);
+			}
+		}
+
+	flow = FLOW_RETURN;	// can be over-written by a Hook-Break
+
+	while ( pc < end_pc && ! ZAM_error ) {
+		auto& z = insts[pc];
+
+#ifdef DEBUG
+		int profile_pc;
+		double profile_CPU;
+
+		if ( do_profile )
+			{
+			++ZOP_count[z.op];
+			++(*inst_count)[pc];
+
+			// Remember which instruction we started on, since
+			// executing it can change pc.
+			profile_pc = pc;
+			profile_CPU = curr_CPU_time();
+			}
+#endif
+
+		switch ( z.op ) {
+		case OP_NOP:
+			break;
+
+		// The cases for all other opcodes come from generated files.
+#include "ZAM-EvalMacros.h"
+#include "ZAM-EvalDefs.h"
+
+		default:
+			reporter->InternalError("bad ZAM opcode");
+		}
+
+#ifdef DEBUG
+		if ( do_profile )
+			{
+			double dt = curr_CPU_time() - profile_CPU;
+			(*inst_CPU)[profile_pc] += dt;
+			ZOP_CPU[z.op] += dt;
+			}
+#endif
+
+		++pc;
+		}
+
+	// Convert the return value before the frame it lives in is
+	// cleaned up below.
+	auto result = ret_type ? ret_u->ToVal(ret_type) : nullptr;
+
+	if ( fixed_frame )
+		{
+		// Make sure we don't have any dangling iterators.
+		for ( auto& ti : table_iters )
+			ti.Clear();
+
+		// Free slots for which we do explicit memory management,
+		// preparing them for reuse.
+		for ( auto i = 0U; i < managed_slots.size(); ++i )
+			{
+			auto& v = frame[managed_slots[i]];
+			ZVal::DeleteManagedType(v);
+			v.ClearManagedVal();
+			}
+		}
+	else
+		{
+		// Free those slots for which we do explicit memory management.
+		// No need to then clear them, as we're about to throw away
+		// the entire frame.
+		for ( auto i = 0U; i < managed_slots.size(); ++i )
+			{
+			auto& v = frame[managed_slots[i]];
+			ZVal::DeleteManagedType(v);
+			}
+
+		delete [] frame;
+		}
+
+	// Clear any error state.
+	ZAM_error = false;
+
+	return result;
+	}
+
+// Prints the profile collected for this body: its cumulative CPU time,
+// followed by one line per instruction giving its execution count and
+// CPU time along with a dump of the instruction.
+void ZBody::ProfileExecution() const
+	{
+	// The profiling state is only allocated by InitProfile() when
+	// profiling is enabled, so don't dereference it otherwise.
+	// (Assumes the pointers are null-initialized in the class
+	// declaration - TODO confirm.)
+	if ( ! inst_count || ! inst_CPU || ! CPU_time )
+		return;
+
+	if ( inst_count->size() == 0 )
+		{
+		printf("%s has an empty body\n", func_name);
+		return;
+		}
+
+	if ( (*inst_count)[0] == 0 )
+		{
+		printf("%s did not execute\n", func_name);
+		return;
+		}
+
+	printf("%s CPU time: %.06f\n", func_name, *CPU_time);
+
+	for ( auto i = 0U; i < inst_count->size(); ++i )
+		{
+		// "i" is unsigned, hence %u.
+		printf("%s %u %d %.06f ", func_name, i,
+			(*inst_count)[i], (*inst_CPU)[i]);
+		insts[i].Dump(i, &frame_denizens);
+		}
+	}
+
+// Checks whether a value of type "any_type" can be used where
+// "expected_type" is required.  On a mismatch, reports a run-time
+// error at "loc" and returns false.
+bool ZBody::CheckAnyType(const TypePtr& any_type, const TypePtr& expected_type,
+				const Location* loc) const
+	{
+	if ( IsAny(expected_type) )
+		return true;
+
+	if ( ! same_type(any_type, expected_type, false, false) )
+		{
+		auto at = any_type->Tag();
+		auto et = expected_type->Tag();
+
+		// Records that differ are still acceptable if they're
+		// promotion-compatible.
+		if ( at == TYPE_RECORD && et == TYPE_RECORD )
+			{
+			auto at_r = any_type->AsRecordType();
+			auto et_r = expected_type->AsRecordType();
+
+			if ( record_promotion_compatible(et_r, at_r) )
+				return true;
+			}
+
+		char buf[8192];
+		snprintf(buf, sizeof buf, "run-time type clash (%s/%s)",
+			type_name(at), type_name(et));
+
+		reporter->RuntimeError(loc, "%s", buf);
+		return false;
+		}
+
+	return true;
+	}
+
+// Prints the frame layout followed by the body's instructions.
+void ZBody::Dump() const
+	{
+	printf("Frame:\n");
+
+	for ( unsigned i = 0; i < frame_denizens.size(); ++i )
+		{
+		auto& d = frame_denizens[i];
+
+		printf("frame[%u] =", i);
+
+		// Prefer the concretized names, if present.
+		if ( d.names.size() > 0 )
+			for ( auto& n : d.names )
+				printf(" %s", n);
+		else
+			for ( auto& id : d.ids )
+				printf(" %s", id->Name());
+		printf("\n");
+		}
+
+	printf("Final code:\n");
+
+	for ( unsigned i = 0; i < ninst; ++i )
+		{
+		auto& inst = insts[i];
+		printf("%u: ", i);
+		inst.Dump(i, &frame_denizens);
+		}
+	}
+
+void ZBody::StmtDescribe(ODesc* d) const
+	{
+	d->AddSP("ZAM-code");
+	d->AddSP(func_name);
+	
}
+
+// A ZBody has no sub-statements to traverse, so this just invokes the
+// pre- and post-statement callbacks on the body itself.
+TraversalCode ZBody::Traverse(TraversalCallback* cb) const
+	{
+	TraversalCode tc = cb->PreStmt(this);
+	HANDLE_TC_STMT_PRE(tc);
+
+	tc = cb->PostStmt(this);
+	HANDLE_TC_STMT_POST(tc);
+	}
+
+
+// Resumes execution of the associated body at instruction xfer_pc.
+ValPtr ZAMResumption::Exec(Frame* f, StmtFlowType& flow)
+	{
+	return am->DoExec(f, xfer_pc, flow);
+	}
+
+void ZAMResumption::StmtDescribe(ODesc* d) const
+	{
+	d->Add("");
+	}
+
+// As for ZBody::Traverse(), only the statement itself is visited.
+TraversalCode ZAMResumption::Traverse(TraversalCallback* cb) const
+	{
+	TraversalCode tc = cb->PreStmt(this);
+	HANDLE_TC_STMT_PRE(tc);
+
+	tc = cb->PostStmt(this);
+	HANDLE_TC_STMT_POST(tc);
+	}
+
+
+// Unary vector operation of v1 = <op> v2.  The result is built in a
+// newly allocated vector of the same size and type as v2, which
+// replaces v1; the previous v1 is unref'd.
+static void vec_exec(ZOp op, VectorVal*& v1, VectorVal* v2, const ZInst& z)
+	{
+	// We could speed this up further still by gen'ing up an instance
+	// of the loop inside each switch case (in which case we might as
+	// well move the whole kit-and-caboodle into the Exec method).  But
+	// that seems like a lot of code bloat for only a very modest gain.
+
+	auto& vec2 = *v2->RawVec();
+	auto n = vec2.size();
+	auto vec1_ptr = new vector>(n);
+	auto& vec1 = *vec1_ptr;
+
+	for ( auto i = 0U; i < n; ++i )
+		switch ( op ) {
+
+		// The per-opcode cases come from a generated file.
+#include "ZAM-Vec1EvalDefs.h"
+
+		default:
+			reporter->InternalError("bad invocation of VecExec");
+		}
+
+	auto vt = cast_intrusive(v2->GetType());
+	auto old_v1 = v1;
+	v1 = new VectorVal(std::move(vt), vec1_ptr);
+	Unref(old_v1);
+	}
+
+// Binary vector operation of v1 = v2 <op> v3.  "t" gives the type of
+// the resulting vector, which is sized to match v2.
+static void vec_exec(ZOp op, TypePtr t, VectorVal*& v1,
+			VectorVal* v2, const VectorVal* v3, const ZInst& z)
+	{
+	// See comment above re further speed-up.
// Static information about globals used in a function.
class GlobalInfo {
public:
	// Identifier of the global.
	IDPtr id;

	// Slot number associated with the global.
	// NOTE(review): presumably the ZAM frame slot that the global is
	// loaded into - confirm against the code that fills this in.
	int slot;
};
// "Duplicating" a ZBody does not copy it: the result is an additional
// reference to this very object.
StmtPtr Duplicate() override	{ return {NewRef{}, this}; }
+ std::vector* inst_count = nullptr; // for profiling + double* CPU_time = nullptr; // cumulative CPU time for the program + std::vector* inst_CPU; // per-instruction CPU time. + + CaseMaps int_cases; + CaseMaps uint_cases; + CaseMaps double_cases; + CaseMaps str_cases; +}; + +// This is a statement that resumes execution into a code block in a +// ZBody. Used for deferred execution for "when" statements. + +class ZAMResumption : public Stmt { +public: + ZAMResumption(ZBody* _am, int _xfer_pc) + : Stmt(STMT_ZAM_RESUMPTION) + { + am = _am; + xfer_pc = _xfer_pc; + } + + ValPtr Exec(Frame* f, StmtFlowType& flow) override; + + StmtPtr Duplicate() override { return {NewRef{}, this}; } + + void StmtDescribe(ODesc* d) const override; + +protected: + TraversalCode Traverse(TraversalCallback* cb) const override; + + ZBody* am; + int xfer_pc = 0; +}; + + +// Prints the execution profile. +extern void report_ZOP_profile(); + +} // namespace zeek::detail diff --git a/src/script_opt/ZAM/ZInst.cc b/src/script_opt/ZAM/ZInst.cc new file mode 100644 index 0000000000..77ef429c22 --- /dev/null +++ b/src/script_opt/ZAM/ZInst.cc @@ -0,0 +1,615 @@ +// See the file "COPYING" in the main distribution directory for copyright. 
+ +#include "zeek/Desc.h" +#include "zeek/Reporter.h" +#include "zeek/Func.h" +#include "zeek/script_opt/ZAM/ZInst.h" + +using std::string; + +namespace zeek::detail { + +void ZInst::Dump(int inst_num, const FrameReMap* mappings) const + { + // printf("v%d ", n); + + auto id1 = VName(1, inst_num, mappings); + auto id2 = VName(2, inst_num, mappings); + auto id3 = VName(3, inst_num, mappings); + auto id4 = VName(4, inst_num, mappings); + + Dump(id1, id2, id3, id4); + } + +void ZInst::Dump(const string& id1, const string& id2, const string& id3, + const string& id4) const + { + printf("%s ", ZOP_name(op)); + // printf("(%s) ", op_type_name(op_type)); + if ( t && 0 ) + printf("(%s) ", type_name(t->Tag())); + + switch ( op_type ) { + case OP_X: + break; + + case OP_V: + printf("%s", id1.c_str()); + break; + + case OP_VV: + printf("%s, %s", id1.c_str(), id2.c_str()); + break; + + case OP_VVV: + printf("%s, %s, %s", id1.c_str(), id2.c_str(), id3.c_str()); + break; + + case OP_VVVV: + printf("%s, %s, %s, %s", id1.c_str(), id2.c_str(), id3.c_str(), + id4.c_str()); + break; + + case OP_VVVC: + printf("%s, %s, %s, %s", id1.c_str(), id2.c_str(), id3.c_str(), + ConstDump().c_str()); + break; + + case OP_C: + printf("%s", ConstDump().c_str()); + break; + + case OP_VC: + printf("%s, %s", id1.c_str(), ConstDump().c_str()); + break; + + case OP_VVC: + printf("%s, %s, %s", id1.c_str(), id2.c_str(), + ConstDump().c_str()); + break; + + case OP_V_I1: + printf("%d", v1); + break; + + case OP_VC_I1: + printf("%d %s", v1, ConstDump().c_str()); + break; + + case OP_VV_FRAME: + printf("%s, interpreter frame[%d]", id1.c_str(), v2); + break; + + case OP_VV_I2: + printf("%s, %d", id1.c_str(), v2); + break; + + case OP_VV_I1_I2: + printf("%d, %d", v1, v2); + break; + + case OP_VVC_I2: + printf("%s, %d, %s", id1.c_str(), v2, ConstDump().c_str()); + break; + + case OP_VVV_I3: + printf("%s, %s, %d", id1.c_str(), id2.c_str(), v3); + break; + + case OP_VVV_I2_I3: + printf("%s, %d, %d", id1.c_str(), 
v2, v3); + break; + + case OP_VVVV_I4: + printf("%s, %s, %s, %d", id1.c_str(), id2.c_str(), id3.c_str(), + v4); + break; + + case OP_VVVV_I3_I4: + printf("%s, %s, %d, %d", id1.c_str(), id2.c_str(), v3, v4); + break; + + case OP_VVVV_I2_I3_I4: + printf("%s, %d, %d, %d", id1.c_str(), v2, v3, v4); + break; + + case OP_VVVC_I3: + printf("%s, %s, %d, %s", id1.c_str(), id2.c_str(), v3, + ConstDump().c_str()); + break; + + case OP_VVVC_I2_I3: + printf("%s, %d, %d, %s", id1.c_str(), v2, v3, + ConstDump().c_str()); + break; + + case OP_VVVC_I1_I2_I3: + printf("%d, %d, %d, %s", v1, v2, v3, ConstDump().c_str()); + break; + } + + if ( func ) + printf(" (func %s)", func->Name()); + + printf("\n"); + } + +int ZInst::NumFrameSlots() const + { + switch ( op_type ) { + case OP_X: return 0; + case OP_V: return 1; + case OP_VV: return 2; + case OP_VVV: return 3; + case OP_VVVV: return 4; + case OP_VVVC: return 3; + case OP_C: return 0; + case OP_VC: return 1; + case OP_VVC: return 2; + + case OP_V_I1: return 0; + case OP_VC_I1: return 0; + case OP_VV_I1_I2: return 0; + case OP_VV_FRAME: return 1; + case OP_VV_I2: return 1; + case OP_VVC_I2: return 1; + case OP_VVV_I3: return 2; + case OP_VVV_I2_I3: return 1; + + case OP_VVVV_I4: return 3; + case OP_VVVV_I3_I4: return 2; + case OP_VVVV_I2_I3_I4: return 1; + case OP_VVVC_I3: return 2; + case OP_VVVC_I2_I3: return 1; + case OP_VVVC_I1_I2_I3: return 0; + } + } + +int ZInst::NumSlots() const + { + switch ( op_type ) { + case OP_X: return 0; + case OP_C: return 0; + case OP_V: return 1; + case OP_VC: return 1; + case OP_VV: return 2; + case OP_VVC: return 2; + case OP_VVV: return 3; + case OP_VVVC: return 3; + case OP_VVVV: return 4; + + case OP_V_I1: return 1; + case OP_VC_I1: return 1; + + case OP_VV_I1_I2: return 2; + case OP_VV_FRAME: return 2; + case OP_VV_I2: return 2; + case OP_VVC_I2: return 2; + + case OP_VVV_I3: return 3; + case OP_VVV_I2_I3: return 3; + case OP_VVVC_I3: return 3; + case OP_VVVC_I2_I3: return 3; + case 
OP_VVVC_I1_I2_I3: return 3; + + case OP_VVVV_I4: return 4; + case OP_VVVV_I3_I4: return 4; + case OP_VVVV_I2_I3_I4: return 4; + } + } + +string ZInst::VName(int n, int inst_num, const FrameReMap* mappings) const + { + if ( n > NumFrameSlots() ) + return ""; + + int slot = n == 1 ? v1 : (n == 2 ? v2 : (n == 3 ? v3 : v4)); + + if ( slot < 0 ) + return ""; + + // Find which identifier manifests at this instruction. + ASSERT(slot >= 0 && slot < mappings->size()); + + auto& map = (*mappings)[slot]; + + unsigned int i; + for ( i = 0; i < map.id_start.size(); ++i ) + { + // If the slot is right at the boundary between two + // identifiers, then it matters whether this is slot 1 + // (starts right here) vs. slot > 1 (ignore change right + // at the boundary and stick with older value). + if ( (n == 1 && map.id_start[i] > inst_num) || + (n > 1 && map.id_start[i] >= inst_num) ) + // Went too far. + break; + } + + if ( i < map.id_start.size() ) + { + ASSERT(i > 0); + } + + auto id = map.names.size() > 0 ? 
map.names[i-1] : map.ids[i-1]->Name(); + + return util::fmt("%d (%s)", slot, id); + } + +ValPtr ZInst::ConstVal() const + { + switch ( op_type ) { + case OP_C: + case OP_VC: + case OP_VC_I1: + case OP_VVC: + case OP_VVC_I2: + case OP_VVVC: + case OP_VVVC_I3: + case OP_VVVC_I2_I3: + case OP_VVVC_I1_I2_I3: + return c.ToVal(t); + + case OP_X: + case OP_V: + case OP_VV: + case OP_VVV: + case OP_VVVV: + case OP_V_I1: + case OP_VV_FRAME: + case OP_VV_I2: + case OP_VV_I1_I2: + case OP_VVV_I3: + case OP_VVV_I2_I3: + case OP_VVVV_I4: + case OP_VVVV_I3_I4: + case OP_VVVV_I2_I3_I4: + return nullptr; + } + } + +string ZInst::ConstDump() const + { + auto v = ConstVal(); + + ODesc d; + + d.Clear(); + v->Describe(&d); + + return d.Description(); + } + + +void ZInstI::Dump(const FrameMap* frame_ids, const FrameReMap* remappings) const + { + int n = NumFrameSlots(); + // printf("v%d ", n); + + auto id1 = VName(1, frame_ids, remappings); + auto id2 = VName(2, frame_ids, remappings); + auto id3 = VName(3, frame_ids, remappings); + auto id4 = VName(4, frame_ids, remappings); + + ZInst::Dump(id1, id2, id3, id4); + } + +string ZInstI::VName(int n, const FrameMap* frame_ids, + const FrameReMap* remappings) const + { + if ( n > NumFrameSlots() ) + return ""; + + int slot = n == 1 ? v1 : (n == 2 ? v2 : (n == 3 ? v3 : v4)); + + if ( slot < 0 ) + return ""; + + const ID* id; + + if ( remappings && live ) + { // Find which identifier manifests at this instruction. + ASSERT(slot >= 0 && slot < remappings->size()); + + auto& map = (*remappings)[slot]; + + unsigned int i; + for ( i = 0; i < map.id_start.size(); ++i ) + { + // See discussion for ZInst::VName. + if ( (n == 1 && map.id_start[i] > inst_num) || + (n > 1 && map.id_start[i] >= inst_num) ) + // Went too far. + break; + } + + if ( i < map.id_start.size() ) + { + ASSERT(i > 0); + } + + // For ZInstI's, map.ids is always populated. 
+ id = map.ids[i-1]; + } + + else + id = (*frame_ids)[slot]; + + return util::fmt("%d (%s)", slot, id->Name()); + } + +bool ZInstI::DoesNotContinue() const + { + switch ( op ) { + case OP_GOTO_V: + case OP_HOOK_BREAK_X: + case OP_RETURN_C: + case OP_RETURN_V: + case OP_RETURN_X: + return true; + + default: + return false; + } + } + +bool ZInstI::IsDirectAssignment() const + { + if ( op_type != OP_VV ) + return false; + + switch ( op ) { + case OP_ASSIGN_VV_N: + case OP_ASSIGN_VV_A: + case OP_ASSIGN_VV_O: + case OP_ASSIGN_VV_P: + case OP_ASSIGN_VV_R: + case OP_ASSIGN_VV_S: + case OP_ASSIGN_VV_F: + case OP_ASSIGN_VV_T: + case OP_ASSIGN_VV_V: + case OP_ASSIGN_VV_L: + case OP_ASSIGN_VV_f: + case OP_ASSIGN_VV_t: + case OP_ASSIGN_VV: + return true; + + default: + return false; + } + } + +bool ZInstI::HasSideEffects() const + { + return op_side_effects[op]; + } + +bool ZInstI::AssignsToSlot1() const + { + switch ( op_type ) { + case OP_X: + case OP_C: + case OP_V_I1: + case OP_VC_I1: + case OP_VV_I1_I2: + case OP_VVVC_I1_I2_I3: + return false; + + // We use this ginormous set of cases rather than "default" so + // that when we add a new operand type, we have to consider + // its behavior here. (Same for many of the other switch's + // used for ZInst/ZinstI.) 
+ case OP_V: + case OP_VC: + case OP_VV_FRAME: + case OP_VV_I2: + case OP_VVC_I2: + case OP_VVV_I2_I3: + case OP_VVVC_I2_I3: + case OP_VVVV_I2_I3_I4: + case OP_VV: + case OP_VVC: + case OP_VVV_I3: + case OP_VVVV_I3_I4: + case OP_VVVC_I3: + case OP_VVV: + case OP_VVVC: + case OP_VVVV_I4: + case OP_VVVV: + auto fl = op1_flavor[op]; + return fl == OP1_WRITE || fl == OP1_READ_WRITE; + } + } + +bool ZInstI::UsesSlot(int slot) const + { + auto fl = op1_flavor[op]; + auto v1_relevant = fl == OP1_READ || fl == OP1_READ_WRITE; + auto v1_match = v1_relevant && v1 == slot; + + switch ( op_type ) { + case OP_X: + case OP_C: + case OP_V_I1: + case OP_VC_I1: + case OP_VV_I1_I2: + case OP_VVVC_I1_I2_I3: + return false; + + case OP_V: + case OP_VC: + case OP_VV_FRAME: + case OP_VV_I2: + case OP_VVC_I2: + case OP_VVV_I2_I3: + case OP_VVVC_I2_I3: + case OP_VVVV_I2_I3_I4: + return v1_match; + + case OP_VV: + case OP_VVC: + case OP_VVV_I3: + case OP_VVVV_I3_I4: + case OP_VVVC_I3: + return v1_match || v2 == slot; + + case OP_VVV: + case OP_VVVC: + case OP_VVVV_I4: + return v1_match || v2 == slot || v3 == slot; + + case OP_VVVV: + return v1_match || v2 == slot || v3 == slot || v4 == slot; + } + } + +bool ZInstI::UsesSlots(int& s1, int& s2, int& s3, int& s4) const + { + s1 = s2 = s3 = s4 = -1; + + auto fl = op1_flavor[op]; + auto v1_relevant = fl == OP1_READ || fl == OP1_READ_WRITE; + + switch ( op_type ) { + case OP_X: + case OP_C: + case OP_V_I1: + case OP_VC_I1: + case OP_VV_I1_I2: + case OP_VVVC_I1_I2_I3: + return false; + + case OP_V: + case OP_VC: + case OP_VV_FRAME: + case OP_VV_I2: + case OP_VVC_I2: + case OP_VVV_I2_I3: + case OP_VVVC_I2_I3: + case OP_VVVV_I2_I3_I4: + if ( ! 
v1_relevant ) + return false; + + s1 = v1; + return true; + + case OP_VV: + case OP_VVC: + case OP_VVV_I3: + case OP_VVVV_I3_I4: + case OP_VVVC_I3: + s1 = v2; + + if ( v1_relevant ) + s2 = v1; + + return true; + + case OP_VVV: + case OP_VVVC: + case OP_VVVV_I4: + s1 = v2; + s2 = v3; + + if ( v1_relevant ) + s3 = v1; + + return true; + + case OP_VVVV: + s1 = v2; + s2 = v3; + s3 = v4; + + if ( v1_relevant ) + s4 = v1; + + return true; + } + } + +void ZInstI::UpdateSlots(std::vector& slot_mapping) + { + switch ( op_type ) { + case OP_X: + case OP_C: + case OP_V_I1: + case OP_VC_I1: + case OP_VV_I1_I2: + case OP_VVVC_I1_I2_I3: + return; // so we don't do any v1 remapping. + + case OP_V: + case OP_VC: + case OP_VV_FRAME: + case OP_VV_I2: + case OP_VVC_I2: + case OP_VVV_I2_I3: + case OP_VVVC_I2_I3: + case OP_VVVV_I2_I3_I4: + break; + + case OP_VV: + case OP_VVC: + case OP_VVV_I3: + case OP_VVVV_I3_I4: + case OP_VVVC_I3: + v2 = slot_mapping[v2]; + break; + + case OP_VVV: + case OP_VVVC: + case OP_VVVV_I4: + v2 = slot_mapping[v2]; + v3 = slot_mapping[v3]; + break; + + case OP_VVVV: + v2 = slot_mapping[v2]; + v3 = slot_mapping[v3]; + v4 = slot_mapping[v4]; + break; + } + + // Note, unlike for UsesSlots() we do *not* include OP1_READ_WRITE + // here, because such instructions will already have v1 remapped + // given it's an assignment target. + if ( op1_flavor[op] == OP1_READ && v1 >= 0 ) + v1 = slot_mapping[v1]; + } + +bool ZInstI::IsGlobalLoad() const + { + if ( op == OP_LOAD_GLOBAL_TYPE_VV ) + // These don't have flavors. + return true; + + static std::unordered_set global_ops; + + if ( global_ops.size() == 0 ) + { // Initialize the set. 
+ for ( int t = 0; t < NUM_TYPES; ++t ) + { + TypeTag tag = TypeTag(t); + ZOp global_op_flavor = + AssignmentFlavor(OP_LOAD_GLOBAL_VV, tag, false); + + if ( global_op_flavor != OP_NOP ) + global_ops.insert(global_op_flavor); + } + } + + return global_ops.count(op) > 0; + } + +void ZInstI::InitConst(const ConstExpr* ce) + { + auto v = ce->ValuePtr(); + t = ce->GetType(); + c = ZVal(v, t); + + if ( ZAM_error ) + reporter->InternalError("bad value compiling code"); + } + +} // zeek::detail diff --git a/src/script_opt/ZAM/ZInst.h b/src/script_opt/ZAM/ZInst.h new file mode 100644 index 0000000000..19733666fd --- /dev/null +++ b/src/script_opt/ZAM/ZInst.h @@ -0,0 +1,469 @@ +// See the file "COPYING" in the main distribution directory for copyright. + +// Operators and instructions used in ZAM execution. + +#pragma once + +#include "zeek/script_opt/ZAM/Support.h" +#include "zeek/script_opt/ZAM/ZOp.h" + +namespace zeek::detail { + +class Expr; +class ConstExpr; +class Attributes; +class Stmt; + +using AttributesPtr = IntrusivePtr; + +// Maps ZAM frame slots to associated identifiers. +typedef std::vector FrameMap; + +// Maps ZAM frame slots to information for sharing the slot across +// multiple script variables. +class FrameSharingInfo { +public: + // The variables sharing the slot. ID's need to be non-const so we + // can manipulate them, for example by changing their interpreter + // frame offset. + std::vector ids; + + // A parallel vector, only used for fully compiled code, which + // gives the names of the identifiers. When in use, the above + // "ids" member variable may be empty. + std::vector names; + + // The ZAM instruction number where a given identifier starts its + // scope, parallel to "ids". + std::vector id_start; + + // The current end of the frame slot's scope. Gets updated as + // new IDs are added to share the slot. + int scope_end; + + // Whether this is a managed slot. 
+ bool is_managed; +}; + +typedef std::vector FrameReMap; + +class ZInstAux; + +// A ZAM instruction. This base class has all the information for +// execution, but omits information and methods only necessary for +// compiling. +class ZInst { +public: + ZInst(ZOp _op, ZAMOpType _op_type) + { + op = _op; + op_type = _op_type; + } + + // Create a stub instruction that will be populated later. + ZInst() { } + + virtual ~ZInst() { } + + // Methods for printing out the instruction for debugging/maintenance. + void Dump(int inst_num, const FrameReMap* mappings) const; + void Dump(const std::string& id1, const std::string& id2, + const std::string& id3, const std::string& id4) const; + + // Returns the name to use in identifying one of the slots/integer + // values (designated by "n"). "inst_num" identifes the instruction + // by its number within a larger set. "mappings" provides the + // mappings used to translate raw slots to the corresponding + // script variable(s). + std::string VName(int n, int inst_num, + const FrameReMap* mappings) const; + + // Number of slots that refer to a frame element. These always + // come first, if we use additional slots. + int NumFrameSlots() const; + + // Total number of slots in use. >= NumFrameSlots() + int NumSlots() const; + + // Returns nil if this instruction doesn't have an associated constant. + ValPtr ConstVal() const; + + // Returns a string describing the constant. + std::string ConstDump() const; + + ZOp op; + ZAMOpType op_type; + + // Usually indices into frame, though sometimes hold integer constants. + // When an instruction has both frame slots and integer constants, + // the former always come first, even if conceptually in the operation + // the constant is an "earlier" operand. + int v1, v2, v3, v4; + + ZVal c; // constant associated with instruction, if any + + // Meta-data associated with the execution. + + // Type, usually for interpreting the constant. 
+ TypePtr t = nullptr; + TypePtr t2 = nullptr; // just a few ops need two types + const Expr* e = nullptr; // only needed for "when" expressions + Func* func = nullptr; // used for calls + EventHandler* event_handler = nullptr; // used for referring to events + AttributesPtr attrs = nullptr; // used for things like constructors + + // Auxiliary information. We could in principle use this to + // consolidate a bunch of the above, though at the cost of + // slightly slower access. Most instructions don't need "aux", + // which is why we bundle these separately. + ZInstAux* aux = nullptr; + + // Location associated with this instruction, for error reporting. + const Location* loc = nullptr; + + // Whether v1 represents a frame slot type for which we + // explicitly manage the memory. + bool is_managed = false; +}; + +// A intermediary ZAM instruction, one that includes information/methods +// needed for compiling. Intermediate instructions use pointers to other +// such instructions for branches, rather than concrete instruction +// numbers. This allows the AM optimizer to easily prune instructions. +class ZInstI : public ZInst { +public: + // These constructors can be used directly, but often instead + // they'll be generated via the use of Inst-Gen methods. 
+ ZInstI(ZOp _op) : ZInst(_op, OP_X) + { + op = _op; + op_type = OP_X; + } + + ZInstI(ZOp _op, int _v1) : ZInst(_op, OP_V) + { + v1 = _v1; + } + + ZInstI(ZOp _op, int _v1, int _v2) : ZInst(_op, OP_VV) + { + v1 = _v1; + v2 = _v2; + } + + ZInstI(ZOp _op, int _v1, int _v2, int _v3) : ZInst(_op, OP_VVV) + { + v1 = _v1; + v2 = _v2; + v3 = _v3; + } + + ZInstI(ZOp _op, int _v1, int _v2, int _v3, int _v4) + : ZInst(_op, OP_VVVV) + { + v1 = _v1; + v2 = _v2; + v3 = _v3; + v4 = _v4; + } + + ZInstI(ZOp _op, const ConstExpr* ce) : ZInst(_op, OP_C) + { + InitConst(ce); + } + + ZInstI(ZOp _op, int _v1, const ConstExpr* ce) : ZInst(_op, OP_VC) + { + v1 = _v1; + InitConst(ce); + } + + ZInstI(ZOp _op, int _v1, int _v2, const ConstExpr* ce) + : ZInst(_op, OP_VVC) + { + v1 = _v1; + v2 = _v2; + InitConst(ce); + } + + ZInstI(ZOp _op, int _v1, int _v2, int _v3, const ConstExpr* ce) + : ZInst(_op, OP_VVVC) + { + v1 = _v1; + v2 = _v2; + v3 = _v3; + InitConst(ce); + } + + // Constructor used when we're going to just copy in another ZInstI. + ZInstI() { } + + // If "remappings" is non-nil, then it is used instead of frame_ids. + void Dump(const FrameMap* frame_ids, const FrameReMap* remappings) const; + + // Note that this is *not* an override of the base class's VName + // but instead a method with similar functionality but somewhat + // different behavior (namely, being cognizant of frame_ids). + std::string VName(int n, const FrameMap* frame_ids, + const FrameReMap* remappings) const; + + // True if this instruction definitely won't proceed to the one + // after it. + bool DoesNotContinue() const; + + // True if this instruction always branches elsewhere. Different + // from DoesNotContinue() in that returns & hook breaks do not + // continue, but they are not branches. + bool IsUnconditionalBranch() const { return op == OP_GOTO_V; } + + // True if this instruction is of the form "v1 = v2". 
+ bool IsDirectAssignment() const; + + // True if this instruction has side effects when executed, so + // should not be pruned even if it has a dead assignment. + bool HasSideEffects() const; + + // True if the given instruction assigns to the frame location + // given by slot 1 (v1). + bool AssignsToSlot1() const; + + // True if the given instruction uses the value in the given frame + // slot. (Assigning to the slot does not constitute using the value.) + bool UsesSlot(int slot) const; + + // Returns the slots used (not assigned to). Any slot not used + // is set to -1. Returns true if at least one slot was used. + bool UsesSlots(int& s1, int& s2, int& s3, int& s4) const; + + // Updates used (not assigned) slots per the given mapping. + void UpdateSlots(std::vector& slot_mapping); + + // True if the instruction corresponds to loading a global into + // the ZAM frame. + bool IsGlobalLoad() const; + + // True if the instruction corresponds to some sort of load, + // either from the interpreter frame or of a global. + bool IsLoad() const + { + return op_type == OP_VV_FRAME || IsGlobalLoad(); + } + + // True if the instruction corresponds to storing a global. + bool IsGlobalStore() const + { + return op == OP_STORE_GLOBAL_V; + } + + void CheckIfManaged(const TypePtr& t) + { if ( ZVal::IsManagedType(t) ) is_managed = true; } + + void SetType(TypePtr _t) + { + t = std::move(_t); + if ( t ) + CheckIfManaged(t); + } + + // Whether the instruction should be included in final code + // generation. + bool live = true; + + // Whether the instruction is the beginning of a loop, meaning + // it's the target of backward control flow. + bool loop_start = false; + + // How deep the instruction is within loop bodies (for all + // instructions in a loop, not just their beginnings). For + // example, a value of 2 means the instruction is inside a + // loop that itself is inside one more loop. + int loop_depth = 0; + + // Branch target, prior to concretizing into PC target. 
+ ZInstI* target = nullptr; + int target_slot = 0; // which of v1/v2/v3 should hold the target + + // The final PC location of the statement. -1 indicates not + // yet assigned. + int inst_num = -1; + + // Number of associated label(s) (indicating the statement is + // a branch target). + int num_labels = 0; + + // Used for debugging. Transformed into the ZInst "loc" field. + const Stmt* stmt = curr_stmt; + +private: + // Initialize 'c' from the given ConstExpr. + void InitConst(const ConstExpr* ce); +}; + +// Auxiliary information, used when the fixed ZInst layout lacks +// sufficient expressiveness to represent all of the elements that +// an instruction needs. +class ZInstAux { +public: + // if n is positive then it gives the size of parallel arrays + // tracking slots, constants, and types. + ZInstAux(int _n) + { + n = _n; + if ( n > 0 ) + { + slots = ints = new int[n]; + constants = new ValPtr[n]; + types = new TypePtr[n]; + } + } + + ~ZInstAux() + { + delete [] ints; + delete [] constants; + delete [] types; + } + + // Returns the i'th element of the parallel arrays as a ValPtr. + ValPtr ToVal(const ZVal* frame, int i) const + { + if ( constants[i] ) + return constants[i]; + else + return frame[slots[i]].ToVal(types[i]); + } + + // Returns the parallel arrays as a ListValPtr. + ListValPtr ToListVal(const ZVal* frame) const + { + auto lv = make_intrusive(TYPE_ANY); + for ( auto i = 0; i < n; ++i ) + lv->Append(ToVal(frame, i)); + + return lv; + } + + // Converts the parallel arrays to a ListValPtr suitable for + // use as indices for indexing a table or set. "offset" specifies + // which index we're looking for (there can be a bunch for + // constructors), and "width" the number of elements in a single + // index. 
+ ListValPtr ToIndices(const ZVal* frame, int offset, int width) const + { + auto lv = make_intrusive(TYPE_ANY); + for ( auto i = 0; i < 0 + width; ++i ) + lv->Append(ToVal(frame, offset + i)); + + return lv; + } + + // Returns the parallel arrays converted to a vector of ValPtr's. + const val_vec& ToValVec(const ZVal* frame) + { + vv.clear(); + FillValVec(vv, frame); + return vv; + } + + // Populates the given vector of ValPtr's with the conversion + // of the parallel arrays. + void FillValVec(val_vec& vec, const ZVal* frame) const + { + for ( auto i = 0; i < n; ++i ) + vec.push_back(ToVal(frame, i)); + } + + // When building up a ZInstAux, sets one element of the parallel + // arrays to a given frame slot and type. + void Add(int i, int slot, TypePtr t) + { + ints[i] = slot; + constants[i] = nullptr; + types[i] = t; + } + + // Same but for constants. + void Add(int i, ValPtr c) + { + ints[i] = -1; + constants[i] = c; + types[i] = nullptr; + } + + + // Member variables. We could add accessors for manipulating + // these (and make the variables private), but for convenience we + // make them directly available. + + // These are parallel arrays, used to build up lists of values. + // Each element is either an integer or a constant. Usually the + // integer is a frame slot (in which case "slots" points to "ints"; + // if not, it's nil). + // + // We track associated types, too, enabling us to use + // ZVal::ToVal to convert frame slots or constants to ValPtr's. + + int n; // size of arrays + int* slots = nullptr; // either nil or points to ints + int* ints = nullptr; + ValPtr* constants = nullptr; + TypePtr* types = nullptr; + + // Used for accessing function names. + ID* id_val = nullptr; + + // Whether the instruction can lead to globals changing. + // Currently only needed by the optimizer, but convenient + // to store here. 
+ bool can_change_globals = false; + + // The following is only used for OP_CONSTRUCT_KNOWN_RECORD_V, + // to map elements in slots/constants/types to record field offsets. + std::vector map; + + ///// The following three apply to looping over the elements of tables. + + // Frame slots of iteration variables, such as "[v1, v2, v3] in aggr". + std::vector loop_vars; + + // Their types. + std::vector loop_var_types; + + // Type associated with the "value" entry, for "k, value in aggr" + // iteration. + TypePtr value_var_type; + + + // This is only used to return values stored elsewhere in this + // object - it's not set directly. + // + // If we cared about memory penny-pinching, we could make this + // a pointer and only instantiate as needed. + val_vec vv; +}; + +// Returns a human-readable version of the given ZAM op-code. +extern const char* ZOP_name(ZOp op); + +// Maps a generic operation to a specific one associated with the given type. +// The third argument governs what to do if the given type has no assignment +// flavor. If true, this leads to an assertion failure. If false, and +// if there's no flavor for the type, then OP_NOP is returned. +extern ZOp AssignmentFlavor(ZOp orig, TypeTag tag, bool strict=true); + + +// The following all use initializations produced by Gen-ZAM. + +// Maps first operands, and then type tags, to operands. +extern std::unordered_map> assignment_flavor; + +// Maps flavorful assignments to their non-assignment counterpart. +// Used for optimization when we determine that the assigned-to +// value is superfluous. +extern std::unordered_map assignmentless_op; + +// Maps flavorful assignments to what op-type their non-assignment +// counterpart uses. 
+extern std::unordered_map<ZOp, ZAMOpType> assignmentless_op_type;
+
+} // namespace zeek::detail
diff --git a/src/script_opt/ZAM/ZOp.cc b/src/script_opt/ZAM/ZOp.cc
new file mode 100644
index 0000000000..c3b7244098
--- /dev/null
+++ b/src/script_opt/ZAM/ZOp.cc
@@ -0,0 +1,145 @@
+// See the file "COPYING" in the main distribution directory for copyright.
+
+#include "zeek/script_opt/ZAM/Support.h"
+#include "zeek/script_opt/ZAM/ZOp.h"
+
+
+namespace zeek::detail {
+
+// Returns a human-readable name for the given ZAM opcode. The generated
+// include is expected to supply a "case" for every opcode other than
+// OP_NOP, which is handled by hand here.
+const char* ZOP_name(ZOp op)
+	{
+	switch ( op ) {
+#include "zeek/ZAM-OpsNamesDefs.h"
+	case OP_NOP: return "nop";
+	}
+
+	// Only reached for a value that isn't a valid ZOp. Return a
+	// placeholder rather than flowing off the end of a non-void
+	// function, which is undefined behavior.
+	return "<unknown-op>";
+	}
+
+// Returns a human-readable name for the given operand-layout type:
+// the enumerator's name with its "OP_" prefix removed.
+static const char* op_type_name(ZAMOpType ot)
+	{
+	switch ( ot ) {
+	case OP_X: return "X";
+	case OP_C: return "C";
+	case OP_V: return "V";
+	case OP_V_I1: return "V_I1";
+	case OP_VC_I1: return "VC_I1";
+	case OP_VC: return "VC";
+	case OP_VV: return "VV";
+	case OP_VV_I2: return "VV_I2";
+	case OP_VV_I1_I2: return "VV_I1_I2";
+	case OP_VV_FRAME: return "VV_FRAME";
+	case OP_VVC: return "VVC";
+	case OP_VVC_I2: return "VVC_I2";
+	case OP_VVV: return "VVV";
+	case OP_VVV_I3: return "VVV_I3";
+	case OP_VVV_I2_I3: return "VVV_I2_I3";
+	case OP_VVVC: return "VVVC";
+	case OP_VVVC_I3: return "VVVC_I3";
+	case OP_VVVC_I2_I3: return "VVVC_I2_I3";
+	case OP_VVVC_I1_I2_I3: return "VVVC_I1_I2_I3";
+	case OP_VVVV: return "VVVV";
+	case OP_VVVV_I4: return "VVVV_I4";
+	case OP_VVVV_I3_I4: return "VVVV_I3_I4";
+	case OP_VVVV_I2_I3_I4: return "VVVV_I2_I3_I4";
+	}
+
+	// As in ZOP_name(): don't flow off the end for an invalid value.
+	return "<unknown-op-type>";
+	}
+
+
+// First-operand flavor for each operation; presumably one entry per ZOp,
+// in enum order. The generated include is expected to supply every entry
+// other than the final one, which is for OP_NOP.
+ZAMOp1Flavor op1_flavor[] = {
+#include "zeek/ZAM-Op1FlavorsDefs.h"
+	OP1_INTERNAL, // OP_NOP
+};
+
+// Whether each operation has side effects; laid out the same way.
+bool op_side_effects[] = {
+#include "zeek/ZAM-OpSideEffects.h"
+	false, // OP_NOP
+};
+
+
+// Definitions of the maps declared alongside AssignmentFlavor(). They
+// start out empty; presumably the generated code that AssignmentFlavor()
+// runs on its first call is what populates them.
+std::unordered_map<ZOp, std::unordered_map<TypeTag, ZOp>> assignment_flavor;
+std::unordered_map<ZOp, ZOp> assignmentless_op;
+std::unordered_map<ZOp, ZAMOpType> assignmentless_op_type;
+
+// Returns the type-specific flavor of the generic operation "orig" for
+// values of type "tag". If there's no such flavor, a strict lookup trips
+// an assertion; otherwise (or if assertions are disabled) OP_NOP is
+// returned.
+ZOp AssignmentFlavor(ZOp orig, TypeTag tag, bool strict)
+	{
+	static bool did_init = false;
+
+	if ( ! did_init )
+		{
+		// Not referenced directly here; presumably used by the
+		// generated code that follows.
+		std::unordered_map<TypeTag, ZOp> empty_map;
+
+#include "zeek/ZAM-AssignFlavorsDefs.h"
+
+		did_init = true;
+		}
+
+	// Map type tag to equivalent, as needed.
+	switch ( tag ) {
+	case TYPE_BOOL:
+	case TYPE_ENUM:
+		tag = TYPE_INT;
+		break;
+
+	case TYPE_PORT:
+		tag = TYPE_COUNT;
+		break;
+
+	case TYPE_TIME:
+	case TYPE_INTERVAL:
+		tag = TYPE_DOUBLE;
+		break;
+
+	default:
+		break;
+	}
+
+	// Use find() rather than count() + operator[]: it needs only one
+	// lookup per level, doesn't copy the per-op map, and can't insert
+	// an entry into the table as a side effect of a failed lookup.
+	auto flavors = assignment_flavor.find(orig);
+
+	if ( flavors == assignment_flavor.end() )
+		{
+		// Return explicitly even when strict, so that the miss is
+		// still handled if ASSERT expands to nothing.
+		ASSERT(! strict);
+		return OP_NOP;
+		}
+
+	auto flavor = flavors->second.find(tag);
+
+	if ( flavor == flavors->second.end() )
+		{
+		ASSERT(! strict);
+		return OP_NOP;
+		}
+
+	return flavor->second;
+	}
+
+} // zeek::detail
diff --git a/src/script_opt/ZAM/ZOp.h b/src/script_opt/ZAM/ZOp.h
new file mode 100644
index 0000000000..e0110e129b
--- /dev/null
+++ b/src/script_opt/ZAM/ZOp.h
@@ -0,0 +1,65 @@
+// See the file "COPYING" in the main distribution directory for copyright.
+
+// ZAM instruction opcodes and associated information.
+
+#pragma once
+
+namespace zeek::detail {
+
+// Opcodes associated with ZAM instructions.
+typedef enum {
+#include "zeek/ZAM-OpsDefs.h"
+	OP_NOP,
+} ZOp;
+
+
+// Possible types of instruction operands in terms of which fields they use.
+// Used for low-level optimization (so important that they're correct),
+// and for dumping instructions.
+
+// V: one of the instruction's integer values, treated as a frame slot
+// C: the instruction's associated constant
+// I1/I2/I3/I4: the instruction's integer value, used directly (not as a slot)
+// FRAME: a slot in the (interpreter) Frame object
+// X: no operands
+typedef enum {
+	OP_X, OP_C, OP_V, OP_V_I1, OP_VC_I1,
+
+	OP_VC,
+	OP_VV,
+	OP_VV_I2,
+	OP_VV_I1_I2,
+	OP_VV_FRAME,
+
+	OP_VVC,
+	OP_VVC_I2,
+	OP_VVV,
+	OP_VVV_I3,
+	OP_VVV_I2_I3,
+
+	OP_VVVC,
+	OP_VVVC_I3,
+	OP_VVVC_I2_I3,
+	OP_VVVC_I1_I2_I3,
+	OP_VVVV,
+	OP_VVVV_I4,
+	OP_VVVV_I3_I4,
+	OP_VVVV_I2_I3_I4,
+
+} ZAMOpType;
+
+// Possible "flavors" for an operator's first slot.
+typedef enum {
+	OP1_READ, // the slot is read, not modified
+	OP1_WRITE, // the slot is modified, not read - the most common
+	OP1_READ_WRITE, // the slot is both read and then modified, e.g. "++"
+	OP1_INTERNAL, // we're doing some internal manipulation of the slot
+} ZAMOp1Flavor;
+
+// Maps an operation (ZOp) to the flavor of its first slot.
+extern ZAMOp1Flavor op1_flavor[];
+
+// Maps an operation (ZOp) to whether it has side effects.
+extern bool op_side_effects[];
+
+} // namespace zeek::detail