From c1f240295f63c077b5b062353e73657317e6fe90 Mon Sep 17 00:00:00 2001 From: Christian Kreibich Date: Tue, 15 Mar 2022 21:34:17 -0700 Subject: [PATCH] gen-zam: Move Gen-ZAM sources to src subdirectory --- tools/gen-zam/src/Gen-ZAM.cc | 2223 ++++++++++++++++++++++++++++++++++ tools/gen-zam/src/Gen-ZAM.h | 986 +++++++++++++++ 2 files changed, 3209 insertions(+) create mode 100644 tools/gen-zam/src/Gen-ZAM.cc create mode 100644 tools/gen-zam/src/Gen-ZAM.h diff --git a/tools/gen-zam/src/Gen-ZAM.cc b/tools/gen-zam/src/Gen-ZAM.cc new file mode 100644 index 0000000000..92948b4d84 --- /dev/null +++ b/tools/gen-zam/src/Gen-ZAM.cc @@ -0,0 +1,2223 @@ +// See the file "COPYING" in the main distribution directory for copyright. + +#include "zeek/script_opt/ZAM/Gen-ZAM.h" + +#include +#include + +using namespace std; + +// Helper functions to convert dashes to underscores or vice versa. +static char dash_to_under(char c) + { + return c == '-' ? '_' : c; + } + +static char under_to_dash(char c) + { + return c == '_' ? '-' : c; + } + +// Structure for binding together Zeek script types, internal names Gen-ZAM +// uses to track them, mnemonics for referring to them in instruction names, +// the corresponding Val accessor, and whether the type requires memory +// management. 
+struct TypeInfo + { + string tag; + ZAM_ExprType et; + string suffix; + string accessor; // doesn't include "As" prefix or "()" suffix + bool is_managed; + }; + +static vector ZAM_type_info = { + {"TYPE_ADDR", ZAM_EXPR_TYPE_ADDR, "A", "Addr", true}, + {"TYPE_ANY", ZAM_EXPR_TYPE_ANY, "a", "Any", true}, + {"TYPE_COUNT", ZAM_EXPR_TYPE_UINT, "U", "Count", false}, + {"TYPE_DOUBLE", ZAM_EXPR_TYPE_DOUBLE, "D", "Double", false}, + {"TYPE_FILE", ZAM_EXPR_TYPE_FILE, "f", "File", true}, + {"TYPE_FUNC", ZAM_EXPR_TYPE_FUNC, "F", "Func", true}, + {"TYPE_INT", ZAM_EXPR_TYPE_INT, "I", "Int", false}, + {"TYPE_LIST", ZAM_EXPR_TYPE_LIST, "L", "List", true}, + {"TYPE_OPAQUE", ZAM_EXPR_TYPE_OPAQUE, "O", "Opaque", true}, + {"TYPE_PATTERN", ZAM_EXPR_TYPE_PATTERN, "P", "Pattern", true}, + {"TYPE_RECORD", ZAM_EXPR_TYPE_RECORD, "R", "Record", true}, + {"TYPE_STRING", ZAM_EXPR_TYPE_STRING, "S", "String", true}, + {"TYPE_SUBNET", ZAM_EXPR_TYPE_SUBNET, "N", "SubNet", true}, + {"TYPE_TABLE", ZAM_EXPR_TYPE_TABLE, "T", "Table", true}, + {"TYPE_TYPE", ZAM_EXPR_TYPE_TYPE, "t", "Type", true}, + {"TYPE_VECTOR", ZAM_EXPR_TYPE_VECTOR, "V", "Vector", true}, +}; + +// Given a ZAM_ExprType, returns the corresponding TypeInfo. +const TypeInfo& find_type_info(ZAM_ExprType et) + { + assert(et != ZAM_EXPR_TYPE_NONE); + + auto pred = [et](const TypeInfo& ti) -> bool + { + return ti.et == et; + }; + auto ti = std::find_if(ZAM_type_info.begin(), ZAM_type_info.end(), pred); + + assert(ti != ZAM_type_info.end()); + return *ti; + } + +// Given a ZAM_ExprType, return its ZVal accessor. Takes into account +// some naming inconsistencies between ZVal's and Val's. 
+string find_type_accessor(ZAM_ExprType et) + { + switch ( et ) + { + case ZAM_EXPR_TYPE_NONE: + return ""; + + case ZAM_EXPR_TYPE_UINT: + return "uint_val"; + + case ZAM_EXPR_TYPE_PATTERN: + return "re_val"; + + default: + { + string acc = find_type_info(et).accessor; + transform(acc.begin(), acc.end(), acc.begin(), ::tolower); + return acc + "_val"; + } + } + } + +// Maps ZAM operand types to pairs of (1) the C++ name used to declare +// the operand in a method declaration, and (2) the variable name to +// use for the operand. +unordered_map> ArgsManager::ot_to_args = { + {ZAM_OT_AUX, {"OpaqueVals*", "v"}}, + {ZAM_OT_CONSTANT, {"const ConstExpr*", "c"}}, + {ZAM_OT_EVENT_HANDLER, {"EventHandler*", "h"}}, + {ZAM_OT_INT, {"int", "i"}}, + {ZAM_OT_LIST, {"const ListExpr*", "l"}}, + {ZAM_OT_RECORD_FIELD, {"const NameExpr*", "n"}}, + {ZAM_OT_VAR, {"const NameExpr*", "n"}}, + + // The following gets special treatment. + {ZAM_OT_ASSIGN_FIELD, {"const NameExpr*", "n"}}, +}; + +ArgsManager::ArgsManager(const vector& ot, ZAM_InstClass zc) + { + int n = 0; + bool add_field = false; + + for ( const auto& ot_i : ot ) + { + if ( ot_i == ZAM_OT_NONE ) + { // it had better be the only operand type + assert(ot.size() == 1); + break; + } + + if ( n++ == 0 && zc == ZIC_COND ) + // Skip the conditional's nominal assignment slot. + continue; + + // Start off the argument info using the usual case + // of (1) same method parameter name as GenInst argument, + // and (2) not requiring a record field. + auto& arg_i = ot_to_args[ot_i]; + Arg arg = {arg_i.second, arg_i.first, arg_i.second, false}; + + if ( ot_i == ZAM_OT_ASSIGN_FIELD ) + { + arg.is_field = true; + + if ( n == 1 ) + { // special-case the parameter + arg.decl_name = "flhs"; + arg.decl_type = "const FieldLHSAssignExpr*"; + } + } + + args.emplace_back(move(arg)); + } + + Differentiate(); + } + +void ArgsManager::Differentiate() + { + // First, figure out which parameter names are used how often. 
+ map name_count; // how often the name apepars + map usage_count; // how often the name's been used so far + for ( auto& arg : args ) + { + auto& name = arg.param_name; + if ( name_count.count(name) == 0 ) + { + name_count[name] = 1; + usage_count[name] = 0; + } + else + ++name_count[name]; + } + + // Now for each name - whether appearing as an argument or in + // a declaration - if it's used more than once, then differentiate + // it. Note, some names only appear multiple times as arguments + // when invoking methods, but not in the declarations of the methods + // themselves. + for ( auto& arg : args ) + { + auto& decl = arg.decl_name; + auto& name = arg.param_name; + bool decl_and_arg_same = decl == name; + + if ( name_count[name] == 1 ) + continue; // it's unique + + auto n = to_string(++usage_count[name]); + name += n; + if ( decl_and_arg_same ) + decl += n; + } + + // Finally, build the full versions of the declaration and parameters. + + // Tracks how many record fields we're dealing with. + int num_fields = 0; + + for ( auto& arg : args ) + { + if ( ! full_decl.empty() ) + full_decl += ", "; + + full_decl += arg.decl_type + " " + arg.decl_name; + + if ( ! full_params.empty() ) + full_params += ", "; + + full_params += arg.param_name; + params.push_back(arg.param_name); + + if ( arg.is_field ) + ++num_fields; + } + + assert(num_fields <= 2); + + // Add in additional arguments/parameters for record fields. + if ( num_fields == 1 ) + full_params += ", field"; + else if ( num_fields == 2 ) + { + full_decl += ", int field2"; + full_params += ", field1, field2"; + } + } + +ZAM_OpTemplate::ZAM_OpTemplate(ZAMGen* _g, string _base_name) : g(_g), base_name(move(_base_name)) + { + // Make the base name viable in a C++ name. 
+ transform(base_name.begin(), base_name.end(), base_name.begin(), dash_to_under); + + cname = base_name; + transform(cname.begin(), cname.end(), cname.begin(), ::toupper); + } + +void ZAM_OpTemplate::Build() + { + op_loc = g->CurrLoc(); + + string line; + while ( g->ScanLine(line) ) + { + if ( line.size() <= 1 ) + break; + + auto words = g->SplitIntoWords(line); + if ( words.empty() ) + break; + + Parse(words[0], line, words); + } + } + +void ZAM_OpTemplate::Instantiate() + { + InstantiateOp(OperandTypes(), IncludesVectorOp()); + } + +void ZAM_OpTemplate::UnaryInstantiate() + { + // First operand is always the frame slot to which this operation + // assigns the result of the applying unary operator. + vector ots = {ZAM_OT_VAR}; + ots.resize(2); + + // Now build versions for a constant operand (maybe not actually + // needed due to constant folding, but sometimes that gets deferred + // to run-time) ... + if ( ! NoConst() ) + { + ots[1] = ZAM_OT_CONSTANT; + InstantiateOp(ots, IncludesVectorOp()); + } + + // ... and for a variable (frame-slot) operand. 
+ ots[1] = ZAM_OT_VAR; + InstantiateOp(ots, IncludesVectorOp()); + } + +void ZAM_OpTemplate::Parse(const string& attr, const string& line, const Words& words) + { + int num_args = -1; // -1 = don't enforce + int nwords = words.size(); + + if ( attr == "type" ) + { + if ( nwords <= 1 ) + g->Gripe("missing argument", line); + + num_args = 1; + + const char* types = words[1].c_str(); + while ( *types ) + { + ZAM_OperandType ot = ZAM_OT_NONE; + switch ( *types ) + { + case 'C': + ot = ZAM_OT_CONSTANT; + break; + case 'F': + ot = ZAM_OT_ASSIGN_FIELD; + break; + case 'H': + ot = ZAM_OT_EVENT_HANDLER; + break; + case 'L': + ot = ZAM_OT_LIST; + break; + case 'O': + ot = ZAM_OT_AUX; + break; + case 'R': + ot = ZAM_OT_RECORD_FIELD; + break; + case 'V': + ot = ZAM_OT_VAR; + break; + case 'i': + ot = ZAM_OT_INT; + break; + + case 'X': + ot = ZAM_OT_NONE; + break; + + default: + g->Gripe("bad operand type", words[1]); + break; + } + + AddOpType(ot); + + ++types; + } + } + + else if ( attr == "op1-read" ) + { + num_args = 0; + SetOp1Flavor("OP1_READ"); + } + + else if ( attr == "op1-read-write" ) + { + num_args = 0; + SetOp1Flavor("OP1_READ_WRITE"); + } + + else if ( attr == "op1-internal" ) + { + num_args = 0; + SetOp1Flavor("OP1_INTERNAL"); + } + + else if ( attr == "set-type" ) + { + num_args = 1; + if ( nwords > 1 ) + SetTypeParam(ExtractTypeParam(words[1])); + } + + else if ( attr == "set-type2" ) + { + num_args = 1; + if ( nwords > 1 ) + SetType2Param(ExtractTypeParam(words[1])); + } + + else if ( attr == "custom-method" ) + SetCustomMethod(g->SkipWords(line, 1)); + + else if ( attr == "method-post" ) + SetPostMethod(g->SkipWords(line, 1)); + + else if ( attr == "side-effects" ) + { + if ( nwords == 3 ) + SetAssignmentLess(words[1], words[2]); + else + // otherwise shouldn't be any arguments + num_args = 0; + + SetHasSideEffects(); + } + + else if ( attr == "no-eval" ) + { + num_args = 0; + SetNoEval(); + } + + else if ( attr == "vector" ) + { + num_args = 0; + 
SetIncludesVectorOp(); + } + + else if ( attr == "assign-val" ) + { + num_args = 1; + if ( words.size() > 1 ) + SetAssignVal(words[1]); + } + + else if ( attr == "eval" ) + { + AddEval(g->SkipWords(line, 1)); + + auto addl = GatherEval(); + if ( ! addl.empty() ) + AddEval(addl); + } + + else if ( attr == "macro" ) + g->ReadMacro(line); + + else + g->Gripe("unknown template attribute", attr); + + if ( num_args >= 0 && num_args != nwords - 1 ) + g->Gripe("extraneous or missing arguments", line); + } + +string ZAM_OpTemplate::GatherEval() + { + string res; + string l; + while ( g->ScanLine(l) ) + { + if ( l.size() <= 1 || ! isspace(l.c_str()[0]) ) + { + g->PutBack(l); + return res; + } + + res += l; + } + + return res; + } + +int ZAM_OpTemplate::ExtractTypeParam(const string& arg) + { + if ( arg == "$$" ) + return 1; + + if ( arg[0] != '$' ) + g->Gripe("bad set-type parameter, should be $n", arg); + + int param = atoi(&arg[1]); + + if ( param <= 0 || param > 2 ) + g->Gripe("bad set-type parameter, should be $1 or $2", arg); + + // Convert operand to underlying instruction element, i.e., add + // one to account for the $$ assignment slot. + return param + 1; + } + +// Maps an operand type to a character mnemonic used to distinguish +// it from others. 
+unordered_map ZAM_OpTemplate::ot_to_char = { + {ZAM_OT_AUX, 'O'}, {ZAM_OT_CONSTANT, 'C'}, {ZAM_OT_EVENT_HANDLER, 'H'}, + {ZAM_OT_ASSIGN_FIELD, 'F'}, {ZAM_OT_INT, 'i'}, {ZAM_OT_LIST, 'L'}, + {ZAM_OT_NONE, 'X'}, {ZAM_OT_RECORD_FIELD, 'R'}, {ZAM_OT_VAR, 'V'}, +}; + +void ZAM_OpTemplate::InstantiateOp(const vector& ot, bool do_vec) + { + auto method = MethodName(ot); + + InstantiateOp(method, ot, ZIC_REGULAR); + + if ( IncludesFieldOp() ) + InstantiateOp(method, ot, ZIC_FIELD); + + if ( do_vec ) + InstantiateOp(method, ot, ZIC_VEC); + + if ( IsConditionalOp() ) + InstantiateOp(method, ot, ZIC_COND); + } + +void ZAM_OpTemplate::InstantiateOp(const string& method, const vector& ot, + ZAM_InstClass zc) + { + string suffix = ""; + + if ( zc == ZIC_FIELD ) + suffix = "_field"; + else if ( zc == ZIC_VEC ) + suffix = "_vec"; + else if ( zc == ZIC_COND ) + suffix = "_cond"; + + if ( ! IsInternalOp() ) + InstantiateMethod(method, suffix, ot, zc); + + if ( IsAssignOp() ) + InstantiateAssignOp(ot, suffix); + else + { + InstantiateEval(ot, suffix, zc); + + if ( HasAssignmentLess() ) + { + auto op_string = "_" + OpSuffix(ot); + auto op = g->GenOpCode(this, op_string); + GenAssignmentlessVersion(op); + } + } + } + +void ZAM_OpTemplate::GenAssignmentlessVersion(string op) + { + EmitTo(AssignFlavor); + Emit("assignmentless_op[" + op + "] = " + AssignmentLessOp() + ";"); + Emit("assignmentless_op_type[" + op + "] = " + AssignmentLessOpType() + ";"); + } + +void ZAM_OpTemplate::InstantiateMethod(const string& m, const string& suffix, + const vector& ot_orig, ZAM_InstClass zc) + { + if ( IsInternalOp() ) + return; + + auto ot = ot_orig; + if ( zc == ZIC_FIELD ) + // Need to make room for the field offset. 
+ ot.emplace_back(ZAM_OT_INT); + + auto decls = MethodDeclare(ot, zc); + + EmitTo(MethodDecl); + Emit("const ZAMStmt " + m + suffix + "(" + decls + ");"); + + EmitTo(MethodDef); + Emit("const ZAMStmt ZAMCompiler::" + m + suffix + "(" + decls + ")"); + BeginBlock(); + + InstantiateMethodCore(ot, suffix, zc); + + if ( HasPostMethod() ) + Emit(GetPostMethod()); + + if ( ! HasCustomMethod() ) + Emit("return AddInst(z);"); + + EndBlock(); + NL(); + } + +void ZAM_OpTemplate::InstantiateMethodCore(const vector& ot, string suffix, + ZAM_InstClass zc) + { + if ( HasCustomMethod() ) + { + Emit(GetCustomMethod()); + return; + } + + assert(! ot.empty()); + + string full_suffix = "_" + OpSuffix(ot) + suffix; + + Emit("ZInstI z;"); + + if ( ot[0] == ZAM_OT_AUX ) + { + auto op = g->GenOpCode(this, full_suffix, zc); + Emit("z = ZInstI(" + op + ");"); + return; + } + + if ( ot[0] == ZAM_OT_NONE ) + { + auto op = g->GenOpCode(this, full_suffix, zc); + Emit("z = GenInst(" + op + ");"); + return; + } + + if ( ot.size() > 1 && ot[1] == ZAM_OT_AUX ) + { + auto op = g->GenOpCode(this, full_suffix, zc); + Emit("z = ZInstI(" + op + ", Frame1Slot(n, " + op + "));"); + return; + } + + ArgsManager args(ot, zc); + BuildInstruction(ot, args.Params(), full_suffix, zc); + + auto tp = GetTypeParam(); + if ( tp > 0 ) + Emit("z.SetType(" + args.NthParam(tp - 1) + "->GetType());"); + + auto tp2 = GetType2Param(); + if ( tp2 > 0 ) + Emit("z.t2 = " + args.NthParam(tp2 - 1) + "->GetType();"); + } + +void ZAM_OpTemplate::BuildInstruction(const vector& ot, const string& params, + const string& suffix, ZAM_InstClass zc) + { + auto op = g->GenOpCode(this, suffix, zc); + Emit("z = GenInst(" + op + ", " + params + ");"); + } + +void ZAM_OpTemplate::InstantiateEval(const vector& ot, const string& suffix, + ZAM_InstClass zc) + { + auto eval = GetEval(); + + if ( ot.size() > 1 ) + { // Check for use of "$1" to indicate the operand + string op1; + if ( ot[1] == ZAM_OT_CONSTANT ) + op1 = "z.c"; + else if ( ot[1] 
== ZAM_OT_VAR ) + op1 = "frame[z.v2]"; + + eval = regex_replace(eval, regex("\\$1"), op1); + } + + InstantiateEval(Eval, OpSuffix(ot) + suffix, eval, zc); + } + +void ZAM_OpTemplate::InstantiateEval(EmitTarget et, const string& op_suffix, const string& eval, + ZAM_InstClass zc) + { + auto op_code = g->GenOpCode(this, "_" + op_suffix, zc); + + EmitTo(et); + Emit("case " + op_code + ":"); + BeginBlock(); + Emit(eval); + EndBlock(); + EmitUp("break;"); + NL(); + } + +void ZAM_OpTemplate::InstantiateAssignOp(const vector& ot, const string& suffix) + { + // First, create a generic version of the operand, which the + // ZAM compiler uses to find specific-flavored versions. + auto op_string = "_" + OpSuffix(ot); + auto generic_op = g->GenOpCode(this, op_string); + auto flavor_ind = "assignment_flavor[" + generic_op + "]"; + + EmitTo(AssignFlavor); + Emit(flavor_ind + " = empty_map;"); + + auto eval = GetEval(); + auto v = GetAssignVal(); + + for ( auto& ti : ZAM_type_info ) + { + auto op = g->GenOpCode(this, op_string + "_" + ti.suffix); + + if ( IsInternalOp() ) + { + EmitTo(AssignFlavor); + Emit(flavor_ind + "[" + ti.tag + "] = " + op + ";"); + + if ( HasAssignmentLess() ) + GenAssignmentlessVersion(op); + } + + EmitTo(Eval); + Emit("case " + op + ":"); + BeginBlock(); + GenAssignOpCore(ot, eval, ti.accessor, ti.is_managed); + Emit("break;"); + EndBlock(); + } + } + +void ZAM_OpTemplate::GenAssignOpCore(const vector& ot, const string& eval, + const string& accessor, bool is_managed) + { + if ( HasAssignVal() ) + { + GenAssignOpValCore(eval, accessor, is_managed); + return; + } + + if ( ! eval.empty() ) + g->Gripe("assign-op should not have an \"eval\"", eval); + + auto lhs_field = (ot[0] == ZAM_OT_ASSIGN_FIELD); + auto rhs_field = lhs_field && ot.size() > 2 && (ot[2] == ZAM_OT_INT); + auto constant_op = (ot[1] == ZAM_OT_CONSTANT); + + string rhs = constant_op ? "z.c" : "frame[z.v2]"; + + auto acc = ".As" + accessor + "()"; + + if ( accessor == "Any" && constant_op && ! 
rhs_field ) + { + // "any_val = constant" or "x$any_val = constant". + // + // These require special-casing, because to avoid going + // through a CoerceToAny operation, we allow expressing + // these directly. They don't fit with the usual assignment + // paradigm since the RHS differs in type from the LHS. + Emit("auto v = z.c.ToVal(z.t);"); + + if ( lhs_field ) + { + Emit("auto r = frame[z.v1].AsRecord();"); + Emit("auto& f = r->RawField(z.v2);"); + } + else + Emit("auto& f = frame[z.v1];"); + + Emit("zeek::Unref(f.ManagedVal());"); + Emit("f = ZVal(v.release());"); + } + + else if ( rhs_field ) + { + // The following is counter-intuitive, but comes from the + // fact that we build out the instruction parameters as + // an echo of the method parameters, and for this case that + // means that the RHS field offset comes *before*, not after, + // the LHS field offset. + auto lhs_offset = constant_op ? 3 : 4; + auto rhs_offset = lhs_offset - 1; + + Emit("auto v = " + rhs + ".AsRecord()->RawOptField(z.v" + to_string(rhs_offset) + + "); // note, RHS field before LHS field"); + + Emit("if ( ! v )"); + BeginBlock(); + Emit("ZAM_run_time_error(z.loc, \"field value missing\");"); + Emit("break;"); + EndBlock(); + + auto slot = "z.v" + to_string(lhs_offset); + Emit("auto r = frame[z.v1].AsRecord();"); + Emit("auto& f = r->RawField(" + slot + "); // note, LHS field after RHS field"); + + if ( is_managed ) + { + Emit("zeek::Ref((*v)" + acc + ");"); + Emit("zeek::Unref(f.ManagedVal());"); + } + + Emit("f = *v;"); + } + + else + { + if ( is_managed ) + Emit("zeek::Ref(" + rhs + acc + ");"); + + if ( lhs_field ) + { + auto lhs_offset = constant_op ? 
2 : 3; + auto slot = "z.v" + to_string(lhs_offset); + Emit("auto r = frame[z.v1].AsRecord();"); + Emit("auto& f = r->RawField(" + slot + ");"); + + if ( is_managed ) + Emit("zeek::Unref(f.ManagedVal());"); + + Emit("f = " + rhs + ";"); + } + + else + { + if ( is_managed ) + Emit("zeek::Unref(frame[z.v1].ManagedVal());"); + + Emit("frame[z.v1] = ZVal(" + rhs + acc + ");"); + } + } + + if ( lhs_field ) + Emit("r->Modified();"); + } + +void ZAM_OpTemplate::GenAssignOpValCore(const string& eval, const string& accessor, bool is_managed) + { + auto v = GetAssignVal(); + + Emit(eval); + + // Maps Zeek types to how to get the underlying value from a ValPtr. + static unordered_map val_accessors = { + {"Addr", "->AsAddrVal()"}, {"Any", ".get()"}, + {"Count", "->AsCount()"}, {"Double", "->AsDouble()"}, + {"Int", "->AsInt()"}, {"Pattern", "->AsPatternVal()"}, + {"String", "->AsStringVal()"}, {"SubNet", "->AsSubNetVal()"}, + {"Table", "->AsTableVal()"}, {"Vector", "->AsVectorVal()"}, + {"File", "->AsFile()"}, {"Func", "->AsFunc()"}, + {"List", "->AsListVal()"}, {"Opaque", "->AsOpaqueVal()"}, + {"Record", "->AsRecordVal()"}, {"Type", "->AsTypeVal()"}, + }; + + auto val_accessor = val_accessors[accessor]; + + string rhs; + if ( IsInternalOp() ) + rhs = v + val_accessor; + else + rhs = v + ".As" + accessor + "()"; + + if ( is_managed ) + { + Emit("auto rhs = " + rhs + ";"); + Emit("zeek::Ref(rhs);"); + Emit("Unref(frame[z.v1].ManagedVal());"); + Emit("frame[z.v1] = ZVal(rhs);"); + } + else + Emit("frame[z.v1] = ZVal(" + rhs + ");"); + } + +string ZAM_OpTemplate::MethodName(const vector& ot) const + { + return base_name + OpSuffix(ot); + } + +string ZAM_OpTemplate::MethodDeclare(const vector& ot, ZAM_InstClass zc) + { + ArgsManager args(ot, zc); + return args.Decls(); + } + +string ZAM_OpTemplate::OpSuffix(const vector& ot) const + { + string os; + for ( auto& o : ot ) + os += ot_to_char[o]; + return os; + } + +string ZAM_OpTemplate::SkipWS(const string& s) const + { + auto sp = 
s.c_str(); + while ( *sp && isspace(*sp) ) + ++sp; + + return sp; + } + +void ZAM_OpTemplate::Emit(const string& s) + { + g->Emit(curr_et, s); + } + +void ZAM_OpTemplate::EmitNoNL(const string& s) + { + g->SetNoNL(true); + Emit(s); + g->SetNoNL(false); + } + +void ZAM_OpTemplate::IndentUp() + { + g->IndentUp(); + } + +void ZAM_OpTemplate::IndentDown() + { + g->IndentDown(); + } + +void ZAM_UnaryOpTemplate::Instantiate() + { + UnaryInstantiate(); + } + +void ZAM_DirectUnaryOpTemplate::Instantiate() + { + EmitTo(DirectDef); + Emit("case EXPR_" + cname + ":\treturn " + direct + "(lhs, rhs);"); + } + +// Maps op-type mnemonics to the corresponding internal value used by Gen-ZAM. +static unordered_map expr_type_names = { + {'*', ZAM_EXPR_TYPE_DEFAULT}, {'A', ZAM_EXPR_TYPE_ADDR}, {'a', ZAM_EXPR_TYPE_ANY}, + {'D', ZAM_EXPR_TYPE_DOUBLE}, {'f', ZAM_EXPR_TYPE_FILE}, {'F', ZAM_EXPR_TYPE_FUNC}, + {'I', ZAM_EXPR_TYPE_INT}, {'L', ZAM_EXPR_TYPE_LIST}, {'X', ZAM_EXPR_TYPE_NONE}, + {'O', ZAM_EXPR_TYPE_OPAQUE}, {'P', ZAM_EXPR_TYPE_PATTERN}, {'R', ZAM_EXPR_TYPE_RECORD}, + {'S', ZAM_EXPR_TYPE_STRING}, {'N', ZAM_EXPR_TYPE_SUBNET}, {'T', ZAM_EXPR_TYPE_TABLE}, + {'t', ZAM_EXPR_TYPE_TYPE}, {'U', ZAM_EXPR_TYPE_UINT}, {'V', ZAM_EXPR_TYPE_VECTOR}, +}; + +// Inverse of the above. +static unordered_map expr_name_types; + +ZAM_ExprOpTemplate::ZAM_ExprOpTemplate(ZAMGen* _g, string _base_name) + : ZAM_OpTemplate(_g, _base_name) + { + static bool did_map_init = false; + + if ( ! did_map_init ) + { // Create the inverse mapping. 
+ for ( auto& tn : expr_type_names ) + expr_name_types[tn.second] = tn.first; + + did_map_init = true; + } + } + +void ZAM_ExprOpTemplate::Parse(const string& attr, const string& line, const Words& words) + { + if ( attr == "op-type" ) + { + if ( words.size() == 1 ) + g->Gripe("op-type needs arguments", line); + + for ( auto i = 1U; i < words.size(); ++i ) + { + auto& w_i = words[i]; + if ( w_i.size() != 1 ) + g->Gripe("bad op-type argument", w_i); + + auto et_c = w_i.c_str()[0]; + if ( expr_type_names.count(et_c) == 0 ) + g->Gripe("bad op-type argument", w_i); + + AddExprType(expr_type_names[et_c]); + } + } + + else if ( attr == "includes-field-op" ) + { + if ( words.size() != 1 ) + g->Gripe("includes-field-op does not take any arguments", line); + + SetIncludesFieldOp(); + } + + else if ( attr == "eval-type" ) + { + if ( words.size() < 3 ) + g->Gripe("eval-type needs type and evaluation", line); + + auto& type = words[1]; + if ( type.size() != 1 ) + g->Gripe("bad eval-type type", type); + + auto type_c = type.c_str()[0]; + if ( expr_type_names.count(type_c) == 0 ) + g->Gripe("bad eval-type type", type); + + auto et = expr_type_names[type_c]; + + if ( expr_types.count(et) == 0 ) + g->Gripe("eval-type type not present in eval-type", type); + + auto eval = g->SkipWords(line, 2); + eval += GatherEval(); + AddEvalSet(et, eval); + } + + else if ( attr == "eval-mixed" ) + { + if ( words.size() < 4 ) + g->Gripe("eval-mixed needs types and evaluation", line); + + auto& type1 = words[1]; + auto& type2 = words[2]; + if ( type1.size() != 1 || type2.size() != 1 ) + g->Gripe("bad eval-mixed types", line); + + auto type_c1 = type1.c_str()[0]; + auto type_c2 = type2.c_str()[0]; + if ( expr_type_names.count(type_c1) == 0 || expr_type_names.count(type_c2) == 0 ) + g->Gripe("bad eval-mixed types", line); + + auto et1 = expr_type_names[type_c1]; + auto et2 = expr_type_names[type_c2]; + + if ( eval_set.count(et1) > 0 ) + g->Gripe("eval-mixed uses type also included in op-type", 
line); + + auto eval = g->SkipWords(line, 3); + eval += GatherEval(); + AddEvalSet(et1, et2, eval); + } + + else if ( attr == "eval-pre" ) + { + if ( words.size() < 2 ) + g->Gripe("eval-pre needs evaluation", line); + + auto eval = g->SkipWords(line, 1); + eval += GatherEval(); + + SetPreEval(eval); + } + + else + // Not an attribute specific to expr-op's. + ZAM_OpTemplate::Parse(attr, line, words); + } + +void ZAM_ExprOpTemplate::Instantiate() + { + InstantiateOp(OperandTypes(), IncludesVectorOp()); + + if ( op_types.size() > 1 && op_types[1] == ZAM_OT_CONSTANT ) + InstantiateC1(op_types, op_types.size() - 1); + if ( op_types.size() > 2 && op_types[2] == ZAM_OT_CONSTANT ) + InstantiateC2(op_types, op_types.size() - 1); + if ( op_types.size() > 3 && op_types[3] == ZAM_OT_CONSTANT ) + InstantiateC3(op_types); + + bool all_var = true; + for ( auto i = 1U; i < op_types.size(); ++i ) + if ( op_types[i] != ZAM_OT_VAR ) + all_var = false; + + if ( all_var ) + InstantiateV(op_types); + + if ( op_types.size() == 3 && op_types[1] == ZAM_OT_RECORD_FIELD && op_types[2] == ZAM_OT_INT ) + InstantiateV(op_types); + } + +void ZAM_ExprOpTemplate::InstantiateC1(const vector& ots, int arity, bool do_vec) + { + string args = "lhs, r1->AsConstExpr()"; + + if ( arity == 1 && ots[0] == ZAM_OT_RECORD_FIELD ) + args += ", rhs->AsFieldExpr()->Field()"; + + else if ( arity > 1 ) + { + args += ", "; + + if ( ots[2] == ZAM_OT_RECORD_FIELD ) + args += "rhs->AsFieldExpr()->Field()"; + else + args += "r2->AsNameExpr()"; + } + + auto m = MethodName(ots); + + EmitTo(C1Def); + + EmitNoNL("case EXPR_" + cname + ":"); + + if ( do_vec ) + DoVectorCase(m, args); + else + EmitUp("return " + m + "(" + args + ");"); + + if ( IncludesFieldOp() ) + { + EmitTo(C1FieldDef); + Emit("case EXPR_" + cname + ":\treturn " + m + "_field(" + args + ", field);"); + } + } + +void ZAM_ExprOpTemplate::InstantiateC2(const vector& ots, int arity) + { + string args = "lhs, r1->AsNameExpr(), r2->AsConstExpr()"; + + if ( 
arity == 3 ) + args += ", r3->AsNameExpr()"; + + auto method = MethodName(ots); + auto m = method.c_str(); + + EmitTo(C2Def); + Emit("case EXPR_" + cname + ":\treturn " + m + "(" + args + ");"); + + if ( IncludesFieldOp() ) + { + EmitTo(C2FieldDef); + Emit("case EXPR_" + cname + ":\treturn " + m + "_field(" + args + ", field);"); + } + } + +void ZAM_ExprOpTemplate::InstantiateC3(const vector& ots) + { + EmitTo(C3Def); + Emit("case EXPR_" + cname + ":\treturn " + MethodName(ots) + + "(lhs, r1->AsNameExpr(), r2->AsNameExpr(), r3->AsConstExpr());"); + } + +void ZAM_ExprOpTemplate::InstantiateV(const vector& ots) + { + auto m = MethodName(ots); + + string args = "lhs, r1->AsNameExpr()"; + + if ( ots.size() >= 3 ) + { + if ( ots[2] == ZAM_OT_INT ) + { + string acc_flav = IncludesFieldOp() ? "Has" : ""; + args += ", rhs->As" + acc_flav + "FieldExpr()->Field()"; + } + else + args += ", r2->AsNameExpr()"; + + if ( ots.size() == 4 ) + args += ", r3->AsNameExpr()"; + } + + EmitTo(VDef); + EmitNoNL("case EXPR_" + cname + ":"); + + if ( IncludesVectorOp() ) + DoVectorCase(m, args); + else + EmitUp("return " + m + "(" + args + ");"); + + if ( IncludesFieldOp() ) + { + EmitTo(VFieldDef); + Emit("case EXPR_" + cname + ":\treturn " + m + "_field(" + args + ", field);"); + } + } + +void ZAM_ExprOpTemplate::DoVectorCase(const string& m, const string& args) + { + NL(); + IndentUp(); + Emit("if ( rt->Tag() == TYPE_VECTOR )"); + EmitUp("return " + m + "_vec(" + args + ");"); + Emit("else"); + EmitUp("return " + m + "(" + args + ");"); + IndentDown(); + } + +void ZAM_ExprOpTemplate::BuildInstructionCore(const string& params, const string& suffix, + ZAM_InstClass zc) + { + Emit("auto tag = t->Tag();"); + Emit("auto i_t = t->InternalType();"); + + int ncases = 0; + + for ( auto& [et1, et2_map] : eval_mixed_set ) + for ( auto& [et2, eval] : et2_map ) + GenMethodTest(et1, et2, params, suffix, ++ncases > 1, zc); + + bool do_default = false; + + for ( auto et : ExprTypes() ) + { + if ( et == 
ZAM_EXPR_TYPE_DEFAULT ) + do_default = true; + else + GenMethodTest(et, et, params, suffix, ++ncases > 1, zc); + } + + Emit("else"); + + if ( do_default ) + { + auto op = g->GenOpCode(this, suffix, zc); + EmitUp("z = GenInst(" + op + ", " + params + ");"); + } + + else + EmitUp("reporter->InternalError(\"bad tag when generating method core\");"); + } + +void ZAM_ExprOpTemplate::GenMethodTest(ZAM_ExprType et1, ZAM_ExprType et2, const string& params, + const string& suffix, bool do_else, ZAM_InstClass zc) + { + // Maps ZAM_ExprType's to the information needed (variable name, + // constant to compare it against) to identify using an "if" test + // that a given AST Expr node employs the given type of operand. + static map> if_tests = { + {ZAM_EXPR_TYPE_ADDR, {"i_t", "TYPE_INTERNAL_ADDR"}}, + {ZAM_EXPR_TYPE_ANY, {"tag", "TYPE_ANY"}}, + {ZAM_EXPR_TYPE_DOUBLE, {"i_t", "TYPE_INTERNAL_DOUBLE"}}, + {ZAM_EXPR_TYPE_FILE, {"tag", "TYPE_FILE"}}, + {ZAM_EXPR_TYPE_FUNC, {"tag", "TYPE_FUNC"}}, + {ZAM_EXPR_TYPE_INT, {"i_t", "TYPE_INTERNAL_INT"}}, + {ZAM_EXPR_TYPE_LIST, {"tag", "TYPE_LIST"}}, + {ZAM_EXPR_TYPE_OPAQUE, {"tag", "TYPE_OPAQUE"}}, + {ZAM_EXPR_TYPE_PATTERN, {"tag", "TYPE_PATTERN"}}, + {ZAM_EXPR_TYPE_RECORD, {"tag", "TYPE_RECORD"}}, + {ZAM_EXPR_TYPE_STRING, {"i_t", "TYPE_INTERNAL_STRING"}}, + {ZAM_EXPR_TYPE_SUBNET, {"i_t", "TYPE_INTERNAL_SUBNET"}}, + {ZAM_EXPR_TYPE_TABLE, {"tag", "TYPE_TABLE"}}, + {ZAM_EXPR_TYPE_TYPE, {"tag", "TYPE_TYPE"}}, + {ZAM_EXPR_TYPE_UINT, {"i_t", "TYPE_INTERNAL_UNSIGNED"}}, + {ZAM_EXPR_TYPE_VECTOR, {"tag", "TYPE_VECTOR"}}, + }; + + if ( if_tests.count(et1) == 0 ) + g->Gripe("bad op-type", op_loc); + + auto if_test = if_tests[et1]; + auto if_var = if_test.first; + auto if_val = if_test.second; + + string test = "if ( " + if_var + " == " + if_val + " )"; + if ( do_else ) + test = "else " + test; + + Emit(test); + + auto op_suffix = suffix + "_" + expr_name_types[et1]; + if ( et2 != et1 ) + op_suffix += expr_name_types[et2]; + + auto op = 
g->GenOpCode(this, op_suffix, zc); + EmitUp("z = GenInst(" + op + ", " + params + ");"); + } + +EvalInstance::EvalInstance(ZAM_ExprType _lhs_et, ZAM_ExprType _op1_et, ZAM_ExprType _op2_et, + string _eval, bool _is_def) + { + lhs_et = _lhs_et; + op1_et = _op1_et; + op2_et = _op2_et; + eval = move(_eval); + is_def = _is_def; + } + +string EvalInstance::LHSAccessor(bool is_ptr) const + { + if ( lhs_et == ZAM_EXPR_TYPE_NONE || lhs_et == ZAM_EXPR_TYPE_DEFAULT ) + return ""; + + string deref = is_ptr ? "->" : "."; + string acc = find_type_accessor(lhs_et); + + return deref + acc; + } + +string EvalInstance::Accessor(ZAM_ExprType et, bool is_ptr) const + { + if ( et == ZAM_EXPR_TYPE_NONE || et == ZAM_EXPR_TYPE_DEFAULT ) + return ""; + + string deref = is_ptr ? "->" : "."; + return deref + "As" + find_type_info(et).accessor + "()"; + } + +string EvalInstance::OpMarker() const + { + if ( op1_et == ZAM_EXPR_TYPE_DEFAULT || op1_et == ZAM_EXPR_TYPE_NONE ) + return ""; + + if ( op1_et == op2_et ) + return "_" + find_type_info(op1_et).suffix; + + return "_" + find_type_info(op1_et).suffix + find_type_info(op2_et).suffix; + } + +void ZAM_ExprOpTemplate::InstantiateEval(const vector& ot_orig, + const string& suffix, ZAM_InstClass zc) + { + if ( expr_types.empty() ) + { // No operand types to expand over. + ZAM_OpTemplate::InstantiateEval(ot_orig, suffix, zc); + return; + } + + auto ot = ot_orig; + if ( zc == ZIC_FIELD ) + // Make room for the offset. + ot.emplace_back(ZAM_OT_INT); + + auto ot_str = OpSuffix(ot); + + // Some of these might not wind up being used, but no harm in + // initializing them in case they are. + string lhs, op1, op2; + string branch_target = "z.v"; + + EmitTarget emit_target = Eval; + + if ( zc == ZIC_VEC ) + { + lhs = "vec1[i]"; + op1 = "vec2[i]"; + op2 = "vec3[i]"; + + emit_target = Arity() == 1 ? Vec1Eval : Vec2Eval; + } + + else + { + lhs = "frame[z.v1]"; + + auto op1_offset = zc == ZIC_COND ? 
1 : 2; + auto op2_offset = op1_offset + 1; + bool ot1_const = ot[1] == ZAM_OT_CONSTANT; + bool ot2_const = Arity() >= 2 && ot[2] == ZAM_OT_CONSTANT; + + if ( ot1_const ) + { + op1 = "z.c"; + --op2_offset; + branch_target += "2"; + } + else + { + op1 = "frame[z.v" + to_string(op1_offset) + "]"; + + if ( Arity() > 1 && ot[2] == ZAM_OT_VAR ) + branch_target += "3"; + else + branch_target += "2"; + } + + if ( ot2_const ) + op2 = "z.c"; + else + op2 = "frame[z.v" + to_string(op2_offset) + "]"; + + if ( zc == ZIC_FIELD ) + { + // Compute the slot holding the field offset. + + auto f = + // The first slots are taken up by the + // assignment slot and the operands ... + Arity() + 1 + + // ... and slots are numbered starting at 1. + +1; + + if ( ot1_const || ot2_const ) + // One of the operand slots won't be needed + // due to the presence of a constant. + // (It's never the case that both operands + // are constants - those instead get folded.) + --f; + + lhs += ".AsRecord()->RawField(z.v" + to_string(f) + ")"; + } + } + + vector eval_instances; + + for ( auto et : expr_types ) + { + auto is_def = eval_set.count(et) == 0; + string eval = is_def ? GetEval() : eval_set[et]; + auto lhs_et = IsConditionalOp() ? ZAM_EXPR_TYPE_INT : et; + eval_instances.emplace_back(lhs_et, et, et, eval, is_def); + } + + if ( zc != ZIC_VEC ) + for ( auto em1 : eval_mixed_set ) + { + auto et1 = em1.first; + for ( auto em2 : em1.second ) + { + auto et2 = em2.first; + + // For the LHS, either its expression type is + // ignored, or if it's a conditional, so just + // note it for the latter. 
+ auto lhs_et = ZAM_EXPR_TYPE_INT; + eval_instances.emplace_back(lhs_et, et1, et2, em2.second, false); + } + } + + for ( auto& ei : eval_instances ) + { + auto lhs_accessor = ei.LHSAccessor(); + if ( HasExplicitResultType() ) + lhs_accessor = ""; + + string lhs_ei = lhs; + if ( zc != ZIC_VEC ) + lhs_ei += lhs_accessor; + + auto op1_ei = op1 + ei.Op1Accessor(zc == ZIC_VEC); + auto op2_ei = op2 + ei.Op2Accessor(zc == ZIC_VEC); + + auto eval = SkipWS(ei.Eval()); + + auto has_target = eval.find("$$") != string::npos; + + if ( zc == ZIC_VEC ) + { + const char* rhs; + if ( has_target ) + rhs = "\\$\\$ = ([^;\n]*)"; + else + rhs = "^[^;\n]*"; + + auto replacement = VecEvalRE(has_target); + + eval = regex_replace(eval, regex(rhs), replacement); + } + + auto is_none = ei.LHS_ET() == ZAM_EXPR_TYPE_NONE; + auto is_default = ei.LHS_ET() == ZAM_EXPR_TYPE_DEFAULT; + + if ( ! is_none && ! is_default && find_type_info(ei.LHS_ET()).is_managed && + ! HasExplicitResultType() ) + { + auto delim = zc == ZIC_VEC ? "->" : "."; + auto pre = "auto hold_lhs = " + lhs + delim + "ManagedVal();\n\t"; + auto post = "\tUnref(hold_lhs);"; + eval = pre + eval + post; + } + + eval = regex_replace(eval, regex("\\$1"), op1_ei); + eval = regex_replace(eval, regex("\\$2"), op2_ei); + + string pre = GetPreEval(); + pre = regex_replace(pre, regex("\\$1"), op1_ei); + pre = regex_replace(pre, regex("\\$2"), op2_ei); + + if ( has_target ) + eval = regex_replace(eval, regex("\\$\\$"), lhs_ei); + + else if ( zc == ZIC_COND ) + { // Aesthetics: get rid of trailing newlines. + eval = regex_replace(eval, regex("\n"), ""); + eval = "if ( ! (" + eval + ") ) " + "{ pc = " + branch_target + "; continue; }"; + } + + else if ( ! is_none && (ei.IsDefault() || IsConditionalOp()) ) + { + eval = lhs_ei + " = " + eval; + + // Ensure a single terminating semicolon. 
+ eval = regex_replace(eval, regex(";*\n"), ";\n"); + } + + eval = pre + eval; + + auto full_suffix = ot_str + suffix + ei.OpMarker(); + + ZAM_OpTemplate::InstantiateEval(emit_target, full_suffix, eval, zc); + + if ( zc == ZIC_VEC ) + { + string dispatch_params = "frame[z.v1].AsVectorRef(), frame[z.v2].AsVector()"; + + if ( Arity() == 2 ) + dispatch_params += ", frame[z.v3].AsVector()"; + + auto op_code = g->GenOpCode(this, "_" + full_suffix); + auto dispatch = "vec_exec(" + op_code + ", z.t, " + dispatch_params + ", z);"; + + ZAM_OpTemplate::InstantiateEval(Eval, full_suffix, dispatch, zc); + } + } + } + +void ZAM_UnaryExprOpTemplate::Parse(const string& attr, const string& line, const Words& words) + { + if ( attr == "no-const" ) + { + if ( words.size() != 1 ) + g->Gripe("extraneous argument to no-const", line); + + SetNoConst(); + } + + else if ( attr == "explicit-result-type" ) + { + if ( words.size() != 1 ) + g->Gripe("extraneous argument to explicit-result-type", line); + SetHasExplicitResultType(); + } + + else + ZAM_ExprOpTemplate::Parse(attr, line, words); + } + +void ZAM_UnaryExprOpTemplate::Instantiate() + { + UnaryInstantiate(); + + vector ots = {ZAM_OT_VAR, ZAM_OT_CONSTANT}; + + if ( ! NoConst() ) + InstantiateC1(ots, 1, IncludesVectorOp()); + + ots[1] = ZAM_OT_VAR; + InstantiateV(ots); + } + +void ZAM_UnaryExprOpTemplate::BuildInstruction(const vector& ot, + const string& params, const string& suffix, + ZAM_InstClass zc) + { + const auto& ets = ExprTypes(); + + if ( ets.size() == 1 && ets.count(ZAM_EXPR_TYPE_NONE) == 1 ) + { + ZAM_ExprOpTemplate::BuildInstruction(ot, params, suffix, zc); + return; + } + + auto constant_op = ot[1] == ZAM_OT_CONSTANT; + string type_src = constant_op ? "c" : "n2"; + + if ( ot[0] == ZAM_OT_ASSIGN_FIELD ) + { + type_src = constant_op ? 
"n" : "n1"; + Emit("auto " + type_src + " = flhs->GetOp1()->AsNameExpr();"); + Emit("auto t = flhs->GetType();"); + Emit("int field = flhs->Field();"); + } + + else + { + if ( IsAssignOp() ) + type_src = constant_op ? "n" : "n1"; + + auto type_suffix = zc == ZIC_VEC ? "->Yield();" : ";"; + Emit("auto t = " + type_src + "->GetType()" + type_suffix); + } + + BuildInstructionCore(params, suffix, zc); + + if ( IsAssignOp() && IsFieldOp() ) + // These can't take the type from the LHS variable, since + // that's the enclosing record and not the field within it. + Emit("z.t = t;"); + + else if ( zc == ZIC_VEC ) + { + if ( constant_op ) + Emit("z.t = n->GetType();"); + else + Emit("z.t = n1->GetType();"); + } + } + +ZAM_AssignOpTemplate::ZAM_AssignOpTemplate(ZAMGen* _g, string _base_name) + : ZAM_UnaryExprOpTemplate(_g, _base_name) + { + // Assignments apply to every valid form of ExprType. + for ( auto& etn : expr_type_names ) + { + auto et = etn.second; + if ( et != ZAM_EXPR_TYPE_NONE && et != ZAM_EXPR_TYPE_DEFAULT ) + AddExprType(et); + } + } + +void ZAM_AssignOpTemplate::Parse(const string& attr, const string& line, const Words& words) + { + if ( attr == "field-op" ) + { + if ( words.size() != 1 ) + g->Gripe("field-op does not take any arguments", line); + + SetFieldOp(); + } + + else + ZAM_OpTemplate::Parse(attr, line, words); + } + +void ZAM_AssignOpTemplate::Instantiate() + { + if ( op_types.size() != 1 ) + g->Gripe("operation needs precisely one \"type\"", op_loc); + + vector ots; + ots.push_back(op_types[0]); + + // Build constant/variable versions ... + ots.push_back(ZAM_OT_CONSTANT); + + if ( ots[0] == ZAM_OT_RECORD_FIELD ) + ots.push_back(ZAM_OT_INT); + + InstantiateOp(ots, false); + if ( IsFieldOp() ) + InstantiateC1(ots, 1); + + ots[1] = ZAM_OT_VAR; + InstantiateOp(ots, false); + + // ... and for assignments to fields, additional field versions. 
+ if ( ots[0] == ZAM_OT_ASSIGN_FIELD ) + { + ots.push_back(ZAM_OT_INT); + InstantiateOp(ots, false); + + ots[1] = ZAM_OT_CONSTANT; + InstantiateOp(ots, false); + } + + else if ( IsFieldOp() ) + InstantiateV(ots); + } + +void ZAM_BinaryExprOpTemplate::Instantiate() + { + // As usual, the first slot receives the operator's result. + vector ots = {ZAM_OT_VAR}; + ots.resize(3); + + // Build each combination for constant/variable operand, + // except skip constant/constant as that is always folded. + + // We only include vector operations when both operands + // are non-constants. + + ots[1] = ZAM_OT_CONSTANT; + ots[2] = ZAM_OT_VAR; + InstantiateOp(ots, false); + + if ( ! IsInternalOp() ) + InstantiateC1(ots, 2, false); + + ots[1] = ZAM_OT_VAR; + ots[2] = ZAM_OT_CONSTANT; + InstantiateOp(ots, false); + + if ( ! IsInternalOp() ) + InstantiateC2(ots, 2); + + ots[2] = ZAM_OT_VAR; + InstantiateOp(ots, IncludesVectorOp()); + + if ( ! IsInternalOp() ) + InstantiateV(ots); + } + +void ZAM_BinaryExprOpTemplate::BuildInstruction(const vector& ot, + const string& params, const string& suffix, + ZAM_InstClass zc) + { + auto constant_op = ot[1] == ZAM_OT_CONSTANT; + string type_src = constant_op ? "c" : "n2"; + auto type_suffix = zc == ZIC_VEC ? 
"->Yield();" : ";"; + Emit("auto t = " + type_src + "->GetType()" + type_suffix); + BuildInstructionCore(params, suffix, zc); + + if ( zc == ZIC_VEC ) + Emit("z.t = n1->GetType();"); + } + +void ZAM_RelationalExprOpTemplate::Instantiate() + { + ZAM_BinaryExprOpTemplate::Instantiate(); + + EmitTo(Cond); + + Emit("case EXPR_" + cname + ":"); + IndentUp(); + Emit("if ( n1 && n2 )"); + EmitUp("return " + cname + "VVV_cond(n1, n2);"); + Emit("else if ( n1 )"); + EmitUp("return " + cname + "VVC_cond(n1, c);"); + Emit("else"); + EmitUp("return " + cname + "VCV_cond(c, n2);"); + IndentDown(); + NL(); + } + +void ZAM_RelationalExprOpTemplate::BuildInstruction(const vector& ot, + const string& params, const string& suffix, + ZAM_InstClass zc) + { + string op1; + + if ( zc == ZIC_COND ) + { + if ( ot[1] == ZAM_OT_CONSTANT ) + op1 = "c"; + else if ( ot[2] == ZAM_OT_CONSTANT ) + op1 = "n"; + else + op1 = "n1"; + } + else + op1 = "n2"; + + auto type_suffix = zc == ZIC_VEC ? "->Yield();" : ";"; + Emit("auto t = " + op1 + "->GetType()" + type_suffix); + BuildInstructionCore(params, suffix, zc); + + if ( zc == ZIC_VEC ) + Emit("z.t = n1->GetType();"); + } + +void ZAM_InternalBinaryOpTemplate::Parse(const string& attr, const string& line, const Words& words) + { + if ( attr == "op-accessor" ) + { + if ( words.size() != 2 ) + g->Gripe("op-accessor takes one argument", line); + + SetOpAccessor(words[1]); + } + + else if ( attr == "op1-accessor" ) + { + if ( words.size() != 2 ) + g->Gripe("op-accessor1 takes one argument", line); + + SetOp1Accessor(words[1]); + } + + else if ( attr == "op2-accessor" ) + { + if ( words.size() != 2 ) + g->Gripe("op-accessor2 takes one argument", line); + + SetOp2Accessor(words[1]); + } + + else + ZAM_BinaryExprOpTemplate::Parse(attr, line, words); + } + +void ZAM_InternalBinaryOpTemplate::InstantiateEval(const vector& ot, + const string& suffix, ZAM_InstClass zc) + { + assert(ot.size() == 3); + + auto op1_const = ot[1] == ZAM_OT_CONSTANT; + auto 
op2_const = ot[2] == ZAM_OT_CONSTANT; + + string op1 = op1_const ? "z.c" : "frame[z.v2]"; + string op2 = op2_const ? "z.c" : (op1_const ? "frame[z.v2]" : "frame[z.v3]"); + + string prelude = "auto op1 = " + op1 + "." + op1_accessor + ";\n"; + prelude += "auto op2 = " + op2 + "." + op2_accessor + ";\n"; + + auto eval = prelude + GetEval(); + + auto& ets = ExprTypes(); + if ( ! ets.empty() ) + { + if ( ets.size() != 1 ) + g->Gripe("internal-binary-op's can have at most one op-type", op_loc); + + for ( auto& et : ets ) + { + auto acc = find_type_accessor(et); + auto lhs = "frame[z.v1]." + acc; + eval = regex_replace(eval, regex("\\$\\$"), lhs); + } + } + + ZAM_OpTemplate::InstantiateEval(Eval, OpSuffix(ot) + suffix, eval, zc); + } + +void ZAM_InternalOpTemplate::Parse(const string& attr, const string& line, const Words& words) + { + if ( attr != "num-call-args" ) + { + if ( attr == "indirect-call" ) + { + if ( words.size() != 1 ) + g->Gripe("indirect-call takes one argument", line); + // Note, currently only works with a *subsequent* + // num-call-args, whose setting needs to be 'n'. + is_indirect_call = true; + } + else + ZAM_OpTemplate::Parse(attr, line, words); + + return; + } + + if ( words.size() != 2 ) + g->Gripe("num-call-args takes one argument", line); + + eval = "std::vector args;\n"; + + auto& arg = words[1]; + int n = arg == "n" ? -1 : stoi(arg); + + auto arg_offset = HasAssignVal() ? 1 : 0; + auto arg_slot = arg_offset + 1; + + string func = "z.func"; + + if ( n == 1 ) + { + eval += "args.push_back("; + if ( op_types[arg_offset] == ZAM_OT_CONSTANT ) + eval += "z.c"; + else + eval += "frame[z.v" + to_string(arg_slot) + "]"; + + eval += ".ToVal(z.t));\n"; + } + + else if ( n != 0 ) + { + eval += "auto aux = z.aux;\n"; + + if ( n < 0 ) + { + if ( is_indirect_call ) + { + func = "func"; + + eval += "auto sel = z.v" + to_string(arg_slot) + ";\n"; + eval += "auto func = (sel < 0) ? 
"; + eval += "aux->id_val->GetVal()->AsFunc() : "; + eval += "frame[sel].AsFunc();\n"; + } + + eval += "auto n = aux->n;\n"; + eval += "for ( auto i = 0; i < n; ++i )\n"; + eval += "\targs.push_back(aux->ToVal(frame, i));\n"; + } + + else + for ( auto i = 0; i < n; ++i ) + { + eval += "args.push_back(aux->ToVal(frame, "; + eval += to_string(i); + eval += "));\n"; + } + } + + eval += "f->SetCallLoc(z.loc);\n"; + + if ( HasAssignVal() ) + { + auto av = GetAssignVal(); + eval += "auto " + av + " = " + func + "->Invoke(&args, f);\n"; + eval += "if ( ! " + av + " ) { ZAM_error = true; break; }\n"; + } + else + eval += "(void) " + func + "->Invoke(&args, f);\n"; + } + +bool TemplateInput::ScanLine(string& line) + { + if ( ! put_back.empty() ) + { + line = put_back; + put_back.clear(); + return true; + } + + char buf[8192]; + + // Read lines, discarding comments, which have to start at the + // beginning of a line. + do + { + if ( ! fgets(buf, sizeof buf, f) ) + return false; + ++loc.line_num; + } while ( buf[0] == '#' ); + + line = buf; + return true; + } + +vector TemplateInput::SplitIntoWords(const string& line) const + { + vector words; + + for ( auto start = line.c_str(); *start && *start != '\n'; ) + { + auto end = start + 1; + while ( *end && ! isspace(*end) ) + ++end; + + words.emplace_back(string(start, end - start)); + + start = end; + while ( *start && isspace(*start) ) + ++start; + } + + return words; + } + +string TemplateInput::SkipWords(const string& line, int n) const + { + auto s = line.c_str(); + + for ( int i = 0; i < n; ++i ) + { + // Find end of current word. + while ( *s && *s != '\n' ) + { + if ( isspace(*s) ) + break; + ++s; + } + + if ( *s == '\n' ) + break; + + // Find start of next word. 
+ while ( *s && isspace(*s) ) + ++s; + } + + return string(s); + } + +void TemplateInput::Gripe(const char* msg, const string& input) const + { + auto input_s = input.c_str(); + int n = strlen(input_s); + + fprintf(stderr, "%s, line %d: %s - %s", loc.file_name, loc.line_num, msg, input_s); + if ( n == 0 || input_s[n - 1] != '\n' ) + fprintf(stderr, "\n"); + + exit(1); + } + +void TemplateInput::Gripe(const char* msg, const InputLoc& l) const + { + fprintf(stderr, "%s, line %d: %s\n", l.file_name, l.line_num, msg); + exit(1); + } + +ZAMGen::ZAMGen(int argc, char** argv) + { + auto prog_name = argv[0]; + + if ( argc != 2 ) + { + fprintf(stderr, "usage: %s \n", prog_name); + exit(1); + } + + auto file_name = argv[1]; + auto f = strcmp(file_name, "-") ? fopen(file_name, "r") : stdin; + + if ( ! f ) + { + fprintf(stderr, "%s: cannot open \"%s\"\n", prog_name, file_name); + exit(1); + } + + ti = make_unique(f, prog_name, file_name); + + InitEmitTargets(); + + while ( ParseTemplate() ) + ; + + for ( auto& t : templates ) + t->Instantiate(); + + GenMacros(); + + CloseEmitTargets(); + } + +void ZAMGen::ReadMacro(const string& line) + { + vector mac; + mac.emplace_back(SkipWords(line, 1)); + + string s; + while ( ScanLine(s) ) + { + if ( s.size() <= 1 || ! 
isspace(s.c_str()[0]) ) + { + PutBack(s); + break; + } + + mac.push_back(s); + } + + macros.emplace_back(move(mac)); + } + +void ZAMGen::GenMacros() + { + for ( auto& m : macros ) + { + for ( auto i = 0U; i < m.size(); ++i ) + { + auto ms = m[i]; + if ( i == 0 ) + ms = "#define " + ms; + + if ( i < m.size() - 1 ) + ms = regex_replace(ms, regex("\n"), " \\\n"); + + Emit(EvalMacros, ms); + } + + Emit(EvalMacros, "\n"); + } + } + +string ZAMGen::GenOpCode(const ZAM_OpTemplate* ot, const string& suffix, ZAM_InstClass zc) + { + auto op = "OP_" + ot->CanonicalName() + suffix; + + static unordered_set known_opcodes; + + if ( known_opcodes.count(op) > 0 ) + // We've already done this one, don't re-define its auxiliary + // information. + return op; + + known_opcodes.insert(op); + + IndentUp(); + + // Generate the enum defining the opcode ... + Emit(OpDef, op + ","); + + // ... the "flavor" of how it treats its first operand ... + auto op_comment = ",\t// " + op; + auto op1_always_read = (zc == ZIC_FIELD || zc == ZIC_COND); + auto flavor = op1_always_read ? "OP1_READ" : ot->GetOp1Flavor(); + Emit(Op1Flavor, flavor + op_comment); + + // ... whether it has side effects ... + auto se = ot->HasSideEffects() ? "true" : "false"; + Emit(OpSideEffects, se + op_comment); + + // ... and the switch case that maps the enum to a string + // representation. + auto name = ot->BaseName(); + transform(name.begin(), name.end(), name.begin(), ::tolower); + name += suffix; + transform(name.begin(), name.end(), name.begin(), under_to_dash); + Emit(OpName, "case " + op + ":\treturn \"" + name + "\";"); + + IndentDown(); + + return op; + } + +void ZAMGen::Emit(EmitTarget et, const string& s) + { + assert(et != None); + + if ( gen_files.count(et) == 0 ) + { + fprintf(stderr, "bad generation file type\n"); + exit(1); + } + + FILE* f = gen_files[et]; + + for ( auto i = indent_level; i > 0; --i ) + fputs("\t", f); + + fputs(s.c_str(), f); + + if ( ! 
no_NL && (s.empty() || s.back() != '\n') ) + fputs("\n", f); + } + +void ZAMGen::InitEmitTargets() + { + // Maps an EmitTarget enum to its corresponding filename. + static const unordered_map gen_file_names = { + {None, nullptr}, + {AssignFlavor, "ZAM-AssignFlavorsDefs.h"}, + {C1Def, "ZAM-GenExprsDefsC1.h"}, + {C1FieldDef, "ZAM-GenFieldsDefsC1.h"}, + {C2Def, "ZAM-GenExprsDefsC2.h"}, + {C2FieldDef, "ZAM-GenFieldsDefsC2.h"}, + {C3Def, "ZAM-GenExprsDefsC3.h"}, + {Cond, "ZAM-Conds.h"}, + {DirectDef, "ZAM-DirectDefs.h"}, + {Eval, "ZAM-EvalDefs.h"}, + {EvalMacros, "ZAM-EvalMacros.h"}, + {MethodDecl, "ZAM-MethodDecls.h"}, + {MethodDef, "ZAM-MethodDefs.h"}, + {Op1Flavor, "ZAM-Op1FlavorsDefs.h"}, + {OpDef, "ZAM-OpsDefs.h"}, + {OpName, "ZAM-OpsNamesDefs.h"}, + {OpSideEffects, "ZAM-OpSideEffects.h"}, + {VDef, "ZAM-GenExprsDefsV.h"}, + {VFieldDef, "ZAM-GenFieldsDefsV.h"}, + {Vec1Eval, "ZAM-Vec1EvalDefs.h"}, + {Vec2Eval, "ZAM-Vec2EvalDefs.h"}, + }; + + for ( auto& gfn : gen_file_names ) + { + auto fn = gfn.second; + if ( ! fn ) + continue; + + auto f = fopen(fn, "w"); + if ( ! 
f ) + { + fprintf(stderr, "can't open generation file %s\n", fn); + exit(1); + } + + gen_files[gfn.first] = f; + } + + InitSwitch(C1Def, "C1 assignment"); + InitSwitch(C2Def, "C2 assignment"); + InitSwitch(C3Def, "C3 assignment"); + InitSwitch(VDef, "V assignment"); + + InitSwitch(C1FieldDef, "C1 field assignment"); + InitSwitch(C2FieldDef, "C2 field assignment"); + InitSwitch(VFieldDef, "V field assignment"); + } + +void ZAMGen::InitSwitch(EmitTarget et, string desc) + { + Emit(et, "{"); + Emit(et, "switch ( rhs->Tag() ) {"); + + switch_targets[et] = desc; + } + +void ZAMGen::CloseEmitTargets() + { + FinishSwitches(); + + for ( auto& gf : gen_files ) + fclose(gf.second); + } + +void ZAMGen::FinishSwitches() + { + for ( auto& st : switch_targets ) + { + auto et = st.first; + auto& desc = st.second; + + Emit(et, "default:"); + IndentUp(); + Emit(et, "reporter->InternalError(\"inconsistency in " + desc + + ": %s\", obj_desc(rhs).c_str());"); + IndentDown(); + Emit(et, "}"); + Emit(et, "}"); + } + } + +bool ZAMGen::ParseTemplate() + { + string line; + + if ( ! ScanLine(line) ) + return false; + + if ( line.size() <= 1 ) + // A blank line - no template to parse. + return true; + + auto words = SplitIntoWords(line); + + if ( words.size() < 2 ) + Gripe("too few words at start of template", line); + + auto op = words[0]; + + if ( op == "macro" ) + { + ReadMacro(line); + return true; + } + + auto op_name = words[1]; + + // We track issues with the wrong number of template arguments + // up front, to avoid mis-invoking constructors, but we don't + // report these until later because if the template names a + // bad operation, it's better to report that as the core problem. 
+ const char* args_mismatch = nullptr; + + if ( op == "direct-unary-op" ) + { + if ( words.size() != 3 ) + args_mismatch = "direct-unary-op takes 2 arguments"; + } + + else if ( words.size() != 2 ) + args_mismatch = "templates take 1 argument"; + + unique_ptr t; + + if ( op == "op" ) + t = make_unique(this, op_name); + else if ( op == "unary-op" ) + t = make_unique(this, op_name); + else if ( op == "direct-unary-op" && ! args_mismatch ) + t = make_unique(this, op_name, words[2]); + else if ( op == "assign-op" ) + t = make_unique(this, op_name); + else if ( op == "expr-op" ) + t = make_unique(this, op_name); + else if ( op == "unary-expr-op" ) + t = make_unique(this, op_name); + else if ( op == "binary-expr-op" ) + t = make_unique(this, op_name); + else if ( op == "rel-expr-op" ) + t = make_unique(this, op_name); + else if ( op == "internal-binary-op" ) + t = make_unique(this, op_name); + else if ( op == "internal-op" ) + t = make_unique(this, op_name); + else if ( op == "internal-assignment-op" ) + t = make_unique(this, op_name); + + else + Gripe("bad template name", op); + + if ( args_mismatch ) + Gripe(args_mismatch, line); + + t->Build(); + templates.emplace_back(move(t)); + + return true; + } + +int main(int argc, char** argv) + { + try + { + ZAMGen zg(argc, argv); + exit(0); + } + catch ( const std::regex_error& e ) + { + fprintf(stderr, "%s: regular expression error - %s\n", argv[0], e.what()); + exit(1); + } + } diff --git a/tools/gen-zam/src/Gen-ZAM.h b/tools/gen-zam/src/Gen-ZAM.h new file mode 100644 index 0000000000..e5f2eaec4d --- /dev/null +++ b/tools/gen-zam/src/Gen-ZAM.h @@ -0,0 +1,986 @@ +// See the file "COPYING" in the main distribution directory for copyright. + +// Gen-ZAM is a standalone program that takes as input a file specifying +// ZAM operations and from them generates a (large) set of C++ include +// files used to instantiate those operations as low-level ZAM instructions. 
// An instruction can have one of four basic classes.  The class is passed
// around as "zc" and selects how a given flavor of an operation is
// generated.
enum ZAM_InstClass
	{
	ZIC_REGULAR, // a non-complicated instruction
	ZIC_COND, // a conditional branch
	ZIC_VEC, // a vector operation
	ZIC_FIELD, // a record field assignment
	};

// For a given instruction operand, its general type.  A list of these
// (one per operand) describes one flavor of an operation.
enum ZAM_OperandType
	{
	ZAM_OT_CONSTANT, // uses the instruction's associated constant
	ZAM_OT_EVENT_HANDLER, // uses the associated event handler
	ZAM_OT_INT, // directly specified integer
	ZAM_OT_VAR, // frame slot associated with a variable

	ZAM_OT_ASSIGN_FIELD, // record field offset to assign to
	ZAM_OT_RECORD_FIELD, // record field offset to access

	// The following wind up the same in the ultimate instruction,
	// but they differ in the calling sequences used to generate
	// the instruction.
	ZAM_OT_AUX, // uses the instruction's "aux" field
	ZAM_OT_LIST, // a list, managed via the "aux" field

	ZAM_OT_NONE, // instruction has no direct operands
	};
+enum ZAM_ExprType + { + ZAM_EXPR_TYPE_ADDR, + ZAM_EXPR_TYPE_ANY, + ZAM_EXPR_TYPE_DOUBLE, + ZAM_EXPR_TYPE_FUNC, + ZAM_EXPR_TYPE_INT, + ZAM_EXPR_TYPE_PATTERN, + ZAM_EXPR_TYPE_RECORD, + ZAM_EXPR_TYPE_STRING, + ZAM_EXPR_TYPE_SUBNET, + ZAM_EXPR_TYPE_TABLE, + ZAM_EXPR_TYPE_UINT, + ZAM_EXPR_TYPE_VECTOR, + ZAM_EXPR_TYPE_FILE, + ZAM_EXPR_TYPE_OPAQUE, + ZAM_EXPR_TYPE_LIST, + ZAM_EXPR_TYPE_TYPE, + + // Used to specify "apart from the explicitly specified operand + // types, do this action for any other types". + ZAM_EXPR_TYPE_DEFAULT, + + // Used for expressions where the evaluation code for the + // expression deals directly with the operand's ZVal, rather + // than the generator providing a higher-level version. + ZAM_EXPR_TYPE_NONE, + }; + +// We only use the following in the context where the vector's elements +// are individual words from the same line. We don't use it in other +// contexts where we're tracking a bunch of strings. +using Words = vector; + +// Used for error-reporting. +struct InputLoc + { + const char* file_name; + int line_num = 0; + }; + +// An EmitTarget is a generated file to which code will be emitted. +// The different values are used to instruct the generator which target +// is currently of interest. +enum EmitTarget + { + // Indicates that no generated file has yet been specified. + None, + + // Declares/defines methods that take AST nodes and generate + // corresponding ZAM instructions. + MethodDecl, + MethodDef, + + // Switch cases for expressions that are compiled directly, using + // custom methods rather than methods produced by the generator. + DirectDef, + + // Switch cases for invoking various flavors of methods produced + // by the generator for generating ZAM instructions for AST + // expressions. C1/C2/C3 refer to the first/second/third operand + // being a constant. V refers to none of the operands being + // a constant. 
+ C1Def, + C2Def, + C3Def, + VDef, + + // The same, but for when the expression is being assigned to + // a record field rather than a variable. There's no "C3" option + // because of how we reduce AST ternary operations. + C1FieldDef, + C2FieldDef, + VFieldDef, + + // Switch cases for compiling relational operations used in + // conditionals. + Cond, + + // Switch cases that provide the C++ code for executing specific + // individual ZAM instructions. + Eval, + + // #define's used to provide the templator's macro functionality. + EvalMacros, + + // Switch cases the provide the C++ code for executing unary + // and binary vector operations. + Vec1Eval, + Vec2Eval, + + // A set of instructions to dynamically generate maps that + // translate a generic ZAM operation (e.g., OP_LOAD_GLOBAL_VV) + // to a specific ZAM instruction, given a specific type + // (e.g., for OP_LOAD_GLOBAL_VV plus TYPE_ADDR, the map yields + // OP_LOAD_GLOBAL_VV_A). + AssignFlavor, + + // A list of values, one per ZAM instruction, that indicate whether + // that instruction writes to its first operand (the most common + // case), reads the operand but doesn't write to it, both reads it + // and writes to it, or none of these apply because the first + // operand isn't a frame variable. See the ZAMOp1Flavor enum + // defined in ZOp.h. + Op1Flavor, + + // A list of boolean values, one per ZAM instruction, that indicate + // whether the instruction has side effects, and thus should not + // be deleted even if its associated assignment is to a dead value + // (one not subsequently used). + OpSideEffects, + + // A list of names enumerating each ZAM instruction. These + // are ZAM opcodes. + OpDef, + + // A list of cases, indexed by ZAM opcode, that return a + // human-readable string of naming the opcode, for use in debugging + // output. For example, for OP_NEGATE_VV_I the corresponding + // string is "negate-VV-I". 
+ OpName, + }; + +// A helper class for managing the (ordered) collection of ZAM_OperandType's +// associated with an instruction in order to generate C++ calling sequences +// (both parameters for declarations, and arguments for invocations). +class ArgsManager + { +public: + // Constructed by providing the various ZAM_OperandType's along + // with the instruction's class. + ArgsManager(const vector& ot, ZAM_InstClass ic); + + // Returns a string defining the parameters for a declaration; + // these have full C++ type information along with the parameter + // name. + string Decls() const { return full_decl; } + + // Returns a string for passing the parameters in a function + // call. This is a comma-separated list of the parameter names, + // with no associated C++ types. + string Params() const { return full_params; } + + // Returns the name of the given parameter, indexed starting with 0. + const string& NthParam(int n) const { return params[n]; } + +private: + // Makes sure that each parameter has a unique name. For any + // parameter 'x' that occurs more than once, renames the instances + // "x1", "x2", etc. + void Differentiate(); + + // Maps ZAM_OperandType's to their associated C++ type and + // canonical parameter name. + static std::unordered_map> ot_to_args; + + // For a single argument/parameter, tracks its declaration name, + // C++ type, and the name to use when providing it as a parameter. + // These last two names are potentially distinct when we're + // assigning to record field (which is tracked by the is_field + // member variable), hence the need to track both. + struct Arg + { + string decl_name; + string decl_type; + string param_name; + bool is_field; + }; + + // All of the argument/parameters associated with the collection + // of ZAM_OperandType's. + vector args; + + // Each of the individual parameters. + vector params; + + // See Decls() and Params() above. 
+ string full_decl; + string full_params; + }; + +// There are two mutually interacting classes: ZAMGen is the overall +// driver for the ZAM generator, while ZAM_OpTemplate represents a +// single operation template, with subclasses for specific types of +// operations. +class ZAMGen; + +class ZAM_OpTemplate + { +public: + // Instantiated by passing in the ZAMGen driver and the generic + // name for the operation. + ZAM_OpTemplate(ZAMGen* _g, string _base_name); + virtual ~ZAM_OpTemplate() { } + + // Constructs the template's data structures by parsing its + // description (beyond the initial description of the type of + // operation). + void Build(); + + // Tells the object to generate the code/files necessary for + // each of its underlying instructions. + virtual void Instantiate(); + + // Returns the generic name for the operation. + const string& BaseName() const { return base_name; } + + // Returns the canonical name for the operation. This is a + // version of the name that, for expression-based operations, + // can be concatenated with "EXPR_" to get the name of the + // corresponding AST node. + const string& CanonicalName() const { return cname; } + + // Returns a string version of the ZAMOp1Flavor associated + // with this operation. + const string& GetOp1Flavor() const { return op1_flavor; } + + // True if this is an operation with side effects (see OpSideEffects + // above). + bool HasSideEffects() const { return has_side_effects; } + +protected: + // Append to the list of operand types associated with this operation. + void AddOpType(ZAM_OperandType ot) { op_types.push_back(ot); } + // Retrieve the list of operand types associated with this operation. + const vector& OperandTypes() const { return op_types; } + + // Specify the ZAMOp1Flavor associated with this operation. See + // GetOp1Flavor() above for the corresponding accessor. 
+ void SetOp1Flavor(string fl) { op1_flavor = fl; } + + // Specify/fetch the parameter (operand) from which to take the + // primary type of this operation. + void SetTypeParam(int param) { type_param = param; } + int GetTypeParam() const { return type_param; } + + // Specify/fetch the parameter (operand) from which to take the + // secondary type of this operation. + void SetType2Param(int param) { type2_param = param; } + int GetType2Param() const { return type2_param; } + + // Tracking of assignment values (C++ variables that hold the + // value that should be assigned to usual frame slot). + void SetAssignVal(string _av) { av = _av; } + bool HasAssignVal() const { return ! av.empty(); } + const string& GetAssignVal() const { return av; } + + // Management of C++ evaluation blocks. These are built up + // line-by-line. + void AddEval(string line) { eval += line; } + bool HasEval() const { return ! eval.empty(); } + const string& GetEval() const { return eval; } + + // Management of custom methods to be used rather than generating + // a method. + void SetCustomMethod(string cm) { custom_method = SkipWS(cm); } + bool HasCustomMethod() const { return ! custom_method.empty(); } + const string& GetCustomMethod() const { return custom_method; } + + // Management of code to execute at the end of a generated method. + void SetPostMethod(string cm) { post_method = SkipWS(cm); } + bool HasPostMethod() const { return ! post_method.empty(); } + const string& GetPostMethod() const { return post_method; } + + // Predicates indicating whether a subclass supports a given + // property. These are whether the operation: (1) should include + // a version that assigns to a record field as well as the normal + // assigning to a frame slot, (2) is a conditional branch, (3) does + // not have a corresponding AST node, (4) is a direct assignment + // (not an assignment to an expression), (5) is a direct assignment + // to a record field. 
+ virtual bool IncludesFieldOp() const { return false; } + virtual bool IsConditionalOp() const { return false; } + virtual bool IsInternalOp() const { return false; } + virtual bool IsAssignOp() const { return false; } + virtual bool IsFieldOp() const { return false; } + + // Whether this operation does not have any C++ evaluation associated + // with it. Used for custom methods that compile into internal + // ZAM operations. + bool NoEval() const { return no_eval; } + void SetNoEval() { no_eval = true; } + + // Whether this operation does not have a version where one of + // its operands is a constant. + bool NoConst() const { return no_const; } + void SetNoConst() { no_const = true; } + + // Whether this operation also has a vectorized form. + bool IncludesVectorOp() const { return includes_vector_op; } + void SetIncludesVectorOp() { includes_vector_op = true; } + + // Whether this operation has side effects, and thus should + // not be elided even if its result is used in a dead assignment. + void SetHasSideEffects() { has_side_effects = true; } + + // An "assignment-less" operation is one that, if its result + // is used in a dead assignment, should be converted to a different + // operation that explictly omits any assignment. + bool HasAssignmentLess() const { return ! assignment_less_op.empty(); } + void SetAssignmentLess(string op, string op_type) + { + assignment_less_op = op; + assignment_less_op_type = op_type; + } + const string& AssignmentLessOp() const { return assignment_less_op; } + const string& AssignmentLessOpType() const { return assignment_less_op_type; } + + // Builds the instructions associated with this operation, assuming + // a single operand. + void UnaryInstantiate(); + + // Parses the next line in an operation template. "attr" is + // the first word on the line, which often specifies the + // attribute specified by the line. "line" is the entire line, + // for parsing when that's necessary, and for error reporting. 
+	// "words" is "line" split into a vector of whitespace-delimited
+	// words.
+	virtual void Parse(const string& attr, const string& line, const Words& words);
+
+	// Scans in a C++ evaluation block, which continues until encountering
+	// a line that does not start with whitespace, or that's empty.
+	string GatherEval();
+
+	// Parses a $-specifier of which operand to use to associate
+	// a Zeek scripting type with ZAM instructions.
+	int ExtractTypeParam(const string& arg);
+
+	// Generates instructions for each of the different flavors of the
+	// given operation. "ot" specifies the types of operands for the
+	// instruction, and "do_vec" whether to generate a vector version.
+	void InstantiateOp(const vector<ZAM_OperandType>& ot, bool do_vec);
+
+	// Generates one specific flavor ("zc") of the given operation,
+	// using a method named 'm', the given operand types, and the class.
+	void InstantiateOp(const string& m, const vector<ZAM_OperandType>& ot, ZAM_InstClass zc);
+
+	// Generates the "assignment-less" version of the given op-code.
+	void GenAssignmentlessVersion(string op);
+
+	// Generates the method 'm' for an operation, where "suffix" is
+	// a (potentially empty) string differentiating the method from
+	// others for that operation, and "ot" and "zc" are the same
+	// as above.
+	void InstantiateMethod(const string& m, const string& suffix, const vector<ZAM_OperandType>& ot,
+	                       ZAM_InstClass zc);
+
+	// Generates the main logic of an operation's method, given the
+	// specific operand types, an associated suffix for differentiating
+	// ZAM instructions, and the instruction class.
+	void InstantiateMethodCore(const vector<ZAM_OperandType>& ot, string suffix, ZAM_InstClass zc);
+
+	// Generates the specific code to create a ZInst for the given
+	// operation, operands, parameters to "GenInst", and suffix and
+	// class per the above.
+	virtual void BuildInstruction(const vector<ZAM_OperandType>& ot, const string& params,
+	                              const string& suffix, ZAM_InstClass zc);
+
+	// Top-level driver for generating the C++ evaluation code for
+	// a given flavor of operation.
+	virtual void InstantiateEval(const vector<ZAM_OperandType>& ot, const string& suffix,
+	                             ZAM_InstClass zc);
+
+	// Generates the C++ case statement for evaluating the given flavor
+	// of operation.
+	void InstantiateEval(EmitTarget et, const string& op_suffix, const string& eval,
+	                     ZAM_InstClass zc);
+
+	// Generates a set of assignment C++ evaluations, one for each
+	// possible Zeek scripting type of operand.
+	void InstantiateAssignOp(const vector<ZAM_OperandType>& ot, const string& suffix);
+
+	// Generates a C++ evaluation for an assignment of the type
+	// corresponding to "accessor". If "is_managed" is true then
+	// generates the associated memory management, too.
+	void GenAssignOpCore(const vector<ZAM_OperandType>& ot, const string& eval,
+	                     const string& accessor, bool is_managed);
+
+	// The same, but for when there's an explicit assignment value.
+	void GenAssignOpValCore(const string& eval, const string& accessor, bool is_managed);
+
+	// Returns the name of the method associated with the particular
+	// list of operand types.
+	string MethodName(const vector<ZAM_OperandType>& ot) const;
+
+	// Returns the parameter declarations to use in declaring a method.
+	string MethodDeclare(const vector<ZAM_OperandType>& ot, ZAM_InstClass zc);
+
+	// Returns a suffix that differentiates an operation name for
+	// a specific list of operand types.
+	string OpSuffix(const vector<ZAM_OperandType>& ot) const;
+
+	// Returns a copy of the given string with leading whitespace
+	// removed.
+	string SkipWS(const string& s) const;
+
+	// Set the target to use for subsequent code emission.
+	void EmitTo(EmitTarget et) { curr_et = et; }
+
+	// Emit the given string to the currently selected EmitTarget.
+	void Emit(const string& s);
+
+	// Same, but temporarily indented up.
+	void EmitUp(const string& s)
+		{
+		IndentUp();
+		Emit(s);
+		IndentDown();
+		}
+
+	// Same, but refrain from inserting a newline.
+	void EmitNoNL(const string& s);
+
+	// Emit a newline. Implementation doesn't actually include a
+	// newline since that's implicit in a call to Emit().
+	void NL() { Emit(""); }
+
+	// Increase/decrease the indentation level, with the last two
+	// being used for brace-delimited code blocks.
+	void IndentUp();
+	void IndentDown();
+	void BeginBlock()
+		{
+		IndentUp();
+		Emit("{");
+		}
+	void EndBlock()
+		{
+		Emit("}");
+		IndentDown();
+		}
+
+	// Maps an operand type to a character mnemonic used to distinguish
+	// it from others.
+	static std::unordered_map<ZAM_OperandType, char> ot_to_char;
+
+	// The associated driver object.
+	ZAMGen* g;
+
+	// See BaseName() and CanonicalName() above.
+	string base_name;
+	string cname;
+
+	// Tracks the beginning of this operation template's definition,
+	// for error reporting.
+	InputLoc op_loc;
+
+	// The current emission target.
+	EmitTarget curr_et = None;
+
+	// The operand types for operations that have a single fixed list.
+	// Some operations (like those evaluating expressions) instead have
+	// a dynamically generated range of possible operand types.
+	vector<ZAM_OperandType> op_types;
+
+	// See the description of Op1Flavor above.
+	string op1_flavor = "OP1_WRITE";
+
+	// Tracks the result of ExtractTypeParam() used for "type" and
+	// "type2" attributes.
+	int type_param = 0; // 0 = not set
+	int type2_param = 0;
+
+	// If non-empty, the value to assign to the target in an assignment
+	// operation.
+	string av;
+
+	// The C++ evaluation; may span multiple lines.
+	string eval;
+
+	// Any associated custom method.
+	string custom_method;
+
+	// Any associated additional code to add at the end of a
+	// generated method.
+	string post_method;
+
+	// If true, then this operation does not have C++ evaluation
+	// associated with it.
+	bool no_eval = false;
+
+	// If true, then this operation should not include a version
+	// supporting operands of constant type.
+	bool no_const = false;
+
+	// If true, then this operation includes a vectorized version.
+	bool includes_vector_op = false;
+
+	// If true, then this operation has side effects (see SetHasSideEffects()).
+	bool has_side_effects = false;
+
+	// If non-empty, then specifies the associated operation that
+	// is a version of this operation but without assigning the result;
+	// and the operand type (like "OP_V") of that associated operation.
+	string assignment_less_op;
+	string assignment_less_op_type;
+	};
+
+// A subclass used for "unary-op" templates.
+class ZAM_UnaryOpTemplate : public ZAM_OpTemplate
+	{
+public:
+	ZAM_UnaryOpTemplate(ZAMGen* _g, string _base_name) : ZAM_OpTemplate(_g, _base_name) { }
+
+protected:
+	void Instantiate() override;
+	};
+
+// A subclass for unary operations that are directly instantiated using
+// custom methods.
+class ZAM_DirectUnaryOpTemplate : public ZAM_OpTemplate
+	{
+public:
+	ZAM_DirectUnaryOpTemplate(ZAMGen* _g, string _base_name, string _direct)
+		: ZAM_OpTemplate(_g, _base_name), direct(_direct)
+		{
+		}
+
+protected:
+	void Instantiate() override;
+
+private:
+	// The ZAMCompiler method to call to compile the operation.
+	string direct;
+	};
+
+// A helper class for the ZAM_ExprOpTemplate class (which follows).
+// This class tracks a single instance of creating an evaluation for
+// an AST expression.
+class EvalInstance
+	{
+public:
+	// Initialized using the types of the LHS (result) and the
+	// first and second operands. Often all three types are the
+	// same, but they can differ for some particular expressions,
+	// and for relationals. "eval" provides the C++ evaluation code.
+	// "is_def" is true if this instance is for the default catch-all
+	// where the operand types don't match any of the explicitly
+	// specified evaluations.
+	EvalInstance(ZAM_ExprType lhs_et, ZAM_ExprType op1_et, ZAM_ExprType op2_et, string eval,
+	             bool is_def);
+
+	// Returns the accessor to use for assigning to the LHS. "is_ptr"
+	// indicates whether the value to which we're applying the
+	// accessor is a pointer, rather than a ZVal.
+	string LHSAccessor(bool is_ptr = false) const;
+
+	// Same, but for access to the first or second operand.
+	string Op1Accessor(bool is_ptr = false) const { return Accessor(op1_et, is_ptr); }
+	string Op2Accessor(bool is_ptr = false) const { return Accessor(op2_et, is_ptr); }
+
+	// Provides an accessor for an operand of the given type.
+	string Accessor(ZAM_ExprType et, bool is_ptr = false) const;
+
+	// Returns the "marker" used to make unique the opcode for this
+	// flavor of expression-evaluation instruction.
+	string OpMarker() const;
+
+	const string& Eval() const { return eval; }
+	ZAM_ExprType LHS_ET() const { return lhs_et; }
+	bool IsDefault() const { return is_def; }
+
+private:
+	ZAM_ExprType lhs_et;
+	ZAM_ExprType op1_et;
+	ZAM_ExprType op2_et;
+	string eval;
+	bool is_def;
+	};
+
+// A subclass for AST "Expr" nodes in reduced form.
+class ZAM_ExprOpTemplate : public ZAM_OpTemplate
+	{
+public:
+	ZAM_ExprOpTemplate(ZAMGen* _g, string _base_name);
+
+	// The number of operands the operation takes (not including its
+	// assignment target). A value of 0 is used for expressions that
+	// require special handling.
+	virtual int Arity() const { return 0; }
+
+	bool HasExplicitResultType() const { return explicit_res_type; }
+	void SetHasExplicitResultType() { explicit_res_type = true; }
+
+	void AddExprType(ZAM_ExprType et) { expr_types.insert(et); }
+	const std::unordered_set<ZAM_ExprType>& ExprTypes() const { return expr_types; }
+
+	void AddEvalSet(ZAM_ExprType et, string ev) { eval_set[et] += ev; } // appends to any existing text
+	void AddEvalSet(ZAM_ExprType et1, ZAM_ExprType et2, string ev)
+		{
+		eval_mixed_set[et1][et2] += ev;
+		}
+
+	bool IncludesFieldOp() const override { return includes_field_op; }
+	void SetIncludesFieldOp() { includes_field_op = true; }
+
+	bool HasPreEval() const { return ! pre_eval.empty(); }
+	void SetPreEval(string pe) { pre_eval = SkipWS(pe); }
+	const string& GetPreEval() const { return pre_eval; }
+
+protected:
+	// Returns a regular expression used to access the value of the
+	// expression suitable for assignment in a loop across the elements
+	// of a Zeek "vector" type. "have_target" is true if the template
+	// has an explicit "$$" assignment target.
+	virtual const char* VecEvalRE(bool have_target) const
+		{
+		return have_target ? "$$$$ = ZVal($1)" : "ZVal($&)";
+		}
+
+	void Parse(const string& attr, const string& line, const Words& words) override;
+	void Instantiate() override;
+
+	// Instantiates versions of the operation that have a constant
+	// as the first, second, or third operand ...
+	void InstantiateC1(const vector<ZAM_OperandType>& ots, int arity, bool do_vec = false);
+	void InstantiateC2(const vector<ZAM_OperandType>& ots, int arity);
+	void InstantiateC3(const vector<ZAM_OperandType>& ots);
+
+	// ... or if all of the operands are non-constant.
+	void InstantiateV(const vector<ZAM_OperandType>& ots);
+
+	// Generates code that instantiates either the vectorized version
+	// of an operation, or the non-vector one, depending on whether
+	// the RHS of the reduced expression/assignment is a vector.
+ void DoVectorCase(const string& m, const string& args); + + // Iterates over the different Zeek types specified for an expression's + // operands and generates instructions for each. + void BuildInstructionCore(const string& params, const string& suffix, ZAM_InstClass zc); + + // Generates an if-else cascade element that matches one of the + // specific Zeek types associated with the instruction. + void GenMethodTest(ZAM_ExprType et1, ZAM_ExprType et2, const string& params, + const string& suffix, bool do_else, ZAM_InstClass zc); + + void InstantiateEval(const vector& ot, const string& suffix, + ZAM_InstClass zc) override; + +private: + // The Zeek types that can appear as operands for the expression. + std::unordered_set expr_types; + + // The C++ evaluation template for a given operand type. + std::unordered_map eval_set; + + // Some expressions take two operands of different types. This + // holds their C++ evaluation template. + std::unordered_map> eval_mixed_set; + + // Whether this expression's operand is a field access (and thus + // needs both the record as an operand and an additional constant + // offset into the record to get to the field). + bool includes_field_op = false; + + // If non-zero, code to generate prior to evaluating the expression. + string pre_eval; + + // If true, then the evaluations will take care of ensuring + // proper result types when assigning to $$. + bool explicit_res_type = false; + }; + +// A version of ZAM_ExprOpTemplate for unary expressions. 
+class ZAM_UnaryExprOpTemplate : public ZAM_ExprOpTemplate
+	{
+public:
+	ZAM_UnaryExprOpTemplate(ZAMGen* _g, string _base_name) : ZAM_ExprOpTemplate(_g, _base_name) { }
+
+	bool IncludesFieldOp() const override { return ExprTypes().count(ZAM_EXPR_TYPE_NONE) == 0; }
+
+	int Arity() const override { return 1; }
+
+protected:
+	void Parse(const string& attr, const string& line, const Words& words) override;
+	void Instantiate() override;
+
+	void BuildInstruction(const vector<ZAM_OperandType>& ot, const string& params,
+	                      const string& suffix, ZAM_InstClass zc) override;
+	};
+
+// A version of ZAM_UnaryExprOpTemplate where the point of the expression
+// is to capture a direct assignment operation.
+class ZAM_AssignOpTemplate : public ZAM_UnaryExprOpTemplate
+	{
+public:
+	ZAM_AssignOpTemplate(ZAMGen* _g, string _base_name);
+
+	bool IsAssignOp() const override { return true; }
+	bool IncludesFieldOp() const override { return false; }
+	bool IsFieldOp() const override { return field_op; }
+	void SetFieldOp() { field_op = true; }
+
+protected:
+	void Parse(const string& attr, const string& line, const Words& words) override;
+	void Instantiate() override;
+
+private:
+	bool field_op = false; // set by SetFieldOp(), reported by IsFieldOp()
+	};
+
+// A version of ZAM_ExprOpTemplate for binary expressions.
+class ZAM_BinaryExprOpTemplate : public ZAM_ExprOpTemplate
+	{
+public:
+	ZAM_BinaryExprOpTemplate(ZAMGen* _g, string _base_name) : ZAM_ExprOpTemplate(_g, _base_name) { }
+
+	bool IncludesFieldOp() const override { return true; }
+
+	int Arity() const override { return 2; }
+
+protected:
+	void Instantiate() override;
+
+	void BuildInstruction(const vector<ZAM_OperandType>& ot, const string& params,
+	                      const string& suffix, ZAM_InstClass zc) override;
+	};
+
+// A version of ZAM_BinaryExprOpTemplate for relationals.
+class ZAM_RelationalExprOpTemplate : public ZAM_BinaryExprOpTemplate
+	{
+public:
+	ZAM_RelationalExprOpTemplate(ZAMGen* _g, string _base_name)
+		: ZAM_BinaryExprOpTemplate(_g, _base_name)
+		{
+		}
+
+	bool IncludesFieldOp() const override { return false; }
+	bool IsConditionalOp() const override { return true; }
+
+protected:
+	const char* VecEvalRE(bool have_target) const override
+		{
+		if ( have_target )
+			return "$$$$ = ZVal(bro_int_t($1))";
+		else
+			return "ZVal(bro_int_t($&))";
+		}
+
+	void Instantiate() override;
+
+	void BuildInstruction(const vector<ZAM_OperandType>& ot, const string& params,
+	                      const string& suffix, ZAM_InstClass zc) override;
+	};
+
+// A version of ZAM_BinaryExprOpTemplate for binary operations generated
+// by custom methods rather than directly from the AST.
+class ZAM_InternalBinaryOpTemplate : public ZAM_BinaryExprOpTemplate
+	{
+public:
+	ZAM_InternalBinaryOpTemplate(ZAMGen* _g, string _base_name)
+		: ZAM_BinaryExprOpTemplate(_g, _base_name)
+		{
+		}
+
+	bool IsInternalOp() const override { return true; }
+
+	// The accessors used to get to the underlying Zeek script value
+	// of the first and second operands. SetOpAccessor() sets both.
+	void SetOp1Accessor(string accessor) { op1_accessor = accessor; }
+	void SetOp2Accessor(string accessor) { op2_accessor = accessor; }
+	void SetOpAccessor(string accessor)
+		{
+		SetOp1Accessor(accessor);
+		SetOp2Accessor(accessor);
+		}
+
+protected:
+	void Parse(const string& attr, const string& line, const Words& words) override;
+
+	void InstantiateEval(const vector<ZAM_OperandType>& ot, const string& suffix,
+	                     ZAM_InstClass zc) override;
+
+private:
+	string op1_accessor;
+	string op2_accessor;
+	};
+
+// A version of ZAM_OpTemplate for operations used internally (and not
+// corresponding to AST elements).
+class ZAM_InternalOpTemplate : public ZAM_OpTemplate
+	{
+public:
+	ZAM_InternalOpTemplate(ZAMGen* _g, string _base_name) : ZAM_OpTemplate(_g, _base_name) { }
+
+	bool IsInternalOp() const override { return true; }
+
+protected:
+	void Parse(const string& attr, const string& line, const Words& words) override;
+
+private:
+	// True if the internal operation corresponds to an indirect call,
+	// i.e., one through a variable rather than one directly specified.
+	bool is_indirect_call = false;
+	};
+
+// An internal operation that assigns a result to a frame element.
+class ZAM_InternalAssignOpTemplate : public ZAM_InternalOpTemplate
+	{
+public:
+	ZAM_InternalAssignOpTemplate(ZAMGen* _g, string _base_name)
+		: ZAM_InternalOpTemplate(_g, _base_name)
+		{
+		}
+
+	bool IsAssignOp() const override { return true; }
+	};
+
+// Helper classes for managing input from the template file, including
+// low-level scanning.
+
+class TemplateInput
+	{
+public:
+	// Program name and file name are for generating error messages.
+	TemplateInput(FILE* _f, const char* _prog_name, const char* _file_name)
+		: f(_f), prog_name(_prog_name)
+		{
+		loc.file_name = _file_name;
+		}
+
+	const InputLoc& CurrLoc() const { return loc; }
+
+	// Fetch the next line of input, including trailing newline.
+	// Returns true on success, false on EOF or error. Skips over
+	// comments.
+	bool ScanLine(string& line);
+
+	// Takes a line and splits it into whitespace-delimited words,
+	// returned in a vector. Removes trailing whitespace.
+	Words SplitIntoWords(const string& line) const;
+
+	// Returns the line with the given number of initial words skipped.
+	string SkipWords(const string& line, int n) const;
+
+	// Puts back the given line so that the next call to ScanLine will
+	// return it. Does not nest: a later PutBack() replaces the stored line.
+	void PutBack(const string& line) { put_back = line; }
+
+	// Report an error and exit.
+	[[noreturn]] void Gripe(const char* msg, const string& input) const;
+	[[noreturn]] void Gripe(const char* msg, const InputLoc& loc) const;
+
+private:
+	string put_back; // if non-empty, use this for the next ScanLine
+
+	FILE* f; // as passed to the constructor
+	const char* prog_name; // for generating error messages
+	InputLoc loc; // returned by CurrLoc()
+	};
+
+// Driver class for the ZAM instruction generator.
+
+class ZAMGen
+	{
+public:
+	ZAMGen(int argc, char** argv);
+
+	// Reads in and records a macro definition, which ends upon
+	// encountering a blank line or a line that does not begin
+	// with whitespace.
+	void ReadMacro(const string& line);
+
+	// Emits C++ #define's to implement the recorded macros.
+	void GenMacros();
+
+	// Generates a ZAM op-code for the given template, suffix, and
+	// instruction class. Also creates auxiliary information associated
+	// with the instruction.
+	string GenOpCode(const ZAM_OpTemplate* ot, const string& suffix,
+	                 ZAM_InstClass zc = ZIC_REGULAR);
+
+	// These methods provide low-level parsing (and error-reporting)
+	// access to ZAM_OpTemplate objects; each forwards to "ti".
+	const InputLoc& CurrLoc() const { return ti->CurrLoc(); }
+	bool ScanLine(string& line) { return ti->ScanLine(line); }
+	Words SplitIntoWords(const string& line) const { return ti->SplitIntoWords(line); }
+	string SkipWords(const string& line, int n) const { return ti->SkipWords(line, n); }
+	void PutBack(const string& line) { ti->PutBack(line); }
+
+	// Methods made public to ZAM_OpTemplate objects for emitting code.
+	void Emit(EmitTarget et, const string& s);
+
+	void IndentUp() { ++indent_level; }
+	void IndentDown() { --indent_level; }
+	void SetNoNL(bool _no_NL) { no_NL = _no_NL; }
+
+	[[noreturn]] void Gripe(const char* msg, const string& input) const { ti->Gripe(msg, input); }
+	[[noreturn]] void Gripe(const char* msg, const InputLoc& loc) const { ti->Gripe(msg, loc); }
+
+private:
+	// Opens all of the code generation targets, and creates prologs
+	// for those requiring them (such as for embedding into switch
+	// statements).
+ void InitEmitTargets(); + void InitSwitch(EmitTarget et, string desc); + + // Closes all of the code generation targets, and creates epilogs + // for those requiring them. + void CloseEmitTargets(); + void FinishSwitches(); + + // Parses a single template, returning true on success and false + // if we've reached the end of the input. (Errors during parsing + // result instead in exiting.) + bool ParseTemplate(); + + // Maps code generation targets with their corresponding FILE*. + std::unordered_map gen_files; + + // Maps code generation targets to strings used to describe any + // associated switch (for error reporting). + std::unordered_map switch_targets; + + // The low-level TemplateInput object used to manage input. + std::unique_ptr ti; + + // Tracks all of the templates created so far. + vector> templates; + + // Tracks the macros recorded so far. + vector> macros; + + // Current indentation level. Maintained globally rather than + // per EmitTarget, so the caller needs to ensure it is managed + // consistently. + int indent_level = 0; + + // If true, refrain from appending a newline to any emitted lines. + bool no_NL = false; + };