the main ZAM code

This commit is contained in:
Vern Paxson 2021-08-16 13:01:05 -07:00
parent ddac6e5f67
commit 906d1fc1f2
22 changed files with 8081 additions and 0 deletions

View file

@ -250,6 +250,36 @@ set(_gen_zeek_script_cpp ${CMAKE_CURRENT_BINARY_DIR}/../CPP-gen.cc)
add_custom_command(OUTPUT ${_gen_zeek_script_cpp}
COMMAND ${CMAKE_COMMAND} -E touch ${_gen_zeek_script_cpp})
# Define the command that runs the ZAM instruction generator (the Gen-ZAM
# executable) on script_opt/ZAM/Ops.in.  Every header listed under OUTPUT
# is declared as a product of that single run, so any target that consumes
# one of them (zeek_SRCS lists ZAM-MethodDecls.h) triggers the generation
# step first.
add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/ZAM-AssignFlavorsDefs.h
${CMAKE_CURRENT_BINARY_DIR}/ZAM-Conds.h
${CMAKE_CURRENT_BINARY_DIR}/ZAM-DirectDefs.h
${CMAKE_CURRENT_BINARY_DIR}/ZAM-EvalDefs.h
${CMAKE_CURRENT_BINARY_DIR}/ZAM-EvalMacros.h
${CMAKE_CURRENT_BINARY_DIR}/ZAM-GenExprsDefsC1.h
${CMAKE_CURRENT_BINARY_DIR}/ZAM-GenExprsDefsC2.h
${CMAKE_CURRENT_BINARY_DIR}/ZAM-GenExprsDefsC3.h
${CMAKE_CURRENT_BINARY_DIR}/ZAM-GenExprsDefsV.h
${CMAKE_CURRENT_BINARY_DIR}/ZAM-GenFieldsDefsC1.h
${CMAKE_CURRENT_BINARY_DIR}/ZAM-GenFieldsDefsC2.h
${CMAKE_CURRENT_BINARY_DIR}/ZAM-GenFieldsDefsV.h
${CMAKE_CURRENT_BINARY_DIR}/ZAM-MethodDecls.h
${CMAKE_CURRENT_BINARY_DIR}/ZAM-MethodDefs.h
${CMAKE_CURRENT_BINARY_DIR}/ZAM-Op1FlavorsDefs.h
${CMAKE_CURRENT_BINARY_DIR}/ZAM-OpSideEffects.h
${CMAKE_CURRENT_BINARY_DIR}/ZAM-OpsDefs.h
${CMAKE_CURRENT_BINARY_DIR}/ZAM-OpsNamesDefs.h
${CMAKE_CURRENT_BINARY_DIR}/ZAM-Vec1EvalDefs.h
${CMAKE_CURRENT_BINARY_DIR}/ZAM-Vec2EvalDefs.h
# The generator takes the operation templates file as its only argument.
COMMAND ${CMAKE_CURRENT_BINARY_DIR}/Gen-ZAM
ARGS ${CMAKE_CURRENT_SOURCE_DIR}/script_opt/ZAM/Ops.in
# Re-run whenever the generator binary or the templates change.
DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/Gen-ZAM
${CMAKE_CURRENT_SOURCE_DIR}/script_opt/ZAM/Ops.in
COMMENT "[sh] Generating ZAM operations"
# Run from the binary directory, which is where the OUTPUT paths point.
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
)
set_source_files_properties(nb_dns.c PROPERTIES COMPILE_FLAGS
-fno-strict-aliasing)
@ -396,6 +426,20 @@ set(MAIN_SRCS
script_opt/TempVar.cc
script_opt/UseDefs.cc
script_opt/ZAM/AM-Opt.cc
script_opt/ZAM/Branches.cc
script_opt/ZAM/BuiltIn.cc
script_opt/ZAM/Driver.cc
script_opt/ZAM/Expr.cc
script_opt/ZAM/Inst-Gen.cc
script_opt/ZAM/Low-Level.cc
script_opt/ZAM/Stmt.cc
script_opt/ZAM/Support.cc
script_opt/ZAM/Vars.cc
script_opt/ZAM/ZBody.cc
script_opt/ZAM/ZInst.cc
script_opt/ZAM/ZOp.cc
nb_dns.c
digest.h
)
@ -404,6 +448,10 @@ set(THIRD_PARTY_SRCS
3rdparty/sqlite3.c
)
set(GEN_ZAM_SRCS
script_opt/ZAM/Gen-ZAM.cc
)
# Highwayhash. Highwayhash is a bit special since it has architecture dependent code...
set(HH_SRCS
@ -470,12 +518,14 @@ set(zeek_SRCS
${FLEX_Scanner_INPUT}
${BISON_Parser_INPUT}
${CMAKE_CURRENT_BINARY_DIR}/DebugCmdConstants.h
${CMAKE_CURRENT_BINARY_DIR}/ZAM-MethodDecls.h
${THIRD_PARTY_SRCS}
${HH_SRCS}
${MAIN_SRCS}
)
collect_headers(zeek_HEADERS ${zeek_SRCS})
collect_headers(GEN_ZAM_HEADERS ${GEN_ZAM_SRCS})
add_library(zeek_objs OBJECT ${zeek_SRCS})
@ -491,6 +541,8 @@ set_target_properties(zeek PROPERTIES ENABLE_EXPORTS TRUE)
install(TARGETS zeek DESTINATION bin)
add_executable(Gen-ZAM ${GEN_ZAM_SRCS} ${GEN_ZAM_HEADERS})
# Install wrapper script for Bro-to-Zeek renaming.
include(InstallSymlink)
InstallSymlink("${CMAKE_INSTALL_PREFIX}/bin/zeek-wrapper" "${CMAKE_INSTALL_PREFIX}/bin/bro")

1070
src/script_opt/ZAM/AM-Opt.cc Normal file

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,174 @@
// See the file "COPYING" in the main distribution directory for copyright.
// Methods for dealing with ZAM branches.
#include "zeek/Reporter.h"
#include "zeek/Desc.h"
#include "zeek/script_opt/ZAM/Compile.h"
namespace zeek::detail {
// Opens a new, initially empty, set of pending gotos on top of the
// given stack of goto sets.
void ZAMCompiler::PushGoTos(GoToSets& gotos)
    {
    gotos.emplace_back();
    }
// Points every goto recorded in the innermost (most recently pushed)
// set at label "l", then discards that set.  The stack must not be empty.
void ZAMCompiler::ResolveGoTos(GoToSets& gotos, const InstLabel l)
    {
    for ( const auto& pending : gotos.back() )
        SetGoTo(pending, l);

    gotos.pop_back();
    }
// Emits a goto whose target isn't known yet, and remembers it in "v"
// so that it can be patched once the target is determined.
ZAMStmt ZAMCompiler::GenGoTo(GoToSet& v)
    {
    auto stub = GoToStub();

    // A ZAMStmt carries nothing but its statement number, so storing
    // the stub itself records the same information.
    v.push_back(stub);

    return stub;
    }
// Adds an OP_GOTO_V instruction with a placeholder operand of 0 and no
// target set; the caller is expected to fill the target in later.
ZAMStmt ZAMCompiler::GoToStub()
    {
    ZInstI stub(OP_GOTO_V, 0);
    stub.op_type = OP_V_I1;

    return AddInst(stub);
    }
// Adds an OP_GOTO_V instruction that branches to the given label.  The
// branch target is tracked in the instruction's first operand slot.
ZAMStmt ZAMCompiler::GoTo(const InstLabel l)
    {
    ZInstI branch(OP_GOTO_V, 0);

    branch.op_type = OP_V_I1;
    branch.target_slot = 1;
    branch.target = l;

    return AddInst(branch);
    }
// Returns the label (instruction) that corresponds to statement "s".
InstLabel ZAMCompiler::GoToTarget(const ZAMStmt s)
    {
    InstLabel label = insts1[s.stmt_num];
    return label;
    }
// Returns the label for the instruction that follows statement "s".
// If "s" is the last instruction generated so far, returns the
// "pending" placeholder instruction, creating it on first use.
InstLabel ZAMCompiler::GoToTargetBeyond(const ZAMStmt s)
    {
    const int next = s.stmt_num + 1;

    if ( next != int(insts1.size()) )
        return insts1[next];

    // Nothing comes after "s" yet; branch to the placeholder instead.
    if ( ! pending_inst )
        pending_inst = new ZInstI();

    return pending_inst;
    }
// Records on "inst" that it branches to label "l", and which of its
// operand slots holds that branch target.
void ZAMCompiler::SetTarget(ZInstI* inst, const InstLabel l, int slot)
    {
    inst->target_slot = slot;
    inst->target = l;
    }
// Given a goto target, returns the first instruction at that position
// or later that is still live.  Returns the "pending" placeholder if
// the target already is that placeholder, or if no live instruction
// remains at or after the target.
ZInstI* ZAMCompiler::FindLiveTarget(ZInstI* goto_target)
    {
    if ( goto_target == pending_inst )
        return goto_target;

    // Use a signed count throughout, so the range check below doesn't
    // mix signed and unsigned operands.
    const int num_insts = int(insts1.size());

    int idx = goto_target->inst_num;
    ASSERT(idx >= 0 && idx <= num_insts);

    while ( idx < num_insts && ! insts1[idx]->live )
        ++idx;

    if ( idx == num_insts )
        return pending_inst;

    return insts1[idx];
    }
// Writes the instruction number of "target" into the operand of "inst"
// selected by "target_slot" (1 through 4).  Any other slot value is
// reported as an internal error.
void ZAMCompiler::ConcretizeBranch(ZInstI* inst, ZInstI* target,
                                   int target_slot)
    {
    int target_num;

    if ( target != pending_inst )
        target_num = target->inst_num;

    else if ( insts2.size() == 0 )
        // No second-pass instructions exist, so this is being done
        // while concretizing the intermediary instructions in order
        // to dump them; the placeholder sits just past their end.
        target_num = insts1.size();

    else
        target_num = insts2.size();

    switch ( target_slot ) {
    case 1:
        inst->v1 = target_num;
        break;

    case 2:
        inst->v2 = target_num;
        break;

    case 3:
        inst->v3 = target_num;
        break;

    case 4:
        inst->v4 = target_num;
        break;

    default:
        reporter->InternalError("bad GoTo target");
    }
    }
// Makes the first operand of statement "s" a branch to label "l".
// The instruction's operand type must be OP_V or OP_V_I1, and ends up
// as OP_V_I1.
void ZAMCompiler::SetV1(ZAMStmt s, const InstLabel l)
    {
    ZInstI* branch = insts1[s.stmt_num];

    SetTarget(branch, l, 1);

    ASSERT(branch->op_type == OP_V || branch->op_type == OP_V_I1);
    branch->op_type = OP_V_I1;
    }
// Makes the second operand of statement "s" a branch to label "l",
// switching the instruction's operand type to the matching "_I2" form
// when it isn't already one.
void ZAMCompiler::SetV2(ZAMStmt s, const InstLabel l)
    {
    auto branch = insts1[s.stmt_num];
    SetTarget(branch, l, 2);

    auto& op_type = branch->op_type;

    switch ( op_type ) {
    case OP_VV:
        op_type = OP_VV_I2;
        break;

    case OP_VC:
    case OP_VVC:
        op_type = OP_VVC_I2;
        break;

    default:
        // Anything else must already treat operand 2 as an integer.
        ASSERT(op_type == OP_VV_I2 || op_type == OP_VV_I1_I2 || op_type == OP_VVC_I2);
        break;
    }
    }
// Makes the third operand of statement "s" a branch to label "l".
// Operand types OP_VVV_I2_I3 and OP_VVVC_I3 are left untouched; all
// other (permitted) types become OP_VVV_I3.
void ZAMCompiler::SetV3(ZAMStmt s, const InstLabel l)
    {
    auto branch = insts1[s.stmt_num];
    SetTarget(branch, l, 3);

    const auto op_type = branch->op_type;
    const bool keep_as_is = op_type == OP_VVV_I2_I3 || op_type == OP_VVVC_I3;

    if ( ! keep_as_is )
        {
        ASSERT(op_type == OP_VV || op_type == OP_VVV || op_type == OP_VVV_I3);
        branch->op_type = OP_VVV_I3;
        }
    }
// Makes the fourth operand of statement "s" a branch to label "l".
// The instruction's operand type must be OP_VVVV or OP_VVVV_I4, and
// ends up as OP_VVVV_I4.
void ZAMCompiler::SetV4(ZAMStmt s, const InstLabel l)
    {
    auto branch = insts1[s.stmt_num];
    SetTarget(branch, l, 4);

    ASSERT(branch->op_type == OP_VVVV || branch->op_type == OP_VVVV_I4);

    // Storing the value unconditionally is a no-op when it's already set.
    branch->op_type = OP_VVVV_I4;
    }
} // zeek::detail

View file

@ -0,0 +1,447 @@
// See the file "COPYING" in the main distribution directory for copyright.
// ZAM methods associated with instructions that replace calls to
// built-in functions.
#include "zeek/Func.h"
#include "zeek/Reporter.h"
#include "zeek/script_opt/ZAM/Compile.h"
namespace zeek::detail {
bool ZAMCompiler::IsZAM_BuiltIn(const Expr* e)
{
// The expression e is either directly a call (in which case there's
// no return value), or an assignment to a call.
const CallExpr* c;
if ( e->Tag() == EXPR_CALL )
c = e->AsCallExpr();
else
c = e->GetOp2()->AsCallExpr();
auto func_expr = c->Func();
if ( func_expr->Tag() != EXPR_NAME )
// An indirect call.
return false;
auto func_val = func_expr->AsNameExpr()->Id()->GetVal();
if ( ! func_val )
// A call to a function that hasn't been defined.
return false;
auto func = func_val->AsFunc();
if ( func->GetKind() != BuiltinFunc::BUILTIN_FUNC )
return false;
auto& args = c->Args()->Exprs();
const NameExpr* n = nullptr; // name to assign to, if any
if ( e->Tag() != EXPR_CALL )
n = e->GetOp1()->AsRefExpr()->GetOp1()->AsNameExpr();
using GenBuiltIn = bool (ZAMCompiler::*)(const NameExpr* n,
const ExprPList& args);
static std::vector<std::pair<const char*, GenBuiltIn>> builtins = {
{ "Analyzer::__name", &ZAMCompiler::BuiltIn_Analyzer__name },
{ "Broker::__flush_logs",
&ZAMCompiler::BuiltIn_Broker__flush_logs },
{ "Files::__enable_reassembly",
&ZAMCompiler::BuiltIn_Files__enable_reassembly },
{ "Files::__set_reassembly_buffer",
&ZAMCompiler::BuiltIn_Files__set_reassembly_buffer },
{ "Log::__write", &ZAMCompiler::BuiltIn_Log__write },
{ "current_time", &ZAMCompiler::BuiltIn_current_time },
{ "get_port_transport_proto",
&ZAMCompiler::BuiltIn_get_port_etc },
{ "network_time", &ZAMCompiler::BuiltIn_network_time },
{ "reading_live_traffic",
&ZAMCompiler::BuiltIn_reading_live_traffic },
{ "reading_traces", &ZAMCompiler::BuiltIn_reading_traces },
{ "strstr", &ZAMCompiler::BuiltIn_strstr },
{ "sub_bytes", &ZAMCompiler::BuiltIn_sub_bytes },
{ "to_lower", &ZAMCompiler::BuiltIn_to_lower },
};
for ( auto& b : builtins )
if ( util::streq(func->Name(), b.first) )
return (this->*(b.second))(n ,args);
return false;
}
bool ZAMCompiler::BuiltIn_Analyzer__name(const NameExpr* n,
const ExprPList& args)
{
if ( ! n )
{
reporter->Warning("return value from built-in function ignored");
return true;
}
if ( args[0]->Tag() == EXPR_CONST )
// Doesn't seem worth developing a variant for this weird
// usage cast.
return false;
int nslot = Frame1Slot(n, OP1_WRITE);
auto arg_t = args[0]->AsNameExpr();
auto z = ZInstI(OP_ANALYZER__NAME_VV, nslot, FrameSlot(arg_t));
z.SetType(args[0]->GetType());
AddInst(z);
return true;
}
// Compiles a call to Broker::__flush_logs, using the variant that
// stores the result if "n" is present and the no-result variant
// otherwise.  Always succeeds.
bool ZAMCompiler::BuiltIn_Broker__flush_logs(const NameExpr* n,
                                             const ExprPList& args)
    {
    if ( ! n )
        {
        AddInst(ZInstI(OP_BROKER_FLUSH_LOGS_X));
        return true;
        }

    AddInst(ZInstI(OP_BROKER_FLUSH_LOGS_V, Frame1Slot(n, OP1_WRITE)));
    return true;
    }
// Compiles a call to Files::__enable_reassembly.  Only handles calls
// that discard the result and pass the file argument as a variable;
// returns false (fall back to a regular call) otherwise.
bool ZAMCompiler::BuiltIn_Files__enable_reassembly(const NameExpr* n,
                                                   const ExprPList& args)
    {
    // While this built-in nominally returns a value, existing script
    // code ignores it, so for now there's no variant that stores the
    // result.  A constant argument would be weird, and isn't
    // supported either.
    if ( n || args[0]->Tag() == EXPR_CONST )
        return false;

    auto file_arg = args[0]->AsNameExpr();
    AddInst(ZInstI(OP_FILES__ENABLE_REASSEMBLY_V, FrameSlot(file_arg)));

    return true;
    }
// Compiles a call to Files::__set_reassembly_buffer.  Only handles
// calls that discard the result and pass the file argument as a
// variable; the second argument can be a variable or a constant.
// Returns false (fall back to a regular call) otherwise.
bool ZAMCompiler::BuiltIn_Files__set_reassembly_buffer(const NameExpr* n,
                                                       const ExprPList& args)
    {
    // As for enable_reassembly: no variant stores the result, and a
    // constant file argument would be weird.
    if ( n || args[0]->Tag() == EXPR_CONST )
        return false;

    auto file_slot = FrameSlot(args[0]->AsNameExpr());
    auto size_arg = args[1];

    ZInstI z;

    if ( size_arg->Tag() != EXPR_CONST )
        z = ZInstI(OP_FILES__SET_REASSEMBLY_BUFFER_VV, file_slot,
                   FrameSlot(size_arg->AsNameExpr()));
    else
        {
        // The constant goes directly into the second operand, which
        // accordingly is flagged as an integer rather than a slot.
        auto size = size_arg->AsConstExpr()->Value()->AsCount();
        z = ZInstI(OP_FILES__SET_REASSEMBLY_BUFFER_VC, file_slot, size);
        z.op_type = OP_VV_I2;
        }

    AddInst(z);

    return true;
    }
// Compiles a call to Log::__write.  The second argument (the columns)
// must be a variable, otherwise returns false.  Picks among four
// instruction variants depending on whether the result is assigned
// ("n" non-nil) and whether the first argument (the ID) is a constant.
bool ZAMCompiler::BuiltIn_Log__write(const NameExpr* n, const ExprPList& args)
    {
    auto stream_id = args[0];
    auto columns = args[1];

    if ( columns->Tag() != EXPR_NAME )
        return false;

    auto columns_n = columns->AsNameExpr();
    auto col_slot = FrameSlot(columns_n);

    const bool const_id = stream_id->Tag() == EXPR_CONST;

    // A constant ID is handed to the instruction via auxiliary info.
    ZInstAux* aux = nullptr;

    if ( const_id )
        {
        aux = new ZInstAux(1);
        aux->Add(0, stream_id->AsConstExpr()->ValuePtr());
        }

    ZInstI z;

    if ( n && const_id )
        {
        int dest_slot = Frame1Slot(n, OP1_WRITE);
        z = ZInstI(OP_LOG_WRITEC_VV, dest_slot, col_slot);
        }

    else if ( n )
        {
        int dest_slot = Frame1Slot(n, OP1_WRITE);
        z = ZInstI(OP_LOG_WRITE_VVV, dest_slot,
                   FrameSlot(stream_id->AsNameExpr()), col_slot);
        }

    else if ( const_id )
        z = ZInstI(OP_LOG_WRITEC_V, col_slot, stream_id->AsConstExpr());

    else
        z = ZInstI(OP_LOG_WRITE_VV, FrameSlot(stream_id->AsNameExpr()),
                   col_slot);

    if ( const_id )
        z.aux = aux;

    z.SetType(columns_n->GetType());

    AddInst(z);

    return true;
    }
// Compiles a call to current_time.  If the result isn't assigned to
// anything, emits no instruction and just warns.  Always returns true.
bool ZAMCompiler::BuiltIn_current_time(const NameExpr* n, const ExprPList& args)
    {
    if ( n )
        {
        int dest_slot = Frame1Slot(n, OP1_WRITE);
        AddInst(ZInstI(OP_CURRENT_TIME_V, dest_slot));
        }
    else
        reporter->Warning("return value from built-in function ignored");

    return true;
    }
// Compiles a call to get_port_transport_proto.  If the result isn't
// assigned, just warns and returns true.  Returns false if the
// argument isn't a variable.
bool ZAMCompiler::BuiltIn_get_port_etc(const NameExpr* n, const ExprPList& args)
    {
    if ( ! n )
        {
        reporter->Warning("return value from built-in function ignored");
        return true;
        }

    auto port_arg = args[0];

    if ( port_arg->Tag() == EXPR_NAME )
        {
        auto port_name = port_arg->AsNameExpr();

        // Look up the destination slot before the argument's slot.
        int dest_slot = Frame1Slot(n, OP1_WRITE);

        AddInst(ZInstI(OP_GET_PORT_TRANSPORT_PROTO_VV, dest_slot,
                       FrameSlot(port_name)));
        return true;
        }

    return false;
    }
// Compiles a call to network_time.  If the result isn't assigned to
// anything, emits no instruction and just warns.  Always returns true.
bool ZAMCompiler::BuiltIn_network_time(const NameExpr* n, const ExprPList& args)
    {
    if ( n )
        {
        int dest_slot = Frame1Slot(n, OP1_WRITE);
        AddInst(ZInstI(OP_NETWORK_TIME_V, dest_slot));
        }
    else
        reporter->Warning("return value from built-in function ignored");

    return true;
    }
// Compiles a call to reading_live_traffic.  If the result isn't
// assigned to anything, emits no instruction and just warns.  Always
// returns true.
bool ZAMCompiler::BuiltIn_reading_live_traffic(const NameExpr* n,
                                               const ExprPList& args)
    {
    if ( n )
        {
        int dest_slot = Frame1Slot(n, OP1_WRITE);
        AddInst(ZInstI(OP_READING_LIVE_TRAFFIC_V, dest_slot));
        }
    else
        reporter->Warning("return value from built-in function ignored");

    return true;
    }
// Compiles a call to reading_traces.  If the result isn't assigned to
// anything, emits no instruction and just warns.  Always returns true.
bool ZAMCompiler::BuiltIn_reading_traces(const NameExpr* n,
                                         const ExprPList& args)
    {
    if ( n )
        {
        int dest_slot = Frame1Slot(n, OP1_WRITE);
        AddInst(ZInstI(OP_READING_TRACES_V, dest_slot));
        }
    else
        reporter->Warning("return value from built-in function ignored");

    return true;
    }
// Compiles a call to strstr.  If the result isn't assigned, just warns
// and returns true.  Handles every combination of variable/constant
// arguments except both being non-variables, for which it returns
// false.
bool ZAMCompiler::BuiltIn_strstr(const NameExpr* n, const ExprPList& args)
    {
    if ( ! n )
        {
        reporter->Warning("return value from built-in function ignored");
        return true;
        }

    auto big = args[0];
    auto little = args[1];

    const bool big_is_var = big->Tag() == EXPR_NAME;
    const bool little_is_var = little->Tag() == EXPR_NAME;

    ZInstI z;

    if ( ! big_is_var && ! little_is_var )
        return false;

    if ( big_is_var && little_is_var )
        z = GenInst(OP_STRSTR_VVV, n, big->AsNameExpr(),
                    little->AsNameExpr());

    else if ( big_is_var )
        z = GenInst(OP_STRSTR_VVC, n, big->AsNameExpr(),
                    little->AsConstExpr());

    else
        // Only "little" is a variable; the constant "big" comes last.
        z = GenInst(OP_STRSTR_VCV, n, little->AsNameExpr(),
                    big->AsConstExpr());

    AddInst(z);

    return true;
    }
// Compiles a call to sub_bytes(s, start, n).  If the result isn't
// assigned to anything, just warns and returns true.  Otherwise picks
// one of eight instruction variants based on which of the three
// arguments are constants, and always returns true.
bool ZAMCompiler::BuiltIn_sub_bytes(const NameExpr* n, const ExprPList& args)
{
if ( ! n )
{
reporter->Warning("return value from built-in function ignored");
return true;
}
auto arg_s = args[0];
auto arg_start = args[1];
auto arg_n = args[2];
int nslot = Frame1Slot(n, OP1_WRITE);
// v2 is the frame slot of the string argument, or 0 if it's not a
// variable.  v3/v4 hold, for the start and length arguments, either
// the variable's frame slot or the constant's value.
int v2 = FrameSlotIfName(arg_s);
int v3 = ConvertToCount(arg_start);
int v4 = ConvertToInt(arg_n);
// The string argument as a constant, or nil if it isn't one.
auto c = arg_s->Tag() == EXPR_CONST ? arg_s->AsConstExpr() : nullptr;
ZInstI z;
// In the mask, bit 0x4 corresponds to the first argument being a
// constant, 0x2 to the second, and 0x1 to the third.
switch ( ConstArgsMask(args, 3) ) {
case 0x0: // all variable
z = ZInstI(OP_SUB_BYTES_VVVV, nslot, v2, v3, v4);
z.op_type = OP_VVVV;
break;
case 0x1: // last argument a constant
z = ZInstI(OP_SUB_BYTES_VVVi, nslot, v2, v3, v4);
z.op_type = OP_VVVV_I4;
break;
case 0x2: // 2nd argument a constant; flip!
// The operands are swapped so that the constant lands in the
// final (integer) operand.
z = ZInstI(OP_SUB_BYTES_VViV, nslot, v2, v4, v3);
z.op_type = OP_VVVV_I4;
break;
case 0x3: // both 2nd and third are constants
z = ZInstI(OP_SUB_BYTES_VVii, nslot, v2, v3, v4);
z.op_type = OP_VVVV_I3_I4;
break;
case 0x4: // first argument a constant
z = ZInstI(OP_SUB_BYTES_VVVC, nslot, v3, v4, c);
z.op_type = OP_VVVC;
break;
case 0x5: // first and third constant
z = ZInstI(OP_SUB_BYTES_VViC, nslot, v3, v4, c);
z.op_type = OP_VVVC_I3;
break;
case 0x6: // first and second constant - flip!
// As above, swapped so the integer constant is the last "V" operand.
z = ZInstI(OP_SUB_BYTES_ViVC, nslot, v4, v3, c);
z.op_type = OP_VVVC_I3;
break;
case 0x7: // whole shebang
z = ZInstI(OP_SUB_BYTES_ViiC, nslot, v3, v4, c);
z.op_type = OP_VVVC_I2_I3;
break;
default:
// Can't happen for a 3-bit mask.
reporter->InternalError("bad constant mask");
}
AddInst(z);
return true;
}
bool ZAMCompiler::BuiltIn_to_lower(const NameExpr* n, const ExprPList& args)
{
if ( ! n )
{
reporter->Warning("return value from built-in function ignored");
return true;
}
int nslot = Frame1Slot(n, OP1_WRITE);
if ( args[0]->Tag() == EXPR_CONST )
{
auto arg_c = args[0]->AsConstExpr()->Value()->AsStringVal();
ValPtr arg_lc = {AdoptRef{}, ZAM_to_lower(arg_c)};
auto arg_lce = make_intrusive<ConstExpr>(arg_lc);
auto z = ZInstI(OP_ASSIGN_CONST_VC, nslot, arg_lce.get());
z.is_managed = true;
AddInst(z);
}
else
{
auto arg_s = args[0]->AsNameExpr();
AddInst(ZInstI(OP_TO_LOWER_VV, nslot, FrameSlot(arg_s)));
}
return true;
}
// Returns a bit mask with one bit per argument, set if that argument
// is a constant.  The first argument maps to the highest-order bit in
// use, and the last argument to bit 0.  "nargs" must match the number
// of arguments in the list.
bro_uint_t ZAMCompiler::ConstArgsMask(const ExprPList& args, int nargs) const
    {
    ASSERT(args.length() == nargs);

    bro_uint_t mask = 0;

    for ( int i = 0; i < nargs; ++i )
        {
        const bro_uint_t is_const = args[i]->Tag() == EXPR_CONST ? 1 : 0;
        mask = (mask << 1) | is_const;
        }

    return mask;
    }
} // zeek::detail

View file

@ -0,0 +1,27 @@
// See the file "COPYING" in the main distribution directory for copyright.
// ZAM compiler method declarations for built-in functions.
//
// This file is only included from inside the ZAMCompiler class
// declaration (so these are methods, not standalone functions). We maintain
// it separately so that the conceptual overhead of adding a new built-in
// is lower.
// If the given expression corresponds to a call to a ZAM built-in,
// then compiles the call and returns true. Otherwise, returns false.
// The expression must be either a call or an assignment whose
// right-hand side is a call.
bool IsZAM_BuiltIn(const Expr* e);
// Built-ins return true if able to compile the call, false if not.
// For each, "n" is the variable that the call's result is assigned to
// (nil if the call's value is not assigned), and "args" holds the
// call's arguments.  Note that a number of them also return true,
// after only issuing a warning, when "n" is nil.
bool BuiltIn_Analyzer__name(const NameExpr* n, const ExprPList& args);
bool BuiltIn_Broker__flush_logs(const NameExpr* n, const ExprPList& args);
bool BuiltIn_Files__enable_reassembly(const NameExpr* n, const ExprPList& args);
bool BuiltIn_Files__set_reassembly_buffer(const NameExpr* n, const ExprPList& args);
bool BuiltIn_Log__write(const NameExpr* n, const ExprPList& args);
bool BuiltIn_current_time(const NameExpr* n, const ExprPList& args);
// Handles the script-level function get_port_transport_proto.
bool BuiltIn_get_port_etc(const NameExpr* n, const ExprPList& args);
bool BuiltIn_network_time(const NameExpr* n, const ExprPList& args);
bool BuiltIn_reading_live_traffic(const NameExpr* n, const ExprPList& args);
bool BuiltIn_reading_traces(const NameExpr* n, const ExprPList& args);
bool BuiltIn_strstr(const NameExpr* n, const ExprPList& args);
bool BuiltIn_sub_bytes(const NameExpr* n, const ExprPList& args);
bool BuiltIn_to_lower(const NameExpr* n, const ExprPList& args);

View file

@ -0,0 +1,639 @@
// See the file "COPYING" in the main distribution directory for copyright.
// ZAM: Zeek Abstract Machine compiler.
#pragma once
#include "zeek/Event.h"
#include "zeek/script_opt/UseDefs.h"
#include "zeek/script_opt/ZAM/ZBody.h"
namespace zeek {
class EventHandler;
}
namespace zeek::detail {
class NameExpr;
class ConstExpr;
class FieldExpr;
class ListExpr;
class Stmt;
class SwitchStmt;
class CatchReturnStmt;
class ProfileFunc;
typedef ZInstI* InstLabel;
// Handle for a single compiled statement.  (This is different from,
// but related to, the ZAM instruction(s) generated when compiling it.)
// Meant to be fully opaque to everything but the compiler, while being
// a plain value so that no pointer management is needed.
class ZAMStmt {
protected:
    friend class ZAMCompiler;

    // A statement number of -1 flags that it still needs to be set.
    ZAMStmt() : stmt_num(-1) { }

    // Deliberately not "explicit": the compiler relies on converting
    // statement numbers to ZAMStmt's implicitly.
    ZAMStmt(int _stmt_num) : stmt_num(_stmt_num) { }

    int stmt_num;
};
// Wrapper for values that only have meaning to the ZAM compiler, but
// that external objects need to hold on to (opaquely, via a pointer).
// Simply stores the pointer it's given.
class OpaqueVals {
public:
    OpaqueVals(ZInstAux* _aux) : aux(_aux) { }

    ZInstAux* aux;
};
class ZAMCompiler {
public:
ZAMCompiler(ScriptFunc* f, std::shared_ptr<ProfileFunc> pf,
ScopePtr scope, StmtPtr body, std::shared_ptr<UseDefs> ud,
std::shared_ptr<Reducer> rd);
StmtPtr CompileBody();
const FrameReMap& FrameDenizens() const
{ return shared_frame_denizens_final; }
const std::vector<int>& ManagedSlots() const
{ return managed_slotsI; }
const std::vector<GlobalInfo>& Globals() const
{ return globalsI; }
bool NonRecursive() const { return non_recursive; }
const TableIterVec& GetTableIters() const { return table_iters; }
int NumStepIters() const { return num_step_iters; }
// Returns the concrete switch tables for switches whose values have
// type T.  T must be one of bro_int_t, bro_uint_t, double, or
// std::string; any other type is rejected at compile time (rather
// than yielding a function that returns without a value).
template <typename T>
const CaseMaps<T>& GetCases() const
    {
    if constexpr ( std::is_same_v<T, bro_int_t> )
        return int_cases;
    else if constexpr ( std::is_same_v<T, bro_uint_t> )
        return uint_cases;
    else if constexpr ( std::is_same_v<T, double> )
        return double_cases;
    else if constexpr ( std::is_same_v<T, std::string> )
        return str_cases;
    else
        {
        // The condition depends on T so that it's only evaluated if
        // this branch actually gets instantiated.
        static_assert(! std::is_same_v<T, T>,
                      "GetCases: unsupported switch value type");
        }
    }
void Dump();
private:
void Init();
void InitGlobals();
void InitArgs();
void InitLocals();
void TrackMemoryManagement();
void ResolveHookBreaks();
void ComputeLoopLevels();
void AdjustBranches();
void RetargetBranches();
void RemapFrameDenizens(const std::vector<int>& inst1_to_inst2);
void CreateSharedFrameDenizens();
void ConcretizeSwitches();
// The following are used for switch statements, mapping the
// switch value (which can be any atomic type) to a branch target.
// We have vectors of them because functions can contain multiple
// switches.
// See ZBody.h for their concrete counterparts, which we've
// already #include'd.
template<typename T> using CaseMapI = std::map<T, InstLabel>;
template<typename T> using CaseMapsI = std::vector<CaseMapI<T>>;
template <typename T>
void ConcretizeSwitchTables(const CaseMapsI<T>& abstract_cases,
CaseMaps<T>& concrete_cases);
template <typename T>
void DumpCases(const T& cases, const char* type_name) const;
void DumpInsts1(const FrameReMap* remappings);
#include "zeek/ZAM-MethodDecls.h"
const ZAMStmt CompileStmt(const StmtPtr& body)
{ return CompileStmt(body.get()); }
const ZAMStmt CompileStmt(const Stmt* body);
void SetCurrStmt(const Stmt* stmt) { curr_stmt = stmt; }
const ZAMStmt CompilePrint(const PrintStmt* ps);
const ZAMStmt CompileExpr(const ExprStmt* es);
const ZAMStmt CompileIf(const IfStmt* is);
const ZAMStmt CompileSwitch(const SwitchStmt* sw);
const ZAMStmt CompileAdd(const AddStmt* as);
const ZAMStmt CompileDel(const DelStmt* ds);
const ZAMStmt CompileWhile(const WhileStmt* ws);
const ZAMStmt CompileFor(const ForStmt* f);
const ZAMStmt CompileReturn(const ReturnStmt* r);
const ZAMStmt CompileCatchReturn(const CatchReturnStmt* cr);
const ZAMStmt CompileStmts(const StmtList* sl);
const ZAMStmt CompileInit(const InitStmt* is);
const ZAMStmt CompileWhen(const WhenStmt* ws);
const ZAMStmt CompileNext()
{ return GenGoTo(nexts.back()); }
const ZAMStmt CompileBreak()
{ return GenGoTo(breaks.back()); }
const ZAMStmt CompileFallThrough()
{ return GenGoTo(fallthroughs.back()); }
const ZAMStmt CompileCatchReturn()
{ return GenGoTo(catches.back()); }
const ZAMStmt IfElse(const Expr* e, const Stmt* s1, const Stmt* s2);
const ZAMStmt While(const Stmt* cond_stmt, const Expr* cond,
const Stmt* body);
const ZAMStmt InitRecord(IDPtr id, RecordType* rt);
const ZAMStmt InitVector(IDPtr id, VectorType* vt);
const ZAMStmt InitTable(IDPtr id, TableType* tt, Attributes* attrs);
const ZAMStmt ValueSwitch(const SwitchStmt* sw, const NameExpr* v,
const ConstExpr* c);
const ZAMStmt TypeSwitch(const SwitchStmt* sw, const NameExpr* v,
const ConstExpr* c);
void PushNexts() { PushGoTos(nexts); }
void PushBreaks() { PushGoTos(breaks); }
void PushFallThroughs() { PushGoTos(fallthroughs); }
void PushCatchReturns() { PushGoTos(catches); }
void ResolveNexts(const InstLabel l)
{ ResolveGoTos(nexts, l); }
void ResolveBreaks(const InstLabel l)
{ ResolveGoTos(breaks, l); }
void ResolveFallThroughs(const InstLabel l)
{ ResolveGoTos(fallthroughs, l); }
void ResolveCatchReturns(const InstLabel l)
{ ResolveGoTos(catches, l); }
const ZAMStmt LoopOverTable(const ForStmt* f, const NameExpr* val);
const ZAMStmt LoopOverVector(const ForStmt* f, const NameExpr* val);
const ZAMStmt LoopOverString(const ForStmt* f, const Expr* e);
const ZAMStmt FinishLoop(const ZAMStmt iter_head, ZInstI iter_stmt,
const Stmt* body, int iter_slot,
bool is_table);
const ZAMStmt Loop(const Stmt* body);
const ZAMStmt CompileExpr(const ExprPtr& e)
{ return CompileExpr(e.get()); }
const ZAMStmt CompileExpr(const Expr* body);
const ZAMStmt CompileIncrExpr(const IncrExpr* e);
const ZAMStmt CompileAppendToExpr(const AppendToExpr* e);
const ZAMStmt CompileAssignExpr(const AssignExpr* e);
const ZAMStmt CompileAssignToIndex(const NameExpr* lhs,
const IndexExpr* rhs);
const ZAMStmt CompileFieldLHSAssignExpr(const FieldLHSAssignExpr* e);
const ZAMStmt CompileScheduleExpr(const ScheduleExpr* e);
const ZAMStmt CompileSchedule(const NameExpr* n, const ConstExpr* c,
int is_interval, EventHandler* h,
const ListExpr* l);
const ZAMStmt CompileEvent(EventHandler* h, const ListExpr* l);
const ZAMStmt CompileInExpr(const NameExpr* n1, const NameExpr* n2,
const NameExpr* n3)
{ return CompileInExpr(n1, n2, nullptr, n3, nullptr); }
const ZAMStmt CompileInExpr(const NameExpr* n1, const NameExpr* n2,
const ConstExpr* c)
{ return CompileInExpr(n1, n2, nullptr, nullptr, c); }
const ZAMStmt CompileInExpr(const NameExpr* n1, const ConstExpr* c,
const NameExpr* n3)
{ return CompileInExpr(n1, nullptr, c, n3, nullptr); }
// In the following, one of n2 or c2 (likewise, n3/c3) will be nil.
const ZAMStmt CompileInExpr(const NameExpr* n1, const NameExpr* n2,
const ConstExpr* c2, const NameExpr* n3,
const ConstExpr* c3);
const ZAMStmt CompileInExpr(const NameExpr* n1, const ListExpr* l,
const NameExpr* n2)
{ return CompileInExpr(n1, l, n2, nullptr); }
const ZAMStmt CompileInExpr(const NameExpr* n, const ListExpr* l,
const ConstExpr* c)
{ return CompileInExpr(n, l, nullptr, c); }
const ZAMStmt CompileInExpr(const NameExpr* n1, const ListExpr* l,
const NameExpr* n2, const ConstExpr* c);
const ZAMStmt CompileIndex(const NameExpr* n1, const NameExpr* n2,
const ListExpr* l);
const ZAMStmt CompileIndex(const NameExpr* n1, const ConstExpr* c,
const ListExpr* l);
const ZAMStmt CompileIndex(const NameExpr* n1, int n2_slot,
const TypePtr& n2_type, const ListExpr* l);
// Second argument is which instruction slot holds the branch target.
const ZAMStmt GenCond(const Expr* e, int& branch_v);
const ZAMStmt Call(const ExprStmt* e);
const ZAMStmt AssignToCall(const ExprStmt* e);
const ZAMStmt DoCall(const CallExpr* c, const NameExpr* n);
const ZAMStmt AssignVecElems(const Expr* e);
const ZAMStmt AssignTableElem(const Expr* e);
const ZAMStmt AppendToField(const NameExpr* n1, const NameExpr* n2,
const ConstExpr* c, int offset);
const ZAMStmt ConstructTable(const NameExpr* n, const Expr* e);
const ZAMStmt ConstructSet(const NameExpr* n, const Expr* e);
const ZAMStmt ConstructRecord(const NameExpr* n, const Expr* e);
const ZAMStmt ConstructVector(const NameExpr* n, const Expr* e);
const ZAMStmt ArithCoerce(const NameExpr* n, const Expr* e);
const ZAMStmt RecordCoerce(const NameExpr* n, const Expr* e);
const ZAMStmt TableCoerce(const NameExpr* n, const Expr* e);
const ZAMStmt VectorCoerce(const NameExpr* n, const Expr* e);
const ZAMStmt Is(const NameExpr* n, const Expr* e);
#include "zeek/script_opt/ZAM/Inst-Gen.h"
#include "zeek/script_opt/ZAM/BuiltIn.h"
// A bit weird, but handy for switch statements used in built-in
// operations: returns a bit mask of which of the arguments in the
// given list correspond to constants, with the high-ordered bit
// being the first argument (argument "0" in the list) and the
// low-ordered bit being the last. Second parameter is the number
// of arguments that should be present.
bro_uint_t ConstArgsMask(const ExprPList& args, int nargs) const;
// For a variable, returns its frame slot.  Anything else is treated
// as a constant, whose value (as an "int") is returned.
int ConvertToInt(const Expr* e)
    {
    if ( e->Tag() != EXPR_NAME )
        return e->AsConstExpr()->Value()->AsInt();

    return FrameSlot(e->AsNameExpr()->Id());
    }
// For a variable, returns its frame slot.  Anything else is treated
// as a constant, whose value (as a "count") is returned.
int ConvertToCount(const Expr* e)
    {
    if ( e->Tag() != EXPR_NAME )
        return e->AsConstExpr()->Value()->AsCount();

    return FrameSlot(e->AsNameExpr()->Id());
    }
typedef std::vector<ZAMStmt> GoToSet;
typedef std::vector<GoToSet> GoToSets;
void PushGoTos(GoToSets& gotos);
void ResolveGoTos(GoToSets& gotos, const InstLabel l);
ZAMStmt GenGoTo(GoToSet& v);
ZAMStmt GoToStub();
ZAMStmt GoTo(const InstLabel l);
InstLabel GoToTarget(const ZAMStmt s);
InstLabel GoToTargetBeyond(const ZAMStmt s);
void SetTarget(ZInstI* inst, const InstLabel l, int slot);
// Given a GoTo target, find its live equivalent (first instruction
// at that location or beyond that's live).
ZInstI* FindLiveTarget(ZInstI* goto_target);
// Given an instruction that has a slot associated with the
// given target, updates the slot to correspond with the current
// instruction number of the target.
void ConcretizeBranch(ZInstI* inst, ZInstI* target, int target_slot);
// Makes operand "v" of statement "s" a branch to label "l".  Values
// of "v" other than 1, 2 or 3 are treated as referring to operand 4.
void SetV(ZAMStmt s, const InstLabel l, int v)
    {
    switch ( v ) {
    case 1:
        SetV1(s, l);
        break;

    case 2:
        SetV2(s, l);
        break;

    case 3:
        SetV3(s, l);
        break;

    default:
        SetV4(s, l);
        break;
    }
    }
void SetV1(ZAMStmt s, const InstLabel l);
void SetV2(ZAMStmt s, const InstLabel l);
void SetV3(ZAMStmt s, const InstLabel l);
void SetV4(ZAMStmt s, const InstLabel l);
void SetGoTo(ZAMStmt s, const InstLabel targ)
{ SetV1(s, targ); }
const ZAMStmt StartingBlock();
const ZAMStmt FinishBlock(const ZAMStmt start);
bool NullStmtOK() const;
const ZAMStmt EmptyStmt();
const ZAMStmt ErrorStmt();
const ZAMStmt LastInst();
// Returns a handle to state associated with building
// up a list of values.
OpaqueVals* BuildVals(const ListExprPtr&);
// "stride" is how many slots each element of l will consume.
ZInstAux* InternalBuildVals(const ListExpr* l, int stride = 1);
// Returns how many values were added.
int InternalAddVal(ZInstAux* zi, int i, Expr* e);
const ZAMStmt AddInst(const ZInstI& inst);
// Returns the statement just before the given one.
ZAMStmt PrevStmt(const ZAMStmt s);
// Returns the last (interpreter) statement in the body.
const Stmt* LastStmt(const Stmt* s) const;
// Returns the most recent added instruction *other* than those
// added for bookkeeping.
ZInstI* TopMainInst() { return insts1[top_main_inst]; }
bool IsUnused(const IDPtr& id, const Stmt* where) const;
void LoadParam(ID* id);
const ZAMStmt LoadGlobal(ID* id);
int AddToFrame(ID*);
int FrameSlot(const IDPtr& id) { return FrameSlot(id.get()); }
int FrameSlot(const ID* id);
// Returns the frame slot for "e" if it's a variable, and 0 otherwise.
int FrameSlotIfName(const Expr* e)
    {
    if ( e->Tag() != EXPR_NAME )
        return 0;

    return FrameSlot(e->AsNameExpr()->Id());
    }
int FrameSlot(const NameExpr* id)
{ return FrameSlot(id->AsNameExpr()->Id()); }
int Frame1Slot(const NameExpr* id, ZOp op)
{ return Frame1Slot(id->AsNameExpr()->Id(), op); }
int Frame1Slot(const ID* id, ZOp op)
{ return Frame1Slot(id, op1_flavor[op]); }
int Frame1Slot(const NameExpr* n, ZAMOp1Flavor fl)
{ return Frame1Slot(n->Id(), fl); }
int Frame1Slot(const ID* id, ZAMOp1Flavor fl);
// The slot without doing any global-related checking.
int RawSlot(const NameExpr* n) { return RawSlot(n->Id()); }
int RawSlot(const ID* id);
bool HasFrameSlot(const ID* id) const;
int NewSlot(const TypePtr& t)
{ return NewSlot(ZVal::IsManagedType(t)); }
int NewSlot(bool is_managed);
int TempForConst(const ConstExpr* c);
////////////////////////////////////////////////////////////
// The following methods relate to optimizing the low-level
// ZAM function body after it is initially generated. They're
// factored out into ZOpt.cc since they're structurally quite
// different from the methods above that relate to the initial
// compilation.
// Optimizing the low-level compiled instructions.
void OptimizeInsts();
// Tracks which instructions can be branched to via the given
// set of switches.
template<typename T>
void TallySwitchTargets(const CaseMapsI<T>& switches);
// Remove code that can't be reached. True if some removal happened.
bool RemoveDeadCode();
// Collapse chains of gotos. True if something changed.
bool CollapseGoTos();
// Prune statements that are unnecessary. True if something got
// pruned.
bool PruneUnused();
// For the current state of insts1, compute lifetimes of frame
// denizens (variable(s) using a given frame slot) in terms of
// first-instruction-to-last-instruction during which they're
// relevant, including consideration for loops.
void ComputeFrameLifetimes();
// Given final frame lifetime information, remaps frame members
// with non-overlapping lifetimes to share slots.
void ReMapFrame();
// Given final frame lifetime information, remaps slots in the
// interpreter frame. (No longer strictly necessary.)
void ReMapInterpreterFrame();
// Computes the remapping for a variable currently in the given slot,
// whose scope begins at the given instruction.
void ReMapVar(ID* id, int slot, int inst);
// Look to initialize the beginning of local lifetime based on slot
// assignment at instruction inst.
void CheckSlotAssignment(int slot, const ZInstI* inst);
// Track that a local's lifetime begins at the given statement.
void SetLifetimeStart(int slot, const ZInstI* inst);
// Look for extension of local lifetime based on slot usage
// at instruction inst.
void CheckSlotUse(int slot, const ZInstI* inst);
// Extend (or create) the end of a local's lifetime.
void ExtendLifetime(int slot, const ZInstI* inst);
// Returns the (live) instruction at the beginning/end of the loop(s)
// within which the given instruction lies; or that instruction
// itself if it's not inside a loop. The second argument specifies
// the loop depth. For example, a value of '2' means "extend to
// the beginning/end of any loop(s) of depth >= 2".
const ZInstI* BeginningOfLoop(const ZInstI* inst, int depth) const;
const ZInstI* EndOfLoop(const ZInstI* inst, int depth) const;
// True if any statement other than a frame sync assigns to the
// given slot.
bool VarIsAssigned(int slot) const;
// True if the given statement assigns to the given slot, and
// it's not a frame sync.
bool VarIsAssigned(int slot, const ZInstI* i) const;
// True if any statement other than a frame sync uses the given slot.
bool VarIsUsed(int slot) const;
// Find the first non-dead instruction after i (inclusive).
// If follow_gotos is true, then if that instruction is
// an unconditional branch, continues the process until
// a different instruction is found (and report if there
// are infinite loops).
//
// First form returns nil if there's nothing live after i.
// Second form returns insts1.size() in that case.
ZInstI* FirstLiveInst(ZInstI* i, bool follow_gotos = false);
int FirstLiveInst(int i, bool follow_gotos = false);
// Same, but not including i.
ZInstI* NextLiveInst(ZInstI* i, bool follow_gotos = false)
	{
	// The final instruction in insts1 has no successor at all, live
	// or otherwise.
	const bool is_last = i->inst_num == insts1.size() - 1;

	return is_last ? nullptr :
			FirstLiveInst(insts1[i->inst_num + 1], follow_gotos);
	}
int NextLiveInst(int i, bool follow_gotos = false)
	{
	// Index-based form: start the search one past i.
	const int successor = i + 1;
	return FirstLiveInst(successor, follow_gotos);
	}
// Mark an instruction as unnecessary and remove its influence on
// other statements. The instruction is indicated as an offset
// into insts1; any labels associated with it are transferred
// to its next live successor, if any.
void KillInst(ZInstI* i)
	{
	// Delegate to the index-based form using the instruction's
	// position in insts1.
	const auto inst_index = i->inst_num;
	KillInst(inst_index);
	}
void KillInst(int i);
// The same, but kills any successor instructions until finding
// one that's labeled.
void KillInsts(ZInstI* i)
	{
	// Delegate to the index-based form using the instruction's
	// position in insts1.
	const auto inst_index = i->inst_num;
	KillInsts(inst_index);
	}
void KillInsts(int i);
// The first of these is used as we compile down to ZInstI's.
// The second is the final intermediary code. They're separate
// to make it easy to remove dead code.
std::vector<ZInstI*> insts1;
std::vector<ZInstI*> insts2;
// Used as a placeholder when we have to generate a GoTo target
// beyond the end of what we've compiled so far.
ZInstI* pending_inst = nullptr;
// Indices of break/next/fallthrough/catch-return goto's, so they
// can be patched up post-facto. These are vectors-of-vectors
// so that nesting works properly.
GoToSets breaks;
GoToSets nexts;
GoToSets fallthroughs;
GoToSets catches;
// The following tracks return variables for catch-returns.
// Can be nil if the usage doesn't include using the return value
// (and/or no return value generated).
std::vector<const NameExpr*> retvars;
ScriptFunc* func;
std::shared_ptr<ProfileFunc> pf;
ScopePtr scope;
StmtPtr body;
std::shared_ptr<UseDefs> ud;
std::shared_ptr<Reducer> reducer;
// Maps identifiers to their (unique) frame location.
std::unordered_map<const ID*, int> frame_layout1;
// Inverse mapping, used for tracking frame usage (and for dumping
// statements).
FrameMap frame_denizens;
// The same, but for remapping identifiers to shared frame slots.
FrameReMap shared_frame_denizens;
// The same, but renumbered to take into account removal of
// dead statements.
FrameReMap shared_frame_denizens_final;
// Maps frame1 slots to frame2 slots. A value < 0 means the
// variable doesn't exist in frame2 - it's an error to encounter
// one of these when remapping instructions!
std::vector<int> frame1_to_frame2;
// A type for mapping an instruction to a set of locals associated
// with it.
typedef std::unordered_map<const ZInstI*, std::unordered_set<ID*>>
AssociatedLocals;
// Maps (live) instructions to which frame denizens begin their
// lifetime via an initialization at that instruction, if any ...
// (it can be more than one local due to extending lifetimes to
// span loop bodies)
AssociatedLocals inst_beginnings;
// ... and which frame denizens had their last usage at the
// given instruction. (These are insts1 instructions, prior to
// removing dead instructions, compressing the frames, etc.)
AssociatedLocals inst_endings;
// A type for inverse mappings.
typedef std::unordered_map<int, const ZInstI*> AssociatedInsts;
// Inverse mappings: for a given frame denizen's slot, where its
// lifetime begins and ends.
AssociatedInsts denizen_beginning;
AssociatedInsts denizen_ending;
// In the following, member variables ending in 'I' are intermediary
// values that get finalized when constructing the corresponding
// ZBody.
std::vector<GlobalInfo> globalsI;
std::unordered_map<const ID*, int> global_id_to_info; // inverse
// Intermediary switch tables (branching to ZInst's rather
// than concrete instruction offsets).
CaseMapsI<bro_int_t> int_casesI;
CaseMapsI<bro_uint_t> uint_casesI;
CaseMapsI<double> double_casesI;
// Note, we use this not only for strings but for addresses
// and prefixes.
CaseMapsI<std::string> str_casesI;
// Same, but for the concretized versions.
CaseMaps<bro_int_t> int_cases;
CaseMaps<bro_uint_t> uint_cases;
CaseMaps<double> double_cases;
CaseMaps<std::string> str_cases;
std::vector<int> managed_slotsI;
int frame_sizeI;
TableIterVec table_iters;
int num_step_iters = 0;
bool non_recursive = false;
// Most recent instruction, other than for housekeeping.
int top_main_inst;
// Used for communication between Frame1Slot and a subsequent
// AddInst. If >= 0, then upon adding the next instruction,
// it should be followed by Store-Global for the given slot.
int pending_global_store = -1;
};
// Invoked after compiling all of the function bodies.
class FuncInfo;
extern void finalize_functions(const std::vector<FuncInfo>& funcs);
} // namespace zeek::detail

View file

@ -0,0 +1,503 @@
// See the file "COPYING" in the main distribution directory for copyright.
// Driver (and other high-level) methods for ZAM compilation.
#include "zeek/CompHash.h"
#include "zeek/RE.h"
#include "zeek/Frame.h"
#include "zeek/module_util.h"
#include "zeek/Scope.h"
#include "zeek/Reporter.h"
#include "zeek/script_opt/ScriptOpt.h"
#include "zeek/script_opt/ProfileFunc.h"
#include "zeek/script_opt/ZAM/Compile.h"
namespace zeek::detail {
// Builds a compiler for the given script function. Ownership-sharing
// arguments are moved into the corresponding members, after which Init()
// lays out the initial frame.
ZAMCompiler::ZAMCompiler(ScriptFunc* f, std::shared_ptr<ProfileFunc> _pf,
                         ScopePtr _scope, StmtPtr _body,
                         std::shared_ptr<UseDefs> _ud,
                         std::shared_ptr<Reducer> _rd)
	: func(f),
	  pf(std::move(_pf)),
	  scope(std::move(_scope)),
	  body(std::move(_body)),
	  ud(std::move(_ud)),
	  reducer(std::move(_rd)),
	  frame_sizeI(0)
	{
	Init();
	}
// Sets up the compiler's initial state: frame slots for globals,
// parameters and locals, the list of managed slots, and whether the
// function is known to be non-recursive.
void ZAMCompiler::Init()
	{
	InitGlobals();
	InitArgs();
	InitLocals();

#if 0
	// Complain about unused aggregates ... but not if we're inlining,
	// as that can lead to optimizations where they wind up being unused
	// but the original logic for using them was sound.
	if ( ! analysis_options.inliner )
		for ( auto a : pf->Inits() )
			{
			if ( pf->Locals().find(a) == pf->Locals().end() )
				reporter->Warning("%s unused", a->Name());
			}
#endif

	TrackMemoryManagement();

	non_recursive =
		non_recursive_funcs.find(func) != non_recursive_funcs.end();
	}
void ZAMCompiler::InitGlobals()
{
for ( auto g : pf->Globals() )
{
auto non_const_g = const_cast<ID*>(g);
GlobalInfo info;
info.id = {NewRef{}, non_const_g};
info.slot = AddToFrame(non_const_g);
global_id_to_info[non_const_g] = globalsI.size();
globalsI.push_back(info);
}
}
// Loads those function parameters that the body's use-defs say are
// actually used. Only the first N ordered variables of the scope are
// considered, where N is the number of parameters in the function's type.
void ZAMCompiler::InitArgs()
	{
	auto body_stmt = body.get();
	auto uds = ud->HasUsage(body_stmt) ? ud->GetUsage(body_stmt) :
						nullptr;

	auto args = scope->OrderedVars();
	int remaining_params = func->GetType()->Params()->NumFields();

	push_existing_scope(scope);

	for ( const auto& a : args )
		{
		// Stop once we've walked past the last parameter.
		if ( remaining_params-- <= 0 )
			break;

		auto arg_id = a.get();

		// Parameters with no recorded usage are simply skipped.
		if ( uds && uds->HasID(arg_id) )
			LoadParam(arg_id);
		}

	pop_scope();
	}
// Assigns frame slots to the function's locals (which include
// temporaries), skipping any that already have one.
void ZAMCompiler::InitLocals()
	{
	for ( auto l : pf->Locals() )
		{
		auto id = const_cast<ID*>(l);

		// ### should check for unused variables.

		// Parameters were given slots earlier; don't add them twice.
		if ( HasFrameSlot(id) )
			continue;

		(void) AddToFrame(id);
		}
	}
// Records in managed_slotsI every frame slot whose identifier has a type
// for which explicit memory management is done on (re)assignment.
void ZAMCompiler::TrackMemoryManagement()
	{
	for ( auto& [id, slot_num] : frame_layout1 )
		if ( ZVal::IsManagedType(id->GetType()) )
			managed_slotsI.push_back(slot_num);
	}
// Compiles the function body down to ZAM instructions and wraps them in
// a ZBody, which is returned. Returns nullptr if the reporter's error
// count is non-zero once the statements have been compiled.
//
// The steps are: compile into insts1, resolve/diagnose dangling
// break/next/fallthrough/catch gotos, number the instructions, compute
// loop levels, optionally optimize, then copy the live instructions into
// insts2 and concretize branches, frame denizens and switch tables.
StmtPtr ZAMCompiler::CompileBody()
{
curr_stmt = nullptr;
// Hooks get an outermost "break" context; those breaks are rewritten
// by ResolveHookBreaks() below.
if ( func->Flavor() == FUNC_FLAVOR_HOOK )
PushBreaks();
(void) CompileStmt(body);
if ( reporter->Errors() > 0 )
return nullptr;
ResolveHookBreaks();
// Any goto sets still pending at this point were never matched with
// an enclosing construct.
if ( nexts.size() > 0 )
reporter->Error("\"next\" used without an enclosing \"for\"");
if ( fallthroughs.size() > 0 )
reporter->Error("\"fallthrough\" used without an enclosing \"switch\"");
if ( catches.size() > 0 )
reporter->InternalError("untargeted inline return");
// Make sure we have a (pseudo-)instruction at the end so we
// can use it as a branch label.
if ( ! pending_inst )
pending_inst = new ZInstI();
// Concretize instruction numbers in insts1 so we can
// easily move through the code.
for ( auto i = 0U; i < insts1.size(); ++i )
insts1[i]->inst_num = i;
ComputeLoopLevels();
if ( ! analysis_options.no_ZAM_opt )
OptimizeInsts();
AdjustBranches();
// Construct the final program with the dead code eliminated
// and branches resolved.
// Make sure we don't include the empty pending-instruction, if any.
if ( pending_inst )
pending_inst->live = false;
// Maps insts1 instructions to where they are in insts2.
// Dead instructions map to -1.
std::vector<int> inst1_to_inst2;
for ( auto i = 0U; i < insts1.size(); ++i )
{
if ( insts1[i]->live )
{
inst1_to_inst2.push_back(insts2.size());
insts2.push_back(insts1[i]);
}
else
inst1_to_inst2.push_back(-1);
}
// Re-concretize instruction numbers, and concretize GoTo's.
// Note that this overwrites inst_num for the live instructions, which
// are shared between insts1 and insts2.
for ( auto i = 0U; i < insts2.size(); ++i )
insts2[i]->inst_num = i;
RetargetBranches();
// If we have remapped frame denizens, update them. If not,
// create them.
if ( shared_frame_denizens.size() > 0 )
RemapFrameDenizens(inst1_to_inst2);
else
CreateSharedFrameDenizens();
// NOTE(review): pending_inst is freed here but not reset to nullptr;
// confirm nothing dereferences it (or compares targets against it)
// after this point.
delete pending_inst;
ConcretizeSwitches();
// Could erase insts1 here to recover memory, but it's handy
// for debugging.
#if 0
if ( non_recursive )
func->UseStaticFrame();
#endif
auto zb = make_intrusive<ZBody>(func->Name(), this);
zb->SetInsts(insts2);
return zb;
}
// Deals with "break"s left over at the outermost level. In a hook they
// are rewritten into hook-break instructions; anywhere else they are
// reported as an error.
void ZAMCompiler::ResolveHookBreaks()
	{
	if ( breaks.size() == 0 )
		return;

	ASSERT(breaks.size() == 1);

	if ( func->Flavor() != FUNC_FLAVOR_HOOK )
		{
		reporter->Error("\"break\" used without an enclosing \"for\" or \"switch\"");
		return;
		}

	// Replace each pending break with a fresh hook-break instruction.
	for ( auto& b : breaks[0] )
		{
		auto& slot = insts1[b.stmt_num];
		delete slot;
		slot = new ZInstI(OP_HOOK_BREAK_X);
		}
	}
// Walks insts1 and, for every instruction whose branch target lies
// earlier in the instruction list (a backward branch), increments the
// loop_depth of the instructions that the branch spans. Relies on
// inst_num having already been set for every element of insts1.
void ZAMCompiler::ComputeLoopLevels()
{
// Compute which instructions are inside loops.
for ( auto i = 0; i < int(insts1.size()); ++i )
{
auto inst = insts1[i];
auto t = inst->target;
// Skip non-branches, and branches to the end-of-body placeholder.
if ( ! t || t == pending_inst )
continue;
// Only backward branches are treated as forming loops.
if ( t->inst_num < i )
{
auto j = t->inst_num;
if ( ! t->loop_start )
{
// Loop is newly discovered.
t->loop_start = true;
}
else
{
// We're extending an existing loop. Find
// its current end.
auto depth = t->loop_depth;
// Advance j past the instructions already at the loop's depth,
// stopping no later than i.
while ( j < i &&
insts1[j]->loop_depth == depth )
++j;
ASSERT(insts1[j]->loop_depth == depth - 1);
}
// Run from j's current position to i, bumping
// the loop depth.
while ( j <= i )
{
++insts1[j]->loop_depth;
++j;
}
}
}
}
// For every live instruction that branches somewhere, replaces its
// target with whatever FindLiveTarget() returns for it, so that branches
// to dead code move forward to the successor live code.
void ZAMCompiler::AdjustBranches()
	{
	for ( auto i = 0U; i < insts1.size(); ++i )
		{
		auto inst = insts1[i];

		if ( inst->live && inst->target )
			inst->target = FindLiveTarget(inst->target);
		}
	}
// Concretizes the branch of every instruction in insts2 that has a
// target.
void ZAMCompiler::RetargetBranches()
	{
	for ( auto i = 0U; i < insts2.size(); ++i )
		{
		auto inst = insts2[i];

		if ( inst->target )
			ConcretizeBranch(inst, inst->target, inst->target_slot);
		}
	}
// Translates the starting instruction of every shared frame denizen from
// insts1 numbering to insts2 numbering (updating shared_frame_denizens in
// place), and appends the updated entries to shared_frame_denizens_final.
void ZAMCompiler::RemapFrameDenizens(const std::vector<int>& inst1_to_inst2)
	{
	const int num_insts1 = int(insts1.size());

	for ( auto& info : shared_frame_denizens )
		{
		for ( auto& start : info.id_start )
			{
			// The identifier's origination instruction may have
			// been optimized away (if, due to slot sharing, it was
			// of the form "slotX = slotX"). In that case, skip
			// forward to the next instruction that survived.
			for ( ; start < num_insts1 &&
			        inst1_to_inst2[start] == -1; ++start )
				;

			ASSERT(start < insts1.size());
			start = inst1_to_inst2[start];
			}

		shared_frame_denizens_final.push_back(info);
		}
	}
// Builds shared_frame_denizens_final for the case where no frame
// remapping took place: one entry per frame denizen, each spanning the
// whole of insts2.
void ZAMCompiler::CreateSharedFrameDenizens()
	{
	for ( auto i = 0U; i < frame_denizens.size(); ++i )
		{
		FrameSharingInfo info;

		// This value only matters during compilation, not during
		// execution, so what we pick here is irrelevant.
		info.is_managed = false;

		info.scope_end = insts2.size();
		info.id_start.push_back(0);
		info.ids.push_back(frame_denizens[i]);

		shared_frame_denizens_final.push_back(info);
		}
	}
// Converts each of the four intermediary switch-table collections
// (int, uint, double, string) into its concretized counterpart by
// calling ConcretizeSwitchTables() on each pair.
void ZAMCompiler::ConcretizeSwitches()
{
// Create concretized versions of any case tables.
ConcretizeSwitchTables(int_casesI, int_cases);
ConcretizeSwitchTables(uint_casesI, uint_cases);
ConcretizeSwitchTables(double_casesI, double_cases);
ConcretizeSwitchTables(str_casesI, str_cases);
}
template <typename T>
void ZAMCompiler::ConcretizeSwitchTables(const CaseMapsI<T>& abstract_cases,
CaseMaps<T>& concrete_cases)
{
for ( auto& targs : abstract_cases )
{
CaseMap<T> cm;
for ( auto& targ : targs )
cm[targ.first] = targ.second->inst_num;
concrete_cases.push_back(cm);
}
}
#include "ZAM-MethodDefs.h"
// Prints a human-readable dump to stdout: the frame layout(s), the
// instructions prior to dead-code removal, two listings of the final
// instructions, and the intermediary switch tables.
void ZAMCompiler::Dump()
	{
	// Frame remapping information is only shown when low-level
	// optimization is enabled.
	const bool remapped_frame = ! analysis_options.no_ZAM_opt;

	if ( remapped_frame )
		printf("Original frame for %s:\n", func->Name());

	for ( const auto& [id, slot] : frame_layout1 )
		printf("frame[%d] = %s\n", slot, id->Name());

	if ( remapped_frame )
		{
		printf("Final frame for %s:\n", func->Name());

		for ( auto i = 0U; i < shared_frame_denizens.size(); ++i )
			{
			printf("frame2[%d] =", i);

			for ( auto& id : shared_frame_denizens[i].ids )
				printf(" %s", id->Name());

			printf("\n");
			}
		}

	if ( ! insts2.empty() )
		printf("Pre-removal of dead code for %s:\n", func->Name());

	auto remappings = remapped_frame ? &shared_frame_denizens : nullptr;
	DumpInsts1(remappings);

	if ( ! insts2.empty() )
		printf("Final intermediary code for %s:\n", func->Name());

	remappings = remapped_frame ? &shared_frame_denizens_final : nullptr;

	// First listing: annotated with liveness/labels and loop depth.
	for ( auto i = 0U; i < insts2.size(); ++i )
		{
		auto& inst = insts2[i];

		const std::string liveness = inst->live ?
			util::fmt("(labels %d)", inst->num_labels) : "(dead)";
		const std::string depth = inst->loop_depth ?
			util::fmt(" (loop %d)", inst->loop_depth) : "";

		printf("%d %s%s: ", i, liveness.c_str(), depth.c_str());
		inst->Dump(&frame_denizens, remappings);
		}

	if ( ! insts2.empty() )
		printf("Final code for %s:\n", func->Name());

	// Second listing: just the instruction number.
	for ( auto i = 0U; i < insts2.size(); ++i )
		{
		auto& inst = insts2[i];
		printf("%d: ", i);
		inst->Dump(&frame_denizens, remappings);
		}

	DumpCases(int_casesI, "int");
	DumpCases(uint_casesI, "uint");
	DumpCases(double_casesI, "double");
	DumpCases(str_casesI, "str");
	}
// Prints each switch table in "cases" to stdout, one line per table, in
// the form
//
//	<type_name> switch table #<n>: <case>-><inst_num> ...
//
// "cases" is an indexable collection of map-like switch tables whose
// entries pair a case value with the instruction branched to.
template <typename T>
void ZAMCompiler::DumpCases(const T& cases, const char* type_name) const
	{
	for ( auto i = 0U; i < cases.size(); ++i )
		{
		printf("%s switch table #%d:", type_name, i);

		for ( auto& m : cases[i] )
			{
			// T is the type of the whole collection of tables, not
			// of a case value, so testing T against std::string /
			// the numeric types never matches and the value would
			// always print as empty. Derive the case-value type
			// from the table entry instead.
			using CaseVal = std::decay_t<decltype(m.first)>;

			std::string case_val;

			if constexpr ( std::is_same_v<CaseVal, std::string> )
				case_val = m.first;
			else if constexpr ( std::is_same_v<CaseVal, bro_int_t> ||
			                    std::is_same_v<CaseVal, bro_uint_t> ||
			                    std::is_same_v<CaseVal, double> )
				case_val = std::to_string(m.first);

			printf(" %s->%d", case_val.c_str(), m.second->inst_num);
			}

		printf("\n");
		}
	}
// Prints every instruction in insts1 to stdout, annotated with its
// liveness/label count and loop depth. "remappings" is passed through to
// each instruction's Dump() and may be nil.
void ZAMCompiler::DumpInsts1(const FrameReMap* remappings)
	{
	for ( auto i = 0U; i < insts1.size(); ++i )
		{
		auto& inst = insts1[i];

		// The branch slots need concretizing for the dump to show
		// meaningful branch information.
		if ( inst->target )
			ConcretizeBranch(inst, inst->target, inst->target_slot);

		const std::string liveness = inst->live ?
			util::fmt("(labels %d)", inst->num_labels) : "(dead)";
		const std::string depth = inst->loop_depth ?
			util::fmt(" (loop %d)", inst->loop_depth) : "";

		printf("%d %s%s: ", i, liveness.c_str(), depth.c_str());
		inst->Dump(&frame_denizens, remappings);
		}
	}
} // zeek::detail

1221
src/script_opt/ZAM/Expr.cc Normal file

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,167 @@
// See the file "COPYING" in the main distribution directory for copyright.
// Helper functions for generating ZAM code.
#include "zeek/script_opt/ZAM/Compile.h"
namespace zeek::detail {
// Operand-less instruction: just wraps the opcode.
ZInstI ZAMCompiler::GenInst(ZOp op)
	{
	ZInstI z(op);
	return z;
	}
// Instruction with a single variable operand, translated via Frame1Slot().
ZInstI ZAMCompiler::GenInst(ZOp op, const NameExpr* v1)
	{
	const auto slot1 = Frame1Slot(v1, op);
	return ZInstI(op, slot1);
	}
// Variable plus a raw integer; the integer travels in the second slot,
// which op_type flags accordingly.
ZInstI ZAMCompiler::GenInst(ZOp op, const NameExpr* v1, int i)
	{
	ZInstI z(op, Frame1Slot(v1, op), i);
	z.op_type = OP_VV_I2;
	return z;
	}
// Constant, variable and raw integer; the integer travels in the second
// slot.
ZInstI ZAMCompiler::GenInst(ZOp op, const ConstExpr* c, const NameExpr* v1,
                            int i)
	{
	ZInstI z(op, Frame1Slot(v1, op), i, c);
	z.op_type = OP_VVC_I2;
	return z;
	}
// Two variables. v2's slot is looked up before v1's, as in all of the
// multi-variable helpers.
ZInstI ZAMCompiler::GenInst(ZOp op, const NameExpr* v1, const NameExpr* v2)
	{
	const int slot2 = FrameSlot(v2);
	const auto slot1 = Frame1Slot(v1, op);
	return ZInstI(op, slot1, slot2);
	}
// Three variables; v2 and v3 are looked up (in that order) before v1.
ZInstI ZAMCompiler::GenInst(ZOp op, const NameExpr* v1, const NameExpr* v2,
                            const NameExpr* v3)
	{
	const int slot2 = FrameSlot(v2);
	const int slot3 = FrameSlot(v3);
	const auto slot1 = Frame1Slot(v1, op);
	return ZInstI(op, slot1, slot2, slot3);
	}
// Four variables; v2 through v4 are looked up (in that order) before v1.
ZInstI ZAMCompiler::GenInst(ZOp op, const NameExpr* v1, const NameExpr* v2,
                            const NameExpr* v3, const NameExpr* v4)
	{
	const int slot2 = FrameSlot(v2);
	const int slot3 = FrameSlot(v3);
	const int slot4 = FrameSlot(v4);
	const auto slot1 = Frame1Slot(v1, op);
	return ZInstI(op, slot1, slot2, slot3, slot4);
	}
// Instruction whose only operand is a constant.
ZInstI ZAMCompiler::GenInst(ZOp op, const ConstExpr* ce)
	{
	ZInstI z(op, ce);
	return z;
	}
// Variable followed by a constant.
ZInstI ZAMCompiler::GenInst(ZOp op, const NameExpr* v1, const ConstExpr* ce)
	{
	const auto slot1 = Frame1Slot(v1, op);
	return ZInstI(op, slot1, ce);
	}
// Constant followed by a variable; builds the same instruction as the
// variable-then-constant form.
ZInstI ZAMCompiler::GenInst(ZOp op, const ConstExpr* ce, const NameExpr* v1)
	{
	const auto slot1 = Frame1Slot(v1, op);
	return ZInstI(op, slot1, ce);
	}
// Variable, constant, variable; v2 is looked up before v1.
ZInstI ZAMCompiler::GenInst(ZOp op, const NameExpr* v1, const ConstExpr* ce,
                            const NameExpr* v2)
	{
	const int slot2 = FrameSlot(v2);
	const auto slot1 = Frame1Slot(v1, op);
	return ZInstI(op, slot1, slot2, ce);
	}
// Two variables followed by a constant; v2 is looked up before v1.
ZInstI ZAMCompiler::GenInst(ZOp op, const NameExpr* v1, const NameExpr* v2,
                            const ConstExpr* ce)
	{
	const int slot2 = FrameSlot(v2);
	const auto slot1 = Frame1Slot(v1, op);
	return ZInstI(op, slot1, slot2, ce);
	}
// Three variables followed by a constant; v2 and v3 are looked up (in
// that order) before v1.
ZInstI ZAMCompiler::GenInst(ZOp op, const NameExpr* v1, const NameExpr* v2,
                            const NameExpr* v3, const ConstExpr* ce)
	{
	const int slot2 = FrameSlot(v2);
	const int slot3 = FrameSlot(v3);
	const auto slot1 = Frame1Slot(v1, op);
	return ZInstI(op, slot1, slot2, slot3, ce);
	}
// Two variables, a constant, then a third variable.
ZInstI ZAMCompiler::GenInst(ZOp op, const NameExpr* v1, const NameExpr* v2,
                            const ConstExpr* ce, const NameExpr* v3)
	{
	const int slot2 = FrameSlot(v2);
	const int slot3 = FrameSlot(v3);
	const auto slot1 = Frame1Slot(v1, op);

	// The constant is deliberately passed after v3's slot, reversing
	// the order in our own signature; this spares us from needing a
	// redundant ZInstI constructor.
	return ZInstI(op, slot1, slot2, slot3, ce);
	}
// Variable, constant and raw integer; the integer travels in the second
// slot.
ZInstI ZAMCompiler::GenInst(ZOp op, const NameExpr* v1, const ConstExpr* c,
                            int i)
	{
	ZInstI z(op, Frame1Slot(v1, op), i, c);
	z.op_type = OP_VVC_I2;
	return z;
	}
// Two variables and a raw integer; the integer travels in the third
// slot. v2 is looked up before v1.
ZInstI ZAMCompiler::GenInst(ZOp op, const NameExpr* v1, const NameExpr* v2,
                            int i)
	{
	const int slot2 = FrameSlot(v2);
	ZInstI z(op, Frame1Slot(v1, op), slot2, i);
	z.op_type = OP_VVV_I3;
	return z;
	}
// Two variables and two raw integers; the integers travel in the third
// and fourth slots. v2 is looked up before v1.
ZInstI ZAMCompiler::GenInst(ZOp op, const NameExpr* v1, const NameExpr* v2,
                            int i1, int i2)
	{
	const int slot2 = FrameSlot(v2);
	ZInstI z(op, Frame1Slot(v1, op), slot2, i1, i2);
	z.op_type = OP_VVVV_I3_I4;
	return z;
	}
// Variable, constant and two raw integers; the integers travel in the
// second and third slots.
ZInstI ZAMCompiler::GenInst(ZOp op, const NameExpr* v, const ConstExpr* c,
                            int i1, int i2)
	{
	ZInstI z(op, Frame1Slot(v, op), i1, i2, c);
	z.op_type = OP_VVVC_I2_I3;
	return z;
	}
// Three variables and a raw integer; the integer travels in the fourth
// slot. v2 and v3 are looked up (in that order) before v1.
ZInstI ZAMCompiler::GenInst(ZOp op, const NameExpr* v1, const NameExpr* v2,
                            const NameExpr* v3, int i)
	{
	const int slot2 = FrameSlot(v2);
	const int slot3 = FrameSlot(v3);
	ZInstI z(op, Frame1Slot(v1, op), slot2, slot3, i);
	z.op_type = OP_VVVV_I4;
	return z;
	}
// Two variables, a constant and a raw integer; the integer travels in
// the third slot. v2 is looked up before v1.
ZInstI ZAMCompiler::GenInst(ZOp op, const NameExpr* v1, const NameExpr* v2,
                            const ConstExpr* c, int i)
	{
	const int slot2 = FrameSlot(v2);
	ZInstI z(op, Frame1Slot(v1, op), slot2, i, c);
	z.op_type = OP_VVVC_I3;
	return z;
	}
// Variable, constant, variable and a raw integer; builds the same
// instruction as the (v1, v2, c, i) form. v2 is looked up before v1.
ZInstI ZAMCompiler::GenInst(ZOp op, const NameExpr* v1, const ConstExpr* c,
                            const NameExpr* v2, int i)
	{
	const int slot2 = FrameSlot(v2);
	ZInstI z(op, Frame1Slot(v1, op), slot2, i, c);
	z.op_type = OP_VVVC_I3;
	return z;
	}
} // zeek::detail

View file

@ -0,0 +1,39 @@
// See the file "COPYING" in the main distribution directory for copyright.
// Methods for generating ZAM instructions, mainly to aid in translating
// NameExpr*'s to slots. Some aren't needed, but we provide a complete
// set mirroring the ZInstI constructors for consistency.
//
// Maintained separately from Compile.h to make it conceptually simple to
// add new helpers.
ZInstI GenInst(ZOp op);
ZInstI GenInst(ZOp op, const NameExpr* v1);
ZInstI GenInst(ZOp op, const NameExpr* v1, int i);
ZInstI GenInst(ZOp op, const ConstExpr* c, const NameExpr* v1, int i);
ZInstI GenInst(ZOp op, const NameExpr* v1, const NameExpr* v2);
ZInstI GenInst(ZOp op, const NameExpr* v1, const NameExpr* v2,
const NameExpr* v3);
ZInstI GenInst(ZOp op, const NameExpr* v1, const NameExpr* v2,
const NameExpr* v3, const NameExpr* v4);
ZInstI GenInst(ZOp op, const ConstExpr* ce);
ZInstI GenInst(ZOp op, const NameExpr* v1, const ConstExpr* ce);
ZInstI GenInst(ZOp op, const ConstExpr* ce, const NameExpr* v1);
ZInstI GenInst(ZOp op, const NameExpr* v1, const ConstExpr* ce,
const NameExpr* v2);
ZInstI GenInst(ZOp op, const NameExpr* v1, const NameExpr* v2,
const ConstExpr* ce);
ZInstI GenInst(ZOp op, const NameExpr* v1, const NameExpr* v2,
const NameExpr* v3, const ConstExpr* ce);
ZInstI GenInst(ZOp op, const NameExpr* v1, const NameExpr* v2,
const ConstExpr* ce, const NameExpr* v3);
ZInstI GenInst(ZOp op, const NameExpr* v1, const ConstExpr* c, int i);
ZInstI GenInst(ZOp op, const NameExpr* v1, const NameExpr* v2, int i);
ZInstI GenInst(ZOp op, const NameExpr* v1, const NameExpr* v2, int i1, int i2);
ZInstI GenInst(ZOp op, const NameExpr* v, const ConstExpr* c, int i1, int i2);
ZInstI GenInst(ZOp op, const NameExpr* v1, const NameExpr* v2,
const NameExpr* v3, int i);
ZInstI GenInst(ZOp op, const NameExpr* v1, const NameExpr* v2,
const ConstExpr* c, int i);
ZInstI GenInst(ZOp op, const NameExpr* v1, const ConstExpr* c,
const NameExpr* v2, int i);

View file

@ -0,0 +1,146 @@
// See the file "COPYING" in the main distribution directory for copyright.
// Classes to support ZAM for-loop iterations.
#pragma once
#include "zeek/Val.h"
#include "zeek/ZeekString.h"
#include "zeek/script_opt/ZAM/ZInst.h"
namespace zeek::detail {
// Class for iterating over the elements of a table. Requires some care
// because the dictionary iterators need to be destructed when done.
// Iteration state for a ZAM "for" loop over a table. The intended
// calling sequence is BeginLoop(), then repeatedly IsDoneIterating()
// followed by (optionally) IterValue() and then NextIter(), and finally
// EndIter().
class TableIterInfo {
public:
	// No constructor needed, as all of our member variables are
	// instead instantiated via BeginLoop(). This allows us to
	// reuse TableIterInfo objects to lower the overhead associated
	// with executing ZBody::DoExec for non-recursive functions.

	// We do, however, want to make sure that when we go out of scope,
	// if we have any pending iterators we clear them.
	~TableIterInfo() { Clear(); }

	// Start looping over the elements of the given table. "_aux"
	// provides information about the index variables, their types,
	// and the type of the value variable (if any).
	void BeginLoop(const TableVal* _tv, ZInstAux* _aux)
		{
		tv = _tv;
		aux = _aux;
		auto tvd = tv->AsTable();
		tbl_iter = tvd->begin();
		tbl_end = tvd->end();
		}

	// True if we're done iterating, false if not. Only valid between
	// BeginLoop() and EndIter()/Clear(), since it dereferences both
	// iterators.
	bool IsDoneIterating() const
		{
		return *tbl_iter == *tbl_end;
		}

	// Indicates that the current iteration is finished, advancing to
	// the next table entry.
	void IterFinished()
		{
		++*tbl_iter;
		}

	// Performs the next iteration (assuming IsDoneIterating() returned
	// false), assigning to the index variables in "frame". Note that
	// this also advances the iterator, so IterValue() needs to be
	// called *before* NextIter() to get the matching value.
	void NextIter(ZVal* frame)
		{
		auto ind_lv = tv->RecreateIndex(*(*tbl_iter)->GetHashKey());

		for ( int i = 0; i < ind_lv->Length(); ++i )
			{
			ValPtr ind_lv_p = ind_lv->Idx(i);
			auto& var = frame[aux->loop_vars[i]];
			auto& t = aux->loop_var_types[i];

			// Release whatever managed value the loop variable
			// currently holds before overwriting it.
			if ( ZVal::IsManagedType(t) )
				ZVal::DeleteManagedType(var);

			var = ZVal(ind_lv_p, t);
			}

		IterFinished();
		}

	// For the current iteration, returns the corresponding value.
	ZVal IterValue()
		{
		auto tev = (*tbl_iter)->GetValue<TableEntryVal*>();
		return ZVal(tev->GetVal(), aux->value_var_type);
		}

	// Called upon finishing the iteration.
	void EndIter() { Clear(); }

	// Called to explicitly clear any iteration state.
	void Clear()
		{
		tbl_iter = std::nullopt;
		tbl_end = std::nullopt;
		}

private:
	// The table we're looping over. If we want to allow for the table
	// going away before we're able to clear our iterators then we
	// could change this to non-const and use Ref/Unref.
	const TableVal* tv = nullptr;

	// Associated auxiliary information. Initialized to nil, like "tv",
	// so that it never holds an indeterminate value before BeginLoop().
	ZInstAux* aux = nullptr;

	// Current position and end position; both are empty whenever no
	// loop is in progress.
	std::optional<DictIterator> tbl_iter;
	std::optional<DictIterator> tbl_end;
};
// Class for simple step-wise iteration across an integer range.
// Suitable for iterating over vectors or strings.
// Iteration state for stepping an index across 0 ... n-1, as used when
// looping over vectors or strings.
class StepIterInfo {
public:
	// There is intentionally no constructor initializing the members:
	// every use of the object is required to begin with InitLoop(),
	// which saves some cycles and lets quasi-static objects be used
	// for non-recursive functions.

	// Prepares to loop over the elements of a raw vector.
	void InitLoop(const std::vector<std::optional<ZVal>>* _vv)
		{
		iter = 0;
		vv = _vv;
		n = vv->size();
		}

	// Prepares to loop over the elements of a raw string.
	void InitLoop(const String* _s)
		{
		iter = 0;
		s = _s;
		n = s->Len();
		}

	// True once every position has been visited.
	bool IsDoneIterating() const
		{
		return ! (iter < n);
		}

	// Marks the current iteration as finished by moving to the next
	// position.
	void IterFinished()
		{
		iter += 1;
		}

	// Where we are in the iteration; reset to 0 by InitLoop().
	bro_uint_t iter;

	// Number of positions; we loop from 0 ... n-1.
	bro_uint_t n;

	// The low-level value being iterated over. Which of the two is
	// set depends on which InitLoop() was called.
	const std::vector<std::optional<ZVal>>* vv;
	const String* s;
};
} // namespace zeek::detail

View file

@ -0,0 +1,172 @@
// See the file "COPYING" in the main distribution directory for copyright.
// Methods relating to low-level ZAM instruction manipulation.
#include "zeek/Reporter.h"
#include "zeek/Desc.h"
#include "zeek/script_opt/ZAM/Compile.h"
#include "zeek/script_opt/ScriptOpt.h"
namespace zeek::detail {
// Returns a statement handle for the position at which the next
// instruction will be added.
const ZAMStmt ZAMCompiler::StartingBlock()
	{
	const auto next_index = insts1.size();
	return ZAMStmt(next_index);
	}
// Returns a statement handle for the most recently added instruction.
// The block's starting point is not needed to compute this.
const ZAMStmt ZAMCompiler::FinishBlock(const ZAMStmt /* start */)
	{
	const auto last_index = insts1.size() - 1;
	return ZAMStmt(last_index);
	}
// Null statements are acceptable only when they make up the entire
// statement body, i.e., when nothing has been generated yet.
bool ZAMCompiler::NullStmtOK() const
	{
	return insts1.empty();
	}
// An empty statement generates nothing, so it is represented by the
// most recently added instruction.
const ZAMStmt ZAMCompiler::EmptyStmt()
	{
	const auto last_index = insts1.size() - 1;
	return ZAMStmt(last_index);
	}
// Returns a statement handle for the most recently added instruction.
const ZAMStmt ZAMCompiler::LastInst()
	{
	const auto last_index = insts1.size() - 1;
	return ZAMStmt(last_index);
	}
// The statement handle returned when compilation hit an error: it
// always refers to instruction 0.
const ZAMStmt ZAMCompiler::ErrorStmt()
	{
	const int error_index = 0;
	return ZAMStmt(error_index);
	}
// Builds the auxiliary-value representation of the given expression list
// and wraps it in a newly allocated OpaqueVals.
OpaqueVals* ZAMCompiler::BuildVals(const ListExprPtr& l)
	{
	auto aux = InternalBuildVals(l.get());
	return new OpaqueVals(aux);
	}
// Allocates a ZInstAux big enough for "stride" values per expression in
// the list and fills it in, one expression at a time.
ZInstAux* ZAMCompiler::InternalBuildVals(const ListExpr* l, int stride)
	{
	auto exprs = l->Exprs();
	const int num_exprs = exprs.length();

	auto aux = new ZInstAux(num_exprs * stride);

	// Where in the aux info the next expression's values go.
	int offset = 0;

	for ( int idx = 0; idx < num_exprs; ++idx )
		{
		auto& expr = exprs[idx];

		int num_vals = InternalAddVal(aux, offset, expr);
		ASSERT(num_vals == stride);

		offset += num_vals;
		}

	return aux;
	}
// Adds the value(s) corresponding to expression "e" to the auxiliary
// information "zi", starting at offset "i". Returns how many aux
// entries were filled in: more than one for table-constructor
// (index-list = value) and set-constructor (index-list) elements, and
// exactly one otherwise.
int ZAMCompiler::InternalAddVal(ZInstAux* zi, int i, Expr* e)
	{
	if ( e->Tag() == EXPR_ASSIGN )
		{ // We're building up a table constructor
		auto& indices = e->GetOp1()->AsListExpr()->Exprs();
		auto val = e->GetOp2();
		int width = indices.length();

		// The recursive calls do the actual work of filling in the
		// aux entries, so they must not live inside ASSERT(), whose
		// argument isn't evaluated when assertions are compiled out.
		for ( int j = 0; j < width; ++j )
			{
			[[maybe_unused]] int num_vals =
				InternalAddVal(zi, i + j, indices[j]);
			ASSERT(num_vals == 1);
			}

		[[maybe_unused]] int num_vals =
			InternalAddVal(zi, i + width, val.get());
		ASSERT(num_vals == 1);

		return width + 1;
		}

	if ( e->Tag() == EXPR_LIST )
		{ // We're building up a set constructor
		auto& indices = e->AsListExpr()->Exprs();
		int width = indices.length();

		// As above: keep the side-effecting call outside of ASSERT().
		for ( int j = 0; j < width; ++j )
			{
			[[maybe_unused]] int num_vals =
				InternalAddVal(zi, i + j, indices[j]);
			ASSERT(num_vals == 1);
			}

		return width;
		}

	if ( e->Tag() == EXPR_FIELD_ASSIGN )
		{
		// These can appear when we're processing the expression
		// list for a record constructor.
		auto fa = e->AsFieldAssignExpr();
		e = fa->GetOp1().get();

		if ( e->GetType()->Tag() == TYPE_TYPE )
			{
			// Ugh - we actually need a "type" constant.
			auto v = e->Eval(nullptr);
			ASSERT(v);
			zi->Add(i, v);
			return 1;
			}

		// Now that we've adjusted, fall through.
		}

	// What remains is either a variable (recorded by frame slot and
	// type) or a constant (recorded by value).
	if ( e->Tag() == EXPR_NAME )
		zi->Add(i, FrameSlot(e->AsNameExpr()), e->GetType());
	else
		zi->Add(i, e->AsConstExpr()->ValuePtr());

	return 1;
	}
// Appends a copy of "inst" to insts1 and returns a handle for it. If a
// global store is pending, a Store-Global instruction is appended right
// after it, and the handle returned is the one for that store.
const ZAMStmt ZAMCompiler::AddInst(const ZInstI& inst)
	{
	// Reuse the placeholder instruction if there is one, so that
	// anything already pointing at it winds up at the new instruction.
	ZInstI* new_inst = pending_inst ? pending_inst : new ZInstI();
	pending_inst = nullptr;

	*new_inst = inst;

	insts1.push_back(new_inst);
	top_main_inst = insts1.size() - 1;

	if ( pending_global_store >= 0 )
		{
		// Clear the pending store before recursing, so the
		// recursive call doesn't see it again.
		auto global_slot = pending_global_store;
		pending_global_store = -1;

		auto store_inst = ZInstI(OP_STORE_GLOBAL_V, global_slot);
		store_inst.op_type = OP_V_I1;
		store_inst.t = globalsI[global_slot].id->GetType();

		return AddInst(store_inst);
		}

	return ZAMStmt(top_main_inst);
	}
// Returns the final statement of "s" if it's a statement list, otherwise
// "s" itself. An empty statement list has no final statement, so in that
// case the list itself is returned.
const Stmt* ZAMCompiler::LastStmt(const Stmt* s) const
	{
	if ( s->Tag() != STMT_LIST )
		return s;

	// Bind by reference: there's no need to copy the list just to
	// look at its final element.
	const auto& sl = s->AsStmtList()->Stmts();
	const auto n = sl.length();

	// Guard against indexing element -1 of an empty list.
	if ( n == 0 )
		return s;

	return sl[n - 1];
	}
// Returns a handle for the instruction immediately preceding "s".
ZAMStmt ZAMCompiler::PrevStmt(const ZAMStmt s)
	{
	const auto prev_num = s.stmt_num - 1;
	return ZAMStmt(prev_num);
	}
} // zeek::detail

1154
src/script_opt/ZAM/Stmt.cc Normal file

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,106 @@
// See the file "COPYING" in the main distribution directory for copyright.
// Low-level support utilities/globals for ZAM compilation.
#include "zeek/Reporter.h"
#include "zeek/Desc.h"
#include "zeek/ZeekString.h"
#include "zeek/script_opt/ProfileFunc.h"
#include "zeek/script_opt/ZAM/Support.h"
namespace zeek::detail {
// Definitions of the globals declared "extern" in Support.h.

// The statement currently being compiled.
const Stmt* curr_stmt;
// Type needed for the logging built-in.
TypePtr log_ID_enum_type;
// Type used when dealing with "any" values.
TypePtr any_base_type;
// Set to true by the ZAM_run_time_error() functions below; nothing in
// this file resets it.
bool ZAM_error = false;
// True if a function with the given profile can be compiled to ZAM.
// Functions using lambdas or "when" statements cannot; for those, a
// short description of the obstacle is stored in *reason if "reason" is
// non-nil. *reason is left untouched for compilable functions.
bool is_ZAM_compilable(const ProfileFunc* pf, const char** reason)
	{
	const char* obstacle = nullptr;

	if ( pf->NumLambdas() > 0 )
		obstacle = "use of lambda";
	else if ( pf->NumWhenStmts() > 0 )
		obstacle = "use of \"when\"";

	if ( ! obstacle )
		return true;

	if ( reason )
		*reason = obstacle;

	return false;
	}
// True if the given type's tag is TYPE_ANY.
bool IsAny(const Type* t)
	{
	const bool is_any = t->Tag() == TYPE_ANY;
	return is_any;
	}
// Returns a newly allocated StringVal holding a copy of "sv" in which
// every upper-case ASCII letter has been converted to lower case. All
// other bytes are copied unchanged.
StringVal* ZAM_to_lower(const StringVal* sv)
	{
	auto bs = sv->AsString();
	const u_char* src = bs->Bytes();
	const int len = bs->Len();

	// One extra byte for the terminating NUL.
	u_char* lowered = new u_char[len + 1];

	for ( int i = 0; i < len; ++i )
		{
		const u_char c = src[i];
		lowered[i] = (isascii(c) && isupper(c)) ? tolower(c) : c;
		}

	lowered[len] = '\0';

	return new StringVal(new String(1, lowered, len));
	}
// Returns a newly allocated StringVal holding the substring of "s" that
// GetSubstring() yields for the given start and length, or an empty
// string if it yields nil. "start" is 1-based, with 0 treated the same
// as 1.
StringVal* ZAM_sub_bytes(const StringVal* s, bro_uint_t start, bro_int_t n)
	{
	// Convert to a 0-based offset.
	const bro_uint_t offset = start > 0 ? start - 1 : start;

	auto substr = s->AsString()->GetSubstring(offset, n);

	if ( ! substr )
		substr = new String("");

	return new StringVal(substr);
	}
// Reports a run-time error that has no associated location: writes the
// message, followed by a newline, to stderr and then sets ZAM_error.
void ZAM_run_time_error(const char* msg)
{
fprintf(stderr, "%s\n", msg);
ZAM_error = true;
}
// Reports a run-time error at the given location via the reporter, then
// sets ZAM_error.
// NOTE(review): if reporter->RuntimeError() does not return normally
// (e.g., it throws), the assignment to ZAM_error is never reached -
// confirm which behavior is intended.
void ZAM_run_time_error(const Location* loc, const char* msg)
{
reporter->RuntimeError(loc, "%s", msg);
ZAM_error = true;
}
// Reports a run-time error that has no associated location but does
// concern a particular object: writes "<msg>: <description of o>" to
// stderr and then sets ZAM_error.
void ZAM_run_time_error(const char* msg, const Obj* o)
{
fprintf(stderr, "%s: %s\n", msg, obj_desc(o).c_str());
ZAM_error = true;
}
// Reports a run-time error at the given location concerning a particular
// object, in the form "<msg> (<description of o>)", via the reporter;
// then sets ZAM_error.
// NOTE(review): if reporter->RuntimeError() does not return normally
// (e.g., it throws), the assignment to ZAM_error is never reached -
// confirm which behavior is intended.
void ZAM_run_time_error(const Location* loc, const char* msg, const Obj* o)
{
reporter->RuntimeError(loc, "%s (%s)", msg, obj_desc(o).c_str());
ZAM_error = true;
}
// Issues a warning via the reporter, prefixed with a description of the
// given location. Unlike the error functions, this does not touch
// ZAM_error. "loc" is dereferenced unconditionally, so it must be
// non-nil.
void ZAM_run_time_warning(const Location* loc, const char* msg)
{
ODesc d;
loc->Describe(&d);
reporter->Warning("%s: %s", d.Description(), msg);
}
} // namespace zeek::detail

View file

@ -0,0 +1,53 @@
// See the file "COPYING" in the main distribution directory for copyright.
// Low-level support utilities/globals for ZAM compilation.
#pragma once
#include "zeek/Expr.h"
#include "zeek/Stmt.h"
namespace zeek::detail {
// Shorthand for a vector of value pointers.
typedef std::vector<ValPtr> val_vec;
// The (reduced) statement currently being compiled. Used for both
// tracking "use" and "reaching" definitions, and for error messages.
extern const Stmt* curr_stmt;
// True if a function with the given profile can be compiled to ZAM.
// If not, returns the reason in *reason, if non-nil.
class ProfileFunc;
extern bool is_ZAM_compilable(const ProfileFunc* pf,
const char** reason = nullptr);
// True if a given type is one that we treat internally as an "any" type.
extern bool IsAny(const Type* t);
// Convenience functions for getting to these.
inline bool IsAny(const TypePtr& t) { return IsAny(t.get()); }
inline bool IsAny(const Expr* e) { return IsAny(e->GetType()); }
// Needed for the logging built-in. Exported so that ZAM can make sure it's
// defined when compiling.
extern TypePtr log_ID_enum_type;
// Needed for a slight performance gain when dealing with "any" types.
extern TypePtr any_base_type;
// Run-time error reporting.  The variants differ in what context they
// are given: an optional location (or statement) and an optional object
// whose description is included in the message.
extern void ZAM_run_time_error(const char* msg);
extern void ZAM_run_time_error(const Location* loc, const char* msg);
extern void ZAM_run_time_error(const Location* loc, const char* msg,
const Obj* o);
extern void ZAM_run_time_error(const Stmt* stmt, const char* msg);
extern void ZAM_run_time_error(const char* msg, const Obj* o);
// Flag recording that a run-time error has been reported.
extern bool ZAM_error;
// Reports a warning associated with the given location.
extern void ZAM_run_time_warning(const Location* loc, const char* msg);
// Returns a new string value with the ASCII upper-case bytes of "sv"
// converted to lower case.
extern StringVal* ZAM_to_lower(const StringVal* sv);
// Returns a new string value holding the substring of "s" selected by
// "start" and "n".
extern StringVal* ZAM_sub_bytes(const StringVal* s, bro_uint_t start, bro_int_t n);
} // namespace zeek::detail

160
src/script_opt/ZAM/Vars.cc Normal file
View file

@ -0,0 +1,160 @@
// See the file "COPYING" in the main distribution directory for copyright.
// Methods for dealing with variables (both ZAM and script-level).
#include "zeek/Reporter.h"
#include "zeek/Desc.h"
#include "zeek/script_opt/ProfileFunc.h"
#include "zeek/script_opt/Reduce.h"
#include "zeek/script_opt/ZAM/Compile.h"
namespace zeek::detail {
// Returns true if "id" is not among the identifiers used at statement
// "where": either no usage information is recorded for the statement, the
// recorded usage is nil, or it does not include the identifier.
bool ZAMCompiler::IsUnused(const IDPtr& id, const Stmt* where) const
	{
	if ( ud->HasUsage(where) )
		{
		// "usage" can be nil if, due to constant propagation, we've
		// pruned all of the uses of the given identifier.
		auto usage = ud->GetUsage(where);
		return ! usage || ! usage->HasID(id.get());
		}

	return true;
	}
// Emits the instruction that loads a parameter into the ZAM frame.  The
// identifier is given a new ZAM frame slot, and the generated load reads
// from the identifier's Offset(), using the load flavor matching the
// identifier's type tag.  Identifiers that name types rather than values
// are an internal error.
void ZAMCompiler::LoadParam(ID* id)
	{
	if ( id->IsType() )
		reporter->InternalError("don't know how to compile local variable that's a type not a value");

	ZOp op = AssignmentFlavor(OP_LOAD_VAL_VV, id->GetType()->Tag());

	int slot = AddToFrame(id);

	ZInstI z(op, slot, id->Offset());
	z.SetType(id->GetType());
	z.op_type = OP_VV_FRAME;

	(void) AddInst(z);
	}
// Emits an instruction that loads the global "id" into its frame slot and
// returns the resulting statement handle.
const ZAMStmt ZAMCompiler::LoadGlobal(ID* id)
	{
	// Globals that are types need a special load, as they don't fit
	// with the usual flavor template.
	ZOp op = id->IsType() ?
			OP_LOAD_GLOBAL_TYPE_VV :
			AssignmentFlavor(OP_LOAD_GLOBAL_VV, id->GetType()->Tag());

	auto frame_slot = RawSlot(id);

	ZInstI load(op, frame_slot, global_id_to_info[id]);
	load.SetType(id->GetType());
	load.op_type = OP_VV_I2;

	// We use the id_val for reporting used-but-not-set errors.
	load.aux = new ZInstAux(0);
	load.aux->id_val = id;

	return AddInst(load);
	}
// Appends "id" to the frame: records its slot in frame_layout1, adds it to
// frame_denizens, and grows the frame size by one.  Returns the slot that
// was assigned.
int ZAMCompiler::AddToFrame(ID* id)
	{
	const auto new_slot = frame_sizeI;

	frame_layout1[id] = new_slot;
	frame_denizens.push_back(id);
	++frame_sizeI;

	return new_slot;
	}
// Returns the frame slot for "id".  For globals, first emits a load of the
// global into that slot.
int ZAMCompiler::FrameSlot(const ID* id)
	{
	const auto slot = RawSlot(id);

	if ( ! id->IsGlobal() )
		return slot;

	(void) LoadGlobal(frame_denizens[slot]);
	return slot;
	}
// Returns the frame slot for "id" when it is used as an instruction's
// first operand with flavor "fl".  For globals, a read flavor emits a load
// of the global, and a write flavor records the global in
// pending_global_store; read-write does both.  Non-globals and the
// internal flavor need no extra work.
int ZAMCompiler::Frame1Slot(const ID* id, ZAMOp1Flavor fl)
	{
	auto slot = RawSlot(id);

	switch ( fl ) {
	case OP1_INTERNAL:
		break;

	case OP1_READ:
		if ( id->IsGlobal() )
			(void) LoadGlobal(frame_denizens[slot]);
		break;

	case OP1_READ_WRITE:
		if ( id->IsGlobal() )
			{
			(void) LoadGlobal(frame_denizens[slot]);
			pending_global_store = global_id_to_info[id];
			}
		break;

	case OP1_WRITE:
		if ( id->IsGlobal() )
			pending_global_store = global_id_to_info[id];
		break;
	}

	return slot;
	}
// Looks up the frame slot recorded for "id" in frame_layout1, without
// generating any instructions.  A missing entry is an internal error.
int ZAMCompiler::RawSlot(const ID* id)
	{
	auto entry = frame_layout1.find(id);
	const bool missing = entry == frame_layout1.end();

	if ( missing )
		reporter->InternalError("ID %s missing from frame layout", id->Name());

	return entry->second;
	}
// True if "id" has an entry in the frame layout.
bool ZAMCompiler::HasFrameSlot(const ID* id) const
	{
	return frame_layout1.count(id) > 0;
	}
int ZAMCompiler::NewSlot(bool is_managed)
{
char buf[8192];
snprintf(buf, sizeof buf, "#internal-%d#", frame_sizeI);
// In the following, all that matters is that for managed types
// we pick a tag that will be viewed as managed, and vice versa.
auto tag = is_managed ? TYPE_TABLE : TYPE_VOID;
auto internal_reg = new ID(buf, SCOPE_FUNCTION, false);
internal_reg->SetType(base_type(tag));
return AddToFrame(internal_reg);
}
int ZAMCompiler::TempForConst(const ConstExpr* c)
{
auto slot = NewSlot(c->GetType());
auto z = ZInstI(OP_ASSIGN_CONST_VC, slot, c);
z.CheckIfManaged(c->GetType());
(void) AddInst(z);
return slot;
}
} // zeek::detail

539
src/script_opt/ZAM/ZBody.cc Normal file
View file

@ -0,0 +1,539 @@
// See the file "COPYING" in the main distribution directory for copyright.
#include "zeek/Desc.h"
#include "zeek/RE.h"
#include "zeek/Frame.h"
#include "zeek/EventHandler.h"
#include "zeek/Trigger.h"
#include "zeek/Traverse.h"
#include "zeek/Reporter.h"
#include "zeek/script_opt/ScriptOpt.h"
#include "zeek/script_opt/ZAM/Compile.h"
// Needed for managing the corresponding values.
#include "zeek/File.h"
#include "zeek/Func.h"
#include "zeek/OpaqueVal.h"
// Just needed for BiFs.
#include "zeek/analyzer/Manager.h"
#include "zeek/broker/Manager.h"
#include "zeek/file_analysis/Manager.h"
#include "zeek/logging/Manager.h"
namespace zeek::detail {
using std::vector;
static bool did_init = false;
// Count of how often each type of ZOP executed, and how much CPU it
// cumulatively took.
int ZOP_count[OP_NOP+1];
double ZOP_CPU[OP_NOP+1];

// Prints one tab-separated line (name, execution count, cumulative CPU)
// for every opcode from 1 through OP_NOP that executed at least once.
void report_ZOP_profile()
	{
	for ( int op = 1; op <= OP_NOP; ++op )
		{
		if ( ZOP_count[op] <= 0 )
			continue;

		printf("%s\t%d\t%.06f\n", ZOP_name(ZOp(op)),
		       ZOP_count[op], ZOP_CPU[op]);
		}
	}
// Stores a copy of an existing (not newly constructed) ZVal into element
// "ind" of "vv", growing the vector if needed and taking care of the
// underlying memory management.  Returns false if the assigned value was
// missing (which can only be detected for managed types), in which case
// the element is left unset; returns true otherwise.
static bool copy_vec_elem(VectorVal* vv, int ind, ZVal zv, const TypePtr& t)
	{
	if ( vv->Size() <= ind )
		vv->Resize(ind + 1);

	auto& slot = (*vv->RawVec())[ind];

	if ( ZVal::IsManagedType(t) )
		{
		// Release whatever the element held before overwriting it.
		if ( slot )
			ZVal::DeleteManagedType(*slot);

		slot = zv;

		auto managed = slot->ManagedVal();
		if ( managed )
			{
			// The vector now holds its own reference.
			zeek::Ref(managed);
			return true;
			}

		slot = std::nullopt;
		return false;
		}

	// Unmanaged values can simply be copied.
	slot = zv;
	return true;
	}
// Unary vector operations never work on managed types, so no need
// to pass in the type ... However, the RHS, which normally would
// be const, needs to be non-const so we can use its Type() method
// to get at a shareable VectorType.
static void vec_exec(ZOp op, VectorVal*& v1, VectorVal* v2, const ZInst& z);
// Binary operations *can* have managed types (strings).
static void vec_exec(ZOp op, TypePtr t, VectorVal*& v1, VectorVal* v2,
const VectorVal* v3, const ZInst& z);
// Vector coercion.
//
// Defines a static function vec_coerce_<tag>() that builds a new
// VectorVal whose yield type is base_type(lhs_type) and whose size matches
// the input vector.  Each present element is read via "rhs_accessor",
// converted with "cast", and stored; missing elements stay missing.
//
// ### Should check for underflow/overflow.
#define VEC_COERCE(tag, lhs_type, cast, rhs_accessor) \
static VectorVal* vec_coerce_##tag(VectorVal* vec) \
{ \
auto& v = *vec->RawVec(); \
auto yt = make_intrusive<VectorType>(base_type(lhs_type)); \
auto res_zv = new VectorVal(yt); \
auto n = v.size(); \
res_zv->Resize(n); \
auto& res = *res_zv->RawVec(); \
for ( auto i = 0U; i < n; ++i ) \
if ( v[i] ) \
res[i] = ZVal(cast((*v[i]).rhs_accessor)); \
else \
res[i] = std::nullopt; \
return res_zv; \
}
// Instantiations of the coercion functions.  In each tag the first letter
// names the result element type and the second the source element type
// (I = int, U = count, D = double).  The cast always matches the result
// type, so that the value is stored in the ZVal under the representation
// that readers of the resulting vector will use.
VEC_COERCE(IU, TYPE_INT, bro_int_t, AsCount())
VEC_COERCE(ID, TYPE_INT, bro_int_t, AsDouble())
VEC_COERCE(UI, TYPE_COUNT, bro_uint_t, AsInt())
VEC_COERCE(UD, TYPE_COUNT, bro_uint_t, AsDouble())
VEC_COERCE(DI, TYPE_DOUBLE, double, AsInt())
VEC_COERCE(DU, TYPE_DOUBLE, double, AsCount())
// Returns the CLOCK_PROCESS_CPUTIME_ID clock reading, in seconds.
double curr_CPU_time()
	{
	struct timespec now;
	clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &now);

	const double whole_secs = double(now.tv_sec);
	const double frac_secs = double(now.tv_nsec) / 1e9;

	return whole_secs + frac_secs;
	}
// Builds a ZBody for the function named "_func_name" from the state the
// compiler "zc" accumulated: frame layout, managed slots, globals, switch
// case maps, and iteration information.  The instructions themselves are
// supplied separately via SetInsts().
ZBody::ZBody(const char* _func_name, const ZAMCompiler* zc)
: Stmt(STMT_ZAM)
	{
	func_name = _func_name;

	frame_denizens = zc->FrameDenizens();
	frame_size = frame_denizens.size();

	// Concretize the names of the frame denizens.
	for ( auto& f : frame_denizens )
		for ( auto i = 0U; i < f.ids.size(); ++i )
			f.names.push_back(f.ids[i]->Name());

	managed_slots = zc->ManagedSlots();

	globals = zc->Globals();
	num_globals = globals.size();

	int_cases = zc->GetCases<bro_int_t>();
	uint_cases = zc->GetCases<bro_uint_t>();
	double_cases = zc->GetCases<double>();
	str_cases = zc->GetCases<std::string>();

	if ( zc->NonRecursive() )
		{
		// Non-recursive functions can reuse one pre-allocated frame
		// across invocations.
		fixed_frame = new ZVal[frame_size];

		for ( auto i = 0U; i < managed_slots.size(); ++i )
			fixed_frame[managed_slots[i]].ClearManagedVal();
		}

	table_iters = zc->GetTableIters();
	num_step_iters = zc->NumStepIters();

	// It's a little weird doing this in the constructor, but unless
	// we add a general "initialize for ZAM" function, this is as good
	// a place as any.
	if ( ! did_init )
		{
		auto log_ID_type = lookup_ID("ID", "Log");
		ASSERT(log_ID_type);
		log_ID_enum_type = log_ID_type->GetType<EnumType>();

		any_base_type = base_type(TYPE_ANY);

		ZVal::SetZValNilStatusAddr(&ZAM_error);

		// Record that the one-time setup is done, so that later
		// ZBody's skip it.
		did_init = true;
		}
	}
// Releases the fixed frame (if any), the instruction array, and any
// profiling state.
ZBody::~ZBody()
	{
	delete[] fixed_frame;
	delete[] insts;

	// inst_CPU is only ever allocated by InitProfile(), together with
	// inst_count.  Key its release off inst_count, which is reliably
	// nil when profiling state was never created.
	if ( inst_count )
		delete inst_CPU;

	delete inst_count;
	delete CPU_time;
	}
// Installs the body's instructions by copying each of the given full
// instructions into a newly allocated array, then sets up profiling state
// if needed.
void ZBody::SetInsts(vector<ZInst*>& _insts)
	{
	ninst = _insts.size();

	auto copied = new ZInst[ninst];

	unsigned int pos = 0;
	while ( pos < ninst )
		{
		copied[pos] = *_insts[pos];
		++pos;
		}

	insts = copied;

	InitProfile();
	}
// Installs the body's instructions from intermediary instructions: each is
// copied (as a ZInst) into a newly allocated array, and instructions with
// an associated statement get their location set from that statement's
// original.  Profiling state is then set up if needed.
void ZBody::SetInsts(vector<ZInstI*>& instsI)
	{
	ninst = instsI.size();

	auto copied = new ZInst[ninst];

	for ( auto pos = 0U; pos < ninst; ++pos )
		{
		auto& src = *instsI[pos];
		auto& dst = copied[pos];

		dst = src;

		if ( src.stmt )
			dst.loc = src.stmt->Original()->GetLocationInfo();
		}

	insts = copied;

	InitProfile();
	}
// If ZAM profiling is enabled, allocates zeroed per-instruction execution
// counts and CPU times (one entry per instruction) plus a zeroed
// cumulative CPU time.  Otherwise does nothing.
void ZBody::InitProfile()
	{
	if ( ! analysis_options.profile_ZAM )
		return;

	inst_count = new vector<int>(ninst, 0);
	inst_CPU = new vector<double>(ninst, 0.0);

	CPU_time = new double(0.0);
	}
// Executes the body from its first instruction.  In DEBUG builds with
// profiling enabled, the CPU time spent is added to the body's cumulative
// total.
ValPtr ZBody::Exec(Frame* f, StmtFlowType& flow)
	{
#ifdef DEBUG
	double start_time = analysis_options.profile_ZAM ? curr_CPU_time() : 0.0;
#endif

	auto result = DoExec(f, 0, flow);

#ifdef DEBUG
	if ( analysis_options.profile_ZAM )
		*CPU_time += curr_CPU_time() - start_time;
#endif

	return result;
	}
// Executes the body's instructions beginning at "start_pc", continuing
// until the program counter passes the last instruction or ZAM_error
// becomes set.  Returns the return value converted to a Val if a return
// type was recorded during execution, nil otherwise.  "flow" is set to
// FLOW_RETURN before execution begins.  ZAM_error is cleared on exit.
//
// NOTE(review): the per-opcode logic comes from the generated
// ZAM-EvalMacros.h / ZAM-EvalDefs.h includes, which presumably refer to
// the locals declared here (frame, pc, z, ret_u, ret_type, ...) by name --
// confirm before renaming or reordering any of them.
ValPtr ZBody::DoExec(Frame* f, int start_pc, StmtFlowType& flow)
{
int pc = start_pc;
const int end_pc = ninst;
// Return value, or nil if none.
const ZVal* ret_u;
// Type of the return value. If nil, then we don't have a value.
TypePtr ret_type;
#ifdef DEBUG
bool do_profile = analysis_options.profile_ZAM;
#endif
// The frame of ZVal's this invocation operates on: either the
// pre-allocated fixed frame, or one allocated just for this call.
ZVal* frame;
std::unique_ptr<TableIterVec> local_table_iters;
std::vector<StepIterInfo> step_iters(num_step_iters);
if ( fixed_frame )
frame = fixed_frame;
else
{
frame = new ZVal[frame_size];
// Clear slots for which we do explicit memory management.
for ( auto s : managed_slots )
frame[s].ClearManagedVal();
if ( table_iters.size() > 0 )
{
// Work on a private copy of the table iterators.
local_table_iters =
std::make_unique<TableIterVec>(table_iters.size());
*local_table_iters = table_iters;
// NOTE(review): tiv_ptr is a member, and here it is pointed at
// storage owned by this invocation; it is not reset before
// returning -- confirm nothing reads it after the call ends.
tiv_ptr = &(*local_table_iters);
}
}
flow = FLOW_RETURN; // can be over-written by a Hook-Break
while ( pc < end_pc && ! ZAM_error ) {
auto& z = insts[pc];
#ifdef DEBUG
int profile_pc;
double profile_CPU;
if ( do_profile )
{
++ZOP_count[z.op];
++(*inst_count)[pc];
profile_pc = pc;
profile_CPU = curr_CPU_time();
}
#endif
switch ( z.op ) {
case OP_NOP:
break;
#include "ZAM-EvalMacros.h"
#include "ZAM-EvalDefs.h"
default:
reporter->InternalError("bad ZAM opcode");
}
#ifdef DEBUG
if ( do_profile )
{
// Charge the elapsed CPU time to the instruction that was
// dispatched (profile_pc), and to its opcode.
double dt = curr_CPU_time() - profile_CPU;
(*inst_CPU)[profile_pc] += dt;
ZOP_CPU[z.op] += dt;
}
#endif
++pc;
}
// Convert the return value (if any) before the frame is torn down.
auto result = ret_type ? ret_u->ToVal(ret_type) : nullptr;
if ( fixed_frame )
{
// Make sure we don't have any dangling iterators.
for ( auto& ti : table_iters )
ti.Clear();
// Free slots for which we do explicit memory management,
// preparing them for reuse.
for ( auto i = 0U; i < managed_slots.size(); ++i )
{
auto& v = frame[managed_slots[i]];
ZVal::DeleteManagedType(v);
v.ClearManagedVal();
}
}
else
{
// Free those slots for which we do explicit memory management.
// No need to then clear them, as we're about to throw away
// the entire frame.
for ( auto i = 0U; i < managed_slots.size(); ++i )
{
auto& v = frame[managed_slots[i]];
ZVal::DeleteManagedType(v);
}
delete [] frame;
}
// Clear any error state.
ZAM_error = false;
return result;
}
// Prints the body's execution profile: the cumulative CPU time, followed
// by one line per instruction giving its index, execution count, CPU
// time, and a dump of the instruction.  Bodies that are empty or never
// executed get a one-line note instead.  Does nothing if no profiling
// state was ever set up for this body.
void ZBody::ProfileExecution() const
	{
	// The profiling state only exists if InitProfile() ran with
	// profiling enabled; it allocates all three pieces together.
	if ( ! inst_count )
		return;

	if ( inst_count->size() == 0 )
		{
		printf("%s has an empty body\n", func_name);
		return;
		}

	if ( (*inst_count)[0] == 0 )
		{
		printf("%s did not execute\n", func_name);
		return;
		}

	printf("%s CPU time: %.06f\n", func_name, *CPU_time);

	for ( auto i = 0U; i < inst_count->size(); ++i )
		{
		// "i" is unsigned, so print it with %u.
		printf("%s %u %d %.06f ", func_name, i,
		       (*inst_count)[i], (*inst_CPU)[i]);
		insts[i].Dump(i, &frame_denizens);
		}
	}
// Run-time check that the type found in an "any" value is consistent with
// the expected type.  The match is okay if the expected type is itself
// "any", if the two types are the same, or if both are records and are
// record-promotion compatible.  Otherwise reports a run-time type clash
// at "loc" and returns false.
bool ZBody::CheckAnyType(const TypePtr& any_type, const TypePtr& expected_type,
                         const Location* loc) const
	{
	if ( IsAny(expected_type) )
		return true;

	if ( same_type(any_type, expected_type, false, false) )
		return true;

	auto actual_tag = any_type->Tag();
	auto expected_tag = expected_type->Tag();

	const bool both_records =
		actual_tag == TYPE_RECORD && expected_tag == TYPE_RECORD;

	if ( both_records )
		{
		auto actual_rec = any_type->AsRecordType();
		auto expected_rec = expected_type->AsRecordType();

		if ( record_promotion_compatible(expected_rec, actual_rec) )
			return true;
		}

	char msg[8192];
	snprintf(msg, sizeof msg, "run-time type clash (%s/%s)",
	         type_name(actual_tag), type_name(expected_tag));

	reporter->RuntimeError(loc, "%s", msg);
	return false;
	}
void ZBody::Dump() const
{
printf("Frame:\n");
for ( unsigned i = 0; i < frame_denizens.size(); ++i )
{
auto& d = frame_denizens[i];
printf("frame[%d] =", i);
if ( d.names.size() > 0 )
for ( auto& n : d.names )
printf(" %s", n);
else
for ( auto& id : d.ids )
printf(" %s", id->Name());
printf("\n");
}
printf("Final code:\n");
for ( unsigned i = 0; i < ninst; ++i )
{
auto& inst = insts[i];
printf("%d: ", i);
inst.Dump(i, &frame_denizens);
}
}
// Describes the statement as "ZAM-code" followed by the function's name.
void ZBody::StmtDescribe(ODesc* d) const
{
d->AddSP("ZAM-code");
d->AddSP(func_name);
}
// Traversal of a ZBody only visits the statement itself (pre- and
// post-statement callbacks); there are no sub-statements to descend into.
// NOTE(review): the HANDLE_TC_STMT_* macros are defined elsewhere and
// presumably supply the function's return statements -- confirm.
TraversalCode ZBody::Traverse(TraversalCallback* cb) const
{
TraversalCode tc = cb->PreStmt(this);
HANDLE_TC_STMT_PRE(tc);
tc = cb->PostStmt(this);
HANDLE_TC_STMT_POST(tc);
}
// Resumes execution of the associated ZBody at the stored transfer
// program counter.
ValPtr ZAMResumption::Exec(Frame* f, StmtFlowType& flow)
{
return am->DoExec(f, xfer_pc, flow);
}
// Describes the statement with a fixed placeholder string.
void ZAMResumption::StmtDescribe(ODesc* d) const
{
d->Add("<resumption of compiled code>");
}
// Traversal only visits the statement itself (pre- and post-statement
// callbacks).
// NOTE(review): the HANDLE_TC_STMT_* macros are defined elsewhere and
// presumably supply the function's return statements -- confirm.
TraversalCode ZAMResumption::Traverse(TraversalCallback* cb) const
{
TraversalCode tc = cb->PreStmt(this);
HANDLE_TC_STMT_PRE(tc);
tc = cb->PostStmt(this);
HANDLE_TC_STMT_POST(tc);
}
// Unary vector operation of v1 <vec-op> v2.
//
// Builds a fresh result vector with as many elements as v2, fills it via
// the per-opcode code in the generated ZAM-Vec1EvalDefs.h, and then
// replaces v1 with a new VectorVal (of v2's type) wrapping the result,
// unref'ing the previous v1.
//
// NOTE(review): the generated cases presumably refer to the locals here
// (vec1, vec2, i, z) by name -- confirm before renaming them.
static void vec_exec(ZOp op, VectorVal*& v1, VectorVal* v2, const ZInst& z)
{
// We could speed this up further still by gen'ing up an instance
// of the loop inside each switch case (in which case we might as
// well move the whole kit-and-caboodle into the Exec method). But
// that seems like a lot of code bloat for only a very modest gain.
auto& vec2 = *v2->RawVec();
auto n = vec2.size();
auto vec1_ptr = new vector<std::optional<ZVal>>(n);
auto& vec1 = *vec1_ptr;
for ( auto i = 0U; i < n; ++i )
switch ( op ) {
#include "ZAM-Vec1EvalDefs.h"
default:
reporter->InternalError("bad invocation of VecExec");
}
auto vt = cast_intrusive<VectorType>(v2->GetType());
// Hold on to the old value until the new one is in place.
auto old_v1 = v1;
v1 = new VectorVal(std::move(vt), vec1_ptr);
Unref(old_v1);
}
// Binary vector operation of v1 = v2 <vec-op> v3.
//
// Builds a fresh result vector with as many elements as v2, fills it via
// the per-opcode code in the generated ZAM-Vec2EvalDefs.h, and then
// replaces v1 with a new VectorVal of type "t" wrapping the result,
// unref'ing the previous v1.
//
// NOTE(review): the loop bound comes from v2 alone; presumably v3 is
// guaranteed to be at least as long -- confirm against the callers and
// the generated cases.
static void vec_exec(ZOp op, TypePtr t, VectorVal*& v1,
VectorVal* v2, const VectorVal* v3, const ZInst& z)
{
// See comment above re further speed-up.
auto& vec2 = *v2->RawVec();
auto& vec3 = *v3->RawVec();
auto n = vec2.size();
auto vec1_ptr = new vector<std::optional<ZVal>>(n);
auto& vec1 = *vec1_ptr;
for ( auto i = 0U; i < vec2.size(); ++i )
switch ( op ) {
#include "ZAM-Vec2EvalDefs.h"
default:
reporter->InternalError("bad invocation of VecExec");
}
auto vt = cast_intrusive<VectorType>(std::move(t));
// Hold on to the old value until the new one is in place.
auto old_v1 = v1;
v1 = new VectorVal(std::move(vt), vec1_ptr);
Unref(old_v1);
}
} // zeek::detail

147
src/script_opt/ZAM/ZBody.h Normal file
View file

@ -0,0 +1,147 @@
// See the file "COPYING" in the main distribution directory for copyright.
// ZBody: ZAM function body that replaces a function's original AST body.
#pragma once
#include "zeek/script_opt/ZAM/IterInfo.h"
#include "zeek/script_opt/ZAM/Support.h"
namespace zeek::detail {
// Static information about globals used in a function.
class GlobalInfo {
public:
// The global's identifier.
IDPtr id;
// The slot associated with the global.
// NOTE(review): presumably the global's ZAM frame slot -- confirm.
int slot;
};
// These are the counterparts to CaseMapI and CaseMapsI in ZAM.h,
// but concretized to use instruction numbers rather than pointers
// to instructions.
// A single switch's cases: maps a case value to an instruction number.
template<typename T> using CaseMap = std::map<T, int>;
// All of the case maps for values of a given type.
template<typename T> using CaseMaps = std::vector<CaseMap<T>>;
// A collection of table-iteration state objects.
using TableIterVec = std::vector<TableIterInfo>;
class ZBody : public Stmt {
public:
ZBody(const char* _func_name, const ZAMCompiler* zc);
~ZBody() override;
// These are split out from the constructor to allow construction
// of a ZBody from either save-file full instructions (first method)
// or intermediary instructions (second method).
void SetInsts(std::vector<ZInst*>& insts);
void SetInsts(std::vector<ZInstI*>& instsI);
ValPtr Exec(Frame* f, StmtFlowType& flow) override;
// Older code exists for save files, but let's see if we can
// avoid having to support them, as they're a fairly elaborate
// production.
//
// void SaveTo(FILE* f, int interp_frame_size) const;
void Dump() const;
void ProfileExecution() const;
protected:
friend class ZAMResumption;
// Initializes profiling information, if needed.
void InitProfile();
ValPtr DoExec(Frame* f, int start_pc, StmtFlowType& flow);
// Run-time checking for "any" type being consistent with
// expected typed. Returns true if the type match is okay.
bool CheckAnyType(const TypePtr& any_type, const TypePtr& expected_type,
const Location* loc) const;
StmtPtr Duplicate() override { return {NewRef{}, this}; }
void StmtDescribe(ODesc* d) const override;
TraversalCode Traverse(TraversalCallback* cb) const override;
private:
const char* func_name;
const ZInst* insts = nullptr;
unsigned int ninst;
FrameReMap frame_denizens;
int frame_size;
// A list of frame slots that correspond to managed values.
std::vector<int> managed_slots;
// This is non-nil if the function is (asserted to be) non-recursive,
// in which case we pre-allocate this.
ZVal* fixed_frame = nullptr;
// Pre-allocated table iteration values. For recursive invocations,
// these are copied into a local stack variable, but for non-recursive
// functions they can be used directly.
TableIterVec table_iters;
// Points to the TableIterVec used to manage iteration over tables.
// For non-recursive functions, we just use the static one, but
// for recursive ones this points to the local stack variable.
TableIterVec* tiv_ptr = &table_iters;
// Number of StepIterInfo's required by the function. These we
// always create using a local stack variable, since they don't
// require any overhead or cleanup.
int num_step_iters;
std::vector<GlobalInfo> globals;
int num_globals;
// The following are only maintained if we're doing profiling.
//
// These need to be pointers so we can manipulate them in a
// const method.
std::vector<int>* inst_count = nullptr; // for profiling
double* CPU_time = nullptr; // cumulative CPU time for the program
std::vector<double>* inst_CPU; // per-instruction CPU time.
CaseMaps<bro_int_t> int_cases;
CaseMaps<bro_uint_t> uint_cases;
CaseMaps<double> double_cases;
CaseMaps<std::string> str_cases;
};
// A statement that, when executed, resumes a ZBody's code at a given
// instruction.  Used for deferred execution for "when" statements.
class ZAMResumption : public Stmt {
public:
	ZAMResumption(ZBody* _am, int _xfer_pc)
		: Stmt(STMT_ZAM_RESUMPTION), am(_am), xfer_pc(_xfer_pc)
		{
		}

	ValPtr Exec(Frame* f, StmtFlowType& flow) override;

	StmtPtr Duplicate() override { return {NewRef{}, this}; }

	void StmtDescribe(ODesc* d) const override;

protected:
	TraversalCode Traverse(TraversalCallback* cb) const override;

	// The body to resume, and the program counter to resume at.
	ZBody* am;
	int xfer_pc = 0;
};
// Prints the execution profile.
extern void report_ZOP_profile();
} // namespace zeek::detail

615
src/script_opt/ZAM/ZInst.cc Normal file
View file

@ -0,0 +1,615 @@
// See the file "COPYING" in the main distribution directory for copyright.
#include "zeek/Desc.h"
#include "zeek/Reporter.h"
#include "zeek/Func.h"
#include "zeek/script_opt/ZAM/ZInst.h"
using std::string;
namespace zeek::detail {
void ZInst::Dump(int inst_num, const FrameReMap* mappings) const
{
// printf("v%d ", n);
auto id1 = VName(1, inst_num, mappings);
auto id2 = VName(2, inst_num, mappings);
auto id3 = VName(3, inst_num, mappings);
auto id4 = VName(4, inst_num, mappings);
Dump(id1, id2, id3, id4);
}
void ZInst::Dump(const string& id1, const string& id2, const string& id3,
const string& id4) const
{
printf("%s ", ZOP_name(op));
// printf("(%s) ", op_type_name(op_type));
if ( t && 0 )
printf("(%s) ", type_name(t->Tag()));
switch ( op_type ) {
case OP_X:
break;
case OP_V:
printf("%s", id1.c_str());
break;
case OP_VV:
printf("%s, %s", id1.c_str(), id2.c_str());
break;
case OP_VVV:
printf("%s, %s, %s", id1.c_str(), id2.c_str(), id3.c_str());
break;
case OP_VVVV:
printf("%s, %s, %s, %s", id1.c_str(), id2.c_str(), id3.c_str(),
id4.c_str());
break;
case OP_VVVC:
printf("%s, %s, %s, %s", id1.c_str(), id2.c_str(), id3.c_str(),
ConstDump().c_str());
break;
case OP_C:
printf("%s", ConstDump().c_str());
break;
case OP_VC:
printf("%s, %s", id1.c_str(), ConstDump().c_str());
break;
case OP_VVC:
printf("%s, %s, %s", id1.c_str(), id2.c_str(),
ConstDump().c_str());
break;
case OP_V_I1:
printf("%d", v1);
break;
case OP_VC_I1:
printf("%d %s", v1, ConstDump().c_str());
break;
case OP_VV_FRAME:
printf("%s, interpreter frame[%d]", id1.c_str(), v2);
break;
case OP_VV_I2:
printf("%s, %d", id1.c_str(), v2);
break;
case OP_VV_I1_I2:
printf("%d, %d", v1, v2);
break;
case OP_VVC_I2:
printf("%s, %d, %s", id1.c_str(), v2, ConstDump().c_str());
break;
case OP_VVV_I3:
printf("%s, %s, %d", id1.c_str(), id2.c_str(), v3);
break;
case OP_VVV_I2_I3:
printf("%s, %d, %d", id1.c_str(), v2, v3);
break;
case OP_VVVV_I4:
printf("%s, %s, %s, %d", id1.c_str(), id2.c_str(), id3.c_str(),
v4);
break;
case OP_VVVV_I3_I4:
printf("%s, %s, %d, %d", id1.c_str(), id2.c_str(), v3, v4);
break;
case OP_VVVV_I2_I3_I4:
printf("%s, %d, %d, %d", id1.c_str(), v2, v3, v4);
break;
case OP_VVVC_I3:
printf("%s, %s, %d, %s", id1.c_str(), id2.c_str(), v3,
ConstDump().c_str());
break;
case OP_VVVC_I2_I3:
printf("%s, %d, %d, %s", id1.c_str(), v2, v3,
ConstDump().c_str());
break;
case OP_VVVC_I1_I2_I3:
printf("%d, %d, %d, %s", v1, v2, v3, ConstDump().c_str());
break;
}
if ( func )
printf(" (func %s)", func->Name());
printf("\n");
}
// Returns how many of the instruction's operands refer to frame elements,
// as determined by its operand type.  Every operand type is listed
// explicitly (no "default") so that adding a new one forces a decision
// here.
int ZInst::NumFrameSlots() const
	{
	switch ( op_type ) {
	case OP_X:
	case OP_C:
	case OP_V_I1:
	case OP_VC_I1:
	case OP_VV_I1_I2:
	case OP_VVVC_I1_I2_I3:
		return 0;

	case OP_V:
	case OP_VC:
	case OP_VV_FRAME:
	case OP_VV_I2:
	case OP_VVC_I2:
	case OP_VVV_I2_I3:
	case OP_VVVV_I2_I3_I4:
	case OP_VVVC_I2_I3:
		return 1;

	case OP_VV:
	case OP_VVC:
	case OP_VVV_I3:
	case OP_VVVV_I3_I4:
	case OP_VVVC_I3:
		return 2;

	case OP_VVV:
	case OP_VVVC:
	case OP_VVVV_I4:
		return 3;

	case OP_VVVV:
		return 4;
	}
	}
// Returns the total number of v1..v4 operands the instruction uses
// (frame slots and integers alike), as determined by its operand type.
// Every operand type is listed explicitly (no "default") so that adding a
// new one forces a decision here.
int ZInst::NumSlots() const
	{
	switch ( op_type ) {
	case OP_X:
	case OP_C:
		return 0;

	case OP_V:
	case OP_VC:
	case OP_V_I1:
	case OP_VC_I1:
		return 1;

	case OP_VV:
	case OP_VVC:
	case OP_VV_I1_I2:
	case OP_VV_FRAME:
	case OP_VV_I2:
	case OP_VVC_I2:
		return 2;

	case OP_VVV:
	case OP_VVVC:
	case OP_VVV_I3:
	case OP_VVV_I2_I3:
	case OP_VVVC_I3:
	case OP_VVVC_I2_I3:
	case OP_VVVC_I1_I2_I3:
		return 3;

	case OP_VVVV:
	case OP_VVVV_I4:
	case OP_VVVV_I3_I4:
	case OP_VVVV_I2_I3_I4:
		return 4;
	}
	}
// Returns a printable name, of the form "<slot> (<identifier>)", for the
// instruction's n'th operand.  Yields an empty string if operand "n" is
// not a frame slot, and "<special>" for negative slots.  "inst_num" is
// used to pick which of the identifiers sharing the slot is the one in
// effect at this instruction.
string ZInst::VName(int n, int inst_num, const FrameReMap* mappings) const
	{
	if ( n > NumFrameSlots() )
		return "";

	int slot;
	switch ( n ) {
	case 1: slot = v1; break;
	case 2: slot = v2; break;
	case 3: slot = v3; break;
	default: slot = v4; break;
	}

	if ( slot < 0 )
		return "<special>";

	// Find which identifier manifests at this instruction.
	ASSERT(slot >= 0 && slot < mappings->size());
	auto& map = (*mappings)[slot];

	unsigned int i = 0;
	while ( i < map.id_start.size() )
		{
		// If the slot is right at the boundary between two
		// identifiers, then it matters whether this is slot 1
		// (starts right here) vs. slot > 1 (ignore change right
		// at the boundary and stick with older value).
		const auto start = map.id_start[i];
		const bool went_too_far =
			(n == 1) ? start > inst_num
			         : (n > 1 && start >= inst_num);

		if ( went_too_far )
			break;

		++i;
		}

	if ( i < map.id_start.size() )
		{
		ASSERT(i > 0);
		}

	// The identifier in effect is the one just before where we stopped.
	auto id = map.names.size() > 0 ? map.names[i-1] : map.ids[i-1]->Name();

	return util::fmt("%d (%s)", slot, id);
	}
// Returns the instruction's constant converted to a Val (using the
// instruction's type) for operand types that include a constant, and nil
// for those that don't.  Every operand type is listed explicitly (no
// "default") so that adding a new one forces a decision here.
ValPtr ZInst::ConstVal() const
	{
	switch ( op_type ) {
	// Operand types without a constant.
	case OP_X:
	case OP_V:
	case OP_VV:
	case OP_VVV:
	case OP_VVVV:
	case OP_V_I1:
	case OP_VV_FRAME:
	case OP_VV_I2:
	case OP_VV_I1_I2:
	case OP_VVV_I3:
	case OP_VVV_I2_I3:
	case OP_VVVV_I4:
	case OP_VVVV_I3_I4:
	case OP_VVVV_I2_I3_I4:
		return nullptr;

	// Operand types with a constant.
	case OP_C:
	case OP_VC:
	case OP_VC_I1:
	case OP_VVC:
	case OP_VVC_I2:
	case OP_VVVC:
	case OP_VVVC_I3:
	case OP_VVVC_I2_I3:
	case OP_VVVC_I1_I2_I3:
		return c.ToVal(t);
	}
	}
// Returns a textual description of the instruction's constant.  Only
// meaningful for instructions that have one, since the value returned by
// ConstVal() is dereferenced without a check.
string ZInst::ConstDump() const
	{
	auto const_val = ConstVal();

	ODesc desc;
	desc.Clear();
	const_val->Describe(&desc);

	return desc.Description();
	}
// Prints the intermediary instruction.  Operand names are resolved via
// VName(), which uses "remappings" when available and the instruction is
// live, and "frame_ids" otherwise.
void ZInstI::Dump(const FrameMap* frame_ids, const FrameReMap* remappings) const
	{
	auto id1 = VName(1, frame_ids, remappings);
	auto id2 = VName(2, frame_ids, remappings);
	auto id3 = VName(3, frame_ids, remappings);
	auto id4 = VName(4, frame_ids, remappings);

	ZInst::Dump(id1, id2, id3, id4);
	}
// Returns a printable name, of the form "<slot> (<identifier>)", for the
// instruction's n'th operand.  Yields an empty string if operand "n" is
// not a frame slot, and "<special>" for negative slots.  If remappings
// are available and the instruction is live, the identifier is the one in
// effect for the slot at this instruction; otherwise it comes directly
// from "frame_ids".
string ZInstI::VName(int n, const FrameMap* frame_ids,
                     const FrameReMap* remappings) const
	{
	if ( n > NumFrameSlots() )
		return "";

	int slot;
	switch ( n ) {
	case 1: slot = v1; break;
	case 2: slot = v2; break;
	case 3: slot = v3; break;
	default: slot = v4; break;
	}

	if ( slot < 0 )
		return "<special>";

	const ID* id;

	if ( ! remappings || ! live )
		id = (*frame_ids)[slot];

	else
		{ // Find which identifier manifests at this instruction.
		ASSERT(slot >= 0 && slot < remappings->size());
		auto& map = (*remappings)[slot];

		unsigned int i = 0;
		while ( i < map.id_start.size() )
			{
			// Operand 1 starts right at a boundary between two
			// identifiers, whereas later operands stick with the
			// older identifier at the boundary.
			const auto start = map.id_start[i];
			const bool went_too_far =
				(n == 1) ? start > inst_num
				         : (n > 1 && start >= inst_num);

			if ( went_too_far )
				break;

			++i;
			}

		if ( i < map.id_start.size() )
			{
			ASSERT(i > 0);
			}

		// For ZInstI's, map.ids is always populated.
		id = map.ids[i-1];
		}

	return util::fmt("%d (%s)", slot, id->Name());
	}
// True for the opcodes after which control does not proceed to the next
// instruction: the goto, hook-break, and return opcodes.
bool ZInstI::DoesNotContinue() const
	{
	return op == OP_GOTO_V ||
	       op == OP_HOOK_BREAK_X ||
	       op == OP_RETURN_C ||
	       op == OP_RETURN_V ||
	       op == OP_RETURN_X;
	}
// True if the instruction has two frame-slot operands (OP_VV) and its
// opcode is one of the OP_ASSIGN_VV flavors.
bool ZInstI::IsDirectAssignment() const
	{
	if ( op_type == OP_VV )
		{
		switch ( op ) {
		case OP_ASSIGN_VV:
		case OP_ASSIGN_VV_N:
		case OP_ASSIGN_VV_A:
		case OP_ASSIGN_VV_O:
		case OP_ASSIGN_VV_P:
		case OP_ASSIGN_VV_R:
		case OP_ASSIGN_VV_S:
		case OP_ASSIGN_VV_F:
		case OP_ASSIGN_VV_T:
		case OP_ASSIGN_VV_V:
		case OP_ASSIGN_VV_L:
		case OP_ASSIGN_VV_f:
		case OP_ASSIGN_VV_t:
			return true;

		default:
			break;
		}
		}

	return false;
	}
// Returns the op_side_effects table entry for this instruction's opcode.
bool ZInstI::HasSideEffects() const
{
return op_side_effects[op];
}
bool ZInstI::AssignsToSlot1() const
{
switch ( op_type ) {
case OP_X:
case OP_C:
case OP_V_I1:
case OP_VC_I1:
case OP_VV_I1_I2:
case OP_VVVC_I1_I2_I3:
return false;
// We use this ginormous set of cases rather than "default" so
// that when we add a new operand type, we have to consider
// its behavior here. (Same for many of the other switch's
// used for ZInst/ZinstI.)
case OP_V:
case OP_VC:
case OP_VV_FRAME:
case OP_VV_I2:
case OP_VVC_I2:
case OP_VVV_I2_I3:
case OP_VVVC_I2_I3:
case OP_VVVV_I2_I3_I4:
case OP_VV:
case OP_VVC:
case OP_VVV_I3:
case OP_VVVV_I3_I4:
case OP_VVVC_I3:
case OP_VVV:
case OP_VVVC:
case OP_VVVV_I4:
case OP_VVVV:
auto fl = op1_flavor[op];
return fl == OP1_WRITE || fl == OP1_READ_WRITE;
}
}
bool ZInstI::UsesSlot(int slot) const
{
auto fl = op1_flavor[op];
auto v1_relevant = fl == OP1_READ || fl == OP1_READ_WRITE;
auto v1_match = v1_relevant && v1 == slot;
switch ( op_type ) {
case OP_X:
case OP_C:
case OP_V_I1:
case OP_VC_I1:
case OP_VV_I1_I2:
case OP_VVVC_I1_I2_I3:
return false;
case OP_V:
case OP_VC:
case OP_VV_FRAME:
case OP_VV_I2:
case OP_VVC_I2:
case OP_VVV_I2_I3:
case OP_VVVC_I2_I3:
case OP_VVVV_I2_I3_I4:
return v1_match;
case OP_VV:
case OP_VVC:
case OP_VVV_I3:
case OP_VVVV_I3_I4:
case OP_VVVC_I3:
return v1_match || v2 == slot;
case OP_VVV:
case OP_VVVC:
case OP_VVVV_I4:
return v1_match || v2 == slot || v3 == slot;
case OP_VVVV:
return v1_match || v2 == slot || v3 == slot || v4 == slot;
}
}
bool ZInstI::UsesSlots(int& s1, int& s2, int& s3, int& s4) const
{
s1 = s2 = s3 = s4 = -1;
auto fl = op1_flavor[op];
auto v1_relevant = fl == OP1_READ || fl == OP1_READ_WRITE;
switch ( op_type ) {
case OP_X:
case OP_C:
case OP_V_I1:
case OP_VC_I1:
case OP_VV_I1_I2:
case OP_VVVC_I1_I2_I3:
return false;
case OP_V:
case OP_VC:
case OP_VV_FRAME:
case OP_VV_I2:
case OP_VVC_I2:
case OP_VVV_I2_I3:
case OP_VVVC_I2_I3:
case OP_VVVV_I2_I3_I4:
if ( ! v1_relevant )
return false;
s1 = v1;
return true;
case OP_VV:
case OP_VVC:
case OP_VVV_I3:
case OP_VVVV_I3_I4:
case OP_VVVC_I3:
s1 = v2;
if ( v1_relevant )
s2 = v1;
return true;
case OP_VVV:
case OP_VVVC:
case OP_VVVV_I4:
s1 = v2;
s2 = v3;
if ( v1_relevant )
s3 = v1;
return true;
case OP_VVVV:
s1 = v2;
s2 = v3;
s3 = v4;
if ( v1_relevant )
s4 = v1;
return true;
}
}
// Rewrites the instruction's frame-slot operands according to
// "slot_mapping" (indexed by old slot, yielding the new slot).  Operands
// beyond the first are always remapped when they are frame slots; the
// first operand is remapped only if it is a frame slot that is purely
// read and is non-negative.
void ZInstI::UpdateSlots(std::vector<int>& slot_mapping)
	{
	// How many leading operands are frame slots.  Every operand type
	// is listed explicitly so that adding one forces a decision here.
	int frame_operands = 0;

	switch ( op_type ) {
	case OP_X:
	case OP_C:
	case OP_V_I1:
	case OP_VC_I1:
	case OP_VV_I1_I2:
	case OP_VVVC_I1_I2_I3:
		frame_operands = 0;
		break;

	case OP_V:
	case OP_VC:
	case OP_VV_FRAME:
	case OP_VV_I2:
	case OP_VVC_I2:
	case OP_VVV_I2_I3:
	case OP_VVVC_I2_I3:
	case OP_VVVV_I2_I3_I4:
		frame_operands = 1;
		break;

	case OP_VV:
	case OP_VVC:
	case OP_VVV_I3:
	case OP_VVVV_I3_I4:
	case OP_VVVC_I3:
		frame_operands = 2;
		break;

	case OP_VVV:
	case OP_VVVC:
	case OP_VVVV_I4:
		frame_operands = 3;
		break;

	case OP_VVVV:
		frame_operands = 4;
		break;
	}

	if ( frame_operands == 0 )
		return;	// so we don't do any v1 remapping.

	if ( frame_operands >= 2 )
		v2 = slot_mapping[v2];

	if ( frame_operands >= 3 )
		v3 = slot_mapping[v3];

	if ( frame_operands >= 4 )
		v4 = slot_mapping[v4];

	// Note, unlike for UsesSlots() we do *not* include OP1_READ_WRITE
	// here, because such instructions will already have v1 remapped
	// given it's an assignment target.
	const bool v1_only_read = op1_flavor[op] == OP1_READ;

	if ( v1_only_read && v1 >= 0 )
		v1 = slot_mapping[v1];
	}
bool ZInstI::IsGlobalLoad() const
{
if ( op == OP_LOAD_GLOBAL_TYPE_VV )
// These don't have flavors.
return true;
static std::unordered_set<ZOp> global_ops;
if ( global_ops.size() == 0 )
{ // Initialize the set.
for ( int t = 0; t < NUM_TYPES; ++t )
{
TypeTag tag = TypeTag(t);
ZOp global_op_flavor =
AssignmentFlavor(OP_LOAD_GLOBAL_VV, tag, false);
if ( global_op_flavor != OP_NOP )
global_ops.insert(global_op_flavor);
}
}
return global_ops.count(op) > 0;
}
// Sets the instruction's constant and type from the given constant
// expression.  It is an internal error if building the constant leaves
// ZAM_error set.
void ZInstI::InitConst(const ConstExpr* ce)
	{
	auto const_val = ce->ValuePtr();

	t = ce->GetType();
	c = ZVal(const_val, t);

	if ( ! ZAM_error )
		return;

	reporter->InternalError("bad value compiling code");
	}
} // zeek::detail

469
src/script_opt/ZAM/ZInst.h Normal file
View file

@ -0,0 +1,469 @@
// See the file "COPYING" in the main distribution directory for copyright.
// Operators and instructions used in ZAM execution.
#pragma once
#include "zeek/script_opt/ZAM/Support.h"
#include "zeek/script_opt/ZAM/ZOp.h"
namespace zeek::detail {
class Expr;
class ConstExpr;
class Attributes;
class Stmt;
using AttributesPtr = IntrusivePtr<Attributes>;
// Maps ZAM frame slots to associated identifiers.
typedef std::vector<ID*> FrameMap;
// Maps ZAM frame slots to information for sharing the slot across
// multiple script variables.
//
// NOTE(review): scope_end and is_managed have no default initializers, so
// a default-constructed instance leaves them unset -- presumably every
// creator assigns them; confirm.
class FrameSharingInfo {
public:
// The variables sharing the slot. ID's need to be non-const so we
// can manipulate them, for example by changing their interpreter
// frame offset.
std::vector<ID*> ids;
// A parallel vector, only used for fully compiled code, which
// gives the names of the identifiers. When in use, the above
// "ids" member variable may be empty.
std::vector<const char*> names;
// The ZAM instruction number where a given identifier starts its
// scope, parallel to "ids".
std::vector<int> id_start;
// The current end of the frame slot's scope. Gets updated as
// new IDs are added to share the slot.
int scope_end;
// Whether this is a managed slot.
bool is_managed;
};
// A collection of per-slot sharing information.
using FrameReMap = std::vector<FrameSharingInfo>;
class ZInstAux;
// A ZAM instruction.  This base class has all the information for
// execution, but omits information and methods only necessary for
// compiling.
class ZInst {
public:
	// Creates an instruction with the given opcode and operand layout.
	// The slot values (v1..v4) start out as -1 until a caller sets them.
	ZInst(ZOp _op, ZAMOpType _op_type) : op(_op), op_type(_op_type) { }

	// Create a stub instruction that will be populated later.  Every
	// member takes its in-class default, so a stub never holds an
	// indeterminate opcode, operand type or slot value.
	ZInst() = default;

	virtual ~ZInst() = default;

	// Methods for printing out the instruction for debugging/maintenance.
	void Dump(int inst_num, const FrameReMap* mappings) const;
	void Dump(const std::string& id1, const std::string& id2,
	          const std::string& id3, const std::string& id4) const;

	// Returns the name to use in identifying one of the slots/integer
	// values (designated by "n").  "inst_num" identifies the instruction
	// by its number within a larger set.  "mappings" provides the
	// mappings used to translate raw slots to the corresponding
	// script variable(s).
	std::string VName(int n, int inst_num,
	                  const FrameReMap* mappings) const;

	// Number of slots that refer to a frame element.  These always
	// come first, if we use additional slots.
	int NumFrameSlots() const;

	// Total number of slots in use.  >= NumFrameSlots()
	int NumSlots() const;

	// Returns nil if this instruction doesn't have an associated constant.
	ValPtr ConstVal() const;

	// Returns a string describing the constant.
	std::string ConstDump() const;

	// The opcode, and which of the fields below it uses as operands.
	ZOp op = OP_NOP;
	ZAMOpType op_type = OP_X;

	// Usually indices into frame, though sometimes hold integer constants.
	// When an instruction has both frame slots and integer constants,
	// the former always come first, even if conceptually in the operation
	// the constant is an "earlier" operand.  Initialized to -1 so that
	// operands that were never set hold a well-defined value.
	int v1 = -1, v2 = -1, v3 = -1, v4 = -1;

	ZVal c;	// constant associated with instruction, if any

	// Meta-data associated with the execution.

	// Type, usually for interpreting the constant.
	TypePtr t = nullptr;
	TypePtr t2 = nullptr;	// just a few ops need two types
	const Expr* e = nullptr;	// only needed for "when" expressions
	Func* func = nullptr;	// used for calls
	EventHandler* event_handler = nullptr;	// used for referring to events
	AttributesPtr attrs = nullptr;	// used for things like constructors

	// Auxiliary information.  We could in principle use this to
	// consolidate a bunch of the above, though at the cost of
	// slightly slower access.  Most instructions don't need "aux",
	// which is why we bundle these separately.
	ZInstAux* aux = nullptr;

	// Location associated with this instruction, for error reporting.
	const Location* loc = nullptr;

	// Whether v1 represents a frame slot type for which we
	// explicitly manage the memory.
	bool is_managed = false;
};
// An intermediary ZAM instruction, one that adds the information and
// methods needed for compiling.  Intermediate instructions refer to
// branch targets via pointers to other such instructions, rather than
// concrete instruction numbers.  This allows the AM optimizer to
// easily prune instructions.
class ZInstI : public ZInst {
public:
	// These constructors can be used directly, but often instead
	// they'll be generated via the use of Inst-Gen methods.  Each one
	// picks the operand layout matching its arguments: "V" for each
	// integer value and "C" for a constant.
	ZInstI(ZOp _op) : ZInst(_op, OP_X) { }

	ZInstI(ZOp _op, int _v1) : ZInst(_op, OP_V) { v1 = _v1; }

	ZInstI(ZOp _op, int _v1, int _v2) : ZInst(_op, OP_VV)
		{ v1 = _v1; v2 = _v2; }

	ZInstI(ZOp _op, int _v1, int _v2, int _v3) : ZInst(_op, OP_VVV)
		{ v1 = _v1; v2 = _v2; v3 = _v3; }

	ZInstI(ZOp _op, int _v1, int _v2, int _v3, int _v4)
		: ZInst(_op, OP_VVVV)
		{ v1 = _v1; v2 = _v2; v3 = _v3; v4 = _v4; }

	ZInstI(ZOp _op, const ConstExpr* ce) : ZInst(_op, OP_C)
		{ InitConst(ce); }

	ZInstI(ZOp _op, int _v1, const ConstExpr* ce) : ZInst(_op, OP_VC)
		{
		v1 = _v1;
		InitConst(ce);
		}

	ZInstI(ZOp _op, int _v1, int _v2, const ConstExpr* ce)
		: ZInst(_op, OP_VVC)
		{
		v1 = _v1; v2 = _v2;
		InitConst(ce);
		}

	ZInstI(ZOp _op, int _v1, int _v2, int _v3, const ConstExpr* ce)
		: ZInst(_op, OP_VVVC)
		{
		v1 = _v1; v2 = _v2; v3 = _v3;
		InitConst(ce);
		}

	// Constructor used when we're going to just copy in another ZInstI.
	ZInstI() { }

	// Prints the instruction.  If "remappings" is non-nil, then it is
	// used instead of frame_ids.
	void Dump(const FrameMap* frame_ids, const FrameReMap* remappings) const;

	// Note that this is *not* an override of the base class's VName
	// but instead a method with similar functionality but somewhat
	// different behavior (namely, being cognizant of frame_ids).
	std::string VName(int n, const FrameMap* frame_ids,
	                  const FrameReMap* remappings) const;

	// True if this instruction definitely won't proceed to the one
	// after it.
	bool DoesNotContinue() const;

	// True if this instruction always branches elsewhere.  Different
	// from DoesNotContinue() in that returns & hook breaks do not
	// continue, but they are not branches.
	bool IsUnconditionalBranch() const
		{
		return op == OP_GOTO_V;
		}

	// True if this instruction is of the form "v1 = v2".
	bool IsDirectAssignment() const;

	// True if this instruction has side effects when executed, so
	// should not be pruned even if it has a dead assignment.
	bool HasSideEffects() const;

	// True if the given instruction assigns to the frame location
	// given by slot 1 (v1).
	bool AssignsToSlot1() const;

	// True if the given instruction uses the value in the given frame
	// slot.  (Assigning to the slot does not constitute using the value.)
	bool UsesSlot(int slot) const;

	// Returns the slots used (not assigned to).  Any slot not used
	// is set to -1.  Returns true if at least one slot was used.
	bool UsesSlots(int& s1, int& s2, int& s3, int& s4) const;

	// Updates used (not assigned) slots per the given mapping.
	void UpdateSlots(std::vector<int>& slot_mapping);

	// True if the instruction corresponds to loading a global into
	// the ZAM frame.
	bool IsGlobalLoad() const;

	// True if the instruction corresponds to some sort of load,
	// either from the interpreter frame or of a global.
	bool IsLoad() const
		{
		if ( op_type == OP_VV_FRAME )
			return true;

		return IsGlobalLoad();
		}

	// True if the instruction corresponds to storing a global.
	bool IsGlobalStore() const { return op == OP_STORE_GLOBAL_V; }

	// Marks the instruction as managed if the given type is a managed
	// one.  Never clears the flag.
	void CheckIfManaged(const TypePtr& t)
		{
		if ( ZVal::IsManagedType(t) )
			is_managed = true;
		}

	// Sets the instruction's type and, if it's non-nil, updates the
	// managed flag to reflect it.
	void SetType(TypePtr _t)
		{
		t = std::move(_t);

		if ( ! t )
			return;

		CheckIfManaged(t);
		}

	// Whether the instruction should be included in final code
	// generation.
	bool live = true;

	// Whether the instruction is the beginning of a loop, meaning
	// it's the target of backward control flow.
	bool loop_start = false;

	// How deep the instruction is within loop bodies (for all
	// instructions in a loop, not just their beginnings).  For
	// example, a value of 2 means the instruction is inside a
	// loop that itself is inside one more loop.
	int loop_depth = 0;

	// Branch target, prior to concretizing into PC target.
	ZInstI* target = nullptr;
	int target_slot = 0;	// which of v1/v2/v3 should hold the target

	// The final PC location of the statement.  -1 indicates not
	// yet assigned.
	int inst_num = -1;

	// Number of associated label(s) (indicating the statement is
	// a branch target).
	int num_labels = 0;

	// Used for debugging.  Transformed into the ZInst "loc" field.
	// Captured from "curr_stmt" at the time the instruction is built.
	const Stmt* stmt = curr_stmt;

private:
	// Initialize 'c' from the given ConstExpr.
	void InitConst(const ConstExpr* ce);
};
// Auxiliary information, used when the fixed ZInst layout lacks
// sufficient expressiveness to represent all of the elements that
// an instruction needs.
//
// NOTE(review): the parallel arrays are owned through raw pointers and
// no copy operations are declared, so copying an instance would result
// in the arrays being deleted twice - confirm instances are only ever
// handled via pointers.
class ZInstAux {
public:
	// If n is positive then it gives the size of the parallel arrays
	// tracking slots, constants, and types.  Otherwise no arrays are
	// allocated and the corresponding pointers remain nil.
	ZInstAux(int _n) : n(_n)
		{
		if ( n <= 0 )
			return;

		ints = new int[n];
		slots = ints;
		constants = new ValPtr[n];
		types = new TypePtr[n];
		}

	// Releases the parallel arrays ("slots" merely aliases "ints",
	// so it isn't deleted separately).
	~ZInstAux()
		{
		delete [] ints;
		delete [] constants;
		delete [] types;
		}

	// Returns the i'th element of the parallel arrays as a ValPtr:
	// the constant if one is present, otherwise the conversion of the
	// associated frame slot using the element's type.
	ValPtr ToVal(const ZVal* frame, int i) const
		{
		const auto& constant = constants[i];

		if ( constant )
			return constant;

		return frame[slots[i]].ToVal(types[i]);
		}

	// Returns the parallel arrays as a ListValPtr.
	ListValPtr ToListVal(const ZVal* frame) const
		{
		auto result = make_intrusive<ListVal>(TYPE_ANY);

		for ( int elem = 0; elem < n; ++elem )
			result->Append(ToVal(frame, elem));

		return result;
		}

	// Converts the parallel arrays to a ListValPtr suitable for
	// use as indices for indexing a table or set.  "offset" specifies
	// which index we're looking for (there can be a bunch for
	// constructors), and "width" the number of elements in a single
	// index.
	ListValPtr ToIndices(const ZVal* frame, int offset, int width) const
		{
		auto indices = make_intrusive<ListVal>(TYPE_ANY);

		for ( int k = 0; k < width; ++k )
			indices->Append(ToVal(frame, offset + k));

		return indices;
		}

	// Returns the parallel arrays converted to a vector of ValPtr's.
	// The result refers to internal storage ("vv") that is overwritten
	// by the next call.
	const val_vec& ToValVec(const ZVal* frame)
		{
		vv.clear();
		FillValVec(vv, frame);
		return vv;
		}

	// Appends to the given vector of ValPtr's the conversion
	// of the parallel arrays.
	void FillValVec(val_vec& vec, const ZVal* frame) const
		{
		for ( int elem = 0; elem < n; ++elem )
			vec.push_back(ToVal(frame, elem));
		}

	// When building up a ZInstAux, sets one element of the parallel
	// arrays to a given frame slot and type.
	void Add(int i, int slot, TypePtr t)
		{
		constants[i] = nullptr;
		ints[i] = slot;
		types[i] = t;
		}

	// Same but for constants.  The slot entry is set to -1.
	void Add(int i, ValPtr c)
		{
		types[i] = nullptr;
		ints[i] = -1;
		constants[i] = c;
		}

	// Member variables.  We could add accessors for manipulating
	// these (and make the variables private), but for convenience we
	// make them directly available.

	// These are parallel arrays, used to build up lists of values.
	// Each element is either an integer or a constant.  Usually the
	// integer is a frame slot (in which case "slots" points to "ints";
	// if not, it's nil).
	//
	// We track associated types, too, enabling us to use
	// ZVal::ToVal to convert frame slots or constants to ValPtr's.
	int n;	// size of arrays
	int* slots = nullptr;	// either nil or points to ints
	int* ints = nullptr;
	ValPtr* constants = nullptr;
	TypePtr* types = nullptr;

	// Used for accessing function names.
	ID* id_val = nullptr;

	// Whether the instruction can lead to globals changing.
	// Currently only needed by the optimizer, but convenient
	// to store here.
	bool can_change_globals = false;

	// The following is only used for OP_CONSTRUCT_KNOWN_RECORD_V,
	// to map elements in slots/constants/types to record field offsets.
	std::vector<int> map;

	///// The following three apply to looping over the elements of tables.

	// Frame slots of iteration variables, such as "[v1, v2, v3] in aggr".
	std::vector<int> loop_vars;

	// Their types.
	std::vector<TypePtr> loop_var_types;

	// Type associated with the "value" entry, for "k, value in aggr"
	// iteration.
	TypePtr value_var_type;

	// This is only used to return values stored elsewhere in this
	// object - it's not set directly.
	//
	// If we cared about memory penny-pinching, we could make this
	// a pointer and only instantiate as needed.
	val_vec vv;
};
// Returns a human-readable version of the given ZAM op-code.
extern const char* ZOP_name(ZOp op);
// Maps a generic operation to a specific one associated with the given type.
// The third argument governs what to do if the given type has no assignment
// flavor. If true, this leads to an assertion failure. If false, and
// if there's no flavor for the type, then OP_NOP is returned.
extern ZOp AssignmentFlavor(ZOp orig, TypeTag tag, bool strict=true);
// The following all use initializations produced by Gen-ZAM.
// Maps generic opcodes, and then type tags, to the corresponding opcodes.
extern std::unordered_map<ZOp, std::unordered_map<TypeTag, ZOp>> assignment_flavor;
// Maps flavorful assignments to their non-assignment counterpart.
// Used for optimization when we determine that the assigned-to
// value is superfluous.
extern std::unordered_map<ZOp, ZOp> assignmentless_op;
// Maps flavorful assignments to what op-type their non-assignment
// counterpart uses.
extern std::unordered_map<ZOp, ZAMOpType> assignmentless_op_type;
} // namespace zeek::detail

116
src/script_opt/ZAM/ZOp.cc Normal file
View file

@ -0,0 +1,116 @@
// See the file "COPYING" in the main distribution directory for copyright.
#include "zeek/script_opt/ZAM/Support.h"
#include "zeek/script_opt/ZAM/ZOp.h"
namespace zeek::detail {
// Returns a human-readable version of the given ZAM op-code.  The names
// for all opcodes other than OP_NOP come from the generated file
// included inside the switch.
const char* ZOP_name(ZOp op)
	{
	switch ( op ) {
#include "zeek/ZAM-OpsNamesDefs.h"
	case OP_NOP:	return "nop";
	}

	// Only reached if "op" holds a value with no case above.  Flowing
	// off the end of a non-void function is undefined behavior, so
	// return a placeholder that's safe to print.
	return "<unknown-op>";
	}
// Returns a human-readable version of the given operand-type layout;
// the text is the enumerator's name without its "OP_" prefix.
//
// NOTE(review): no caller of this function is visible in this file -
// confirm whether it's still needed.
static const char* op_type_name(ZAMOpType ot)
	{
	switch ( ot ) {
	case OP_X:		return "X";
	case OP_C:		return "C";
	case OP_V:		return "V";
	case OP_V_I1:		return "V_I1";
	case OP_VC_I1:		return "VC_I1";
	case OP_VC:		return "VC";
	case OP_VV:		return "VV";
	case OP_VV_I2:		return "VV_I2";
	case OP_VV_I1_I2:	return "VV_I1_I2";
	case OP_VV_FRAME:	return "VV_FRAME";
	case OP_VVC:		return "VVC";
	case OP_VVC_I2:		return "VVC_I2";
	case OP_VVV:		return "VVV";
	case OP_VVV_I3:		return "VVV_I3";
	case OP_VVV_I2_I3:	return "VVV_I2_I3";
	case OP_VVVC:		return "VVVC";
	case OP_VVVC_I3:	return "VVVC_I3";
	case OP_VVVC_I2_I3:	return "VVVC_I2_I3";
	case OP_VVVC_I1_I2_I3:	return "VVVC_I1_I2_I3";
	case OP_VVVV:		return "VVVV";
	case OP_VVVV_I4:	return "VVVV_I4";
	case OP_VVVV_I3_I4:	return "VVVV_I3_I4";
	case OP_VVVV_I2_I3_I4:	return "VVVV_I2_I3_I4";
	}

	// Only reached if "ot" holds a value that isn't a ZAMOpType
	// enumerator.  Flowing off the end of a non-void function is
	// undefined behavior, so return a placeholder that's safe to print.
	return "<unknown-op-type>";
	}
// The flavor of each operation's first slot, indexed by opcode.  The
// entries come from a generated file, followed by one for OP_NOP
// (which is also the final ZOp enumerator).
// NOTE(review): presumably the generated entries are in the same order
// as the generated opcodes - confirm against Gen-ZAM.
ZAMOp1Flavor op1_flavor[] = {
#include "zeek/ZAM-Op1FlavorsDefs.h"
OP1_INTERNAL, // OP_NOP
};
// Whether each operation has side effects, laid out in the same manner
// as op1_flavor above.
bool op_side_effects[] = {
#include "zeek/ZAM-OpSideEffects.h"
false, // OP_NOP
};
// Definitions of the assignment-related maps.  They start out empty.
// NOTE(review): presumably all three are populated by the generated
// code that AssignmentFlavor() runs on its first call - confirm.
std::unordered_map<ZOp, std::unordered_map<TypeTag, ZOp>> assignment_flavor;
std::unordered_map<ZOp, ZOp> assignmentless_op;
std::unordered_map<ZOp, ZAMOpType> assignmentless_op_type;
// Maps a generic operation to the specific one associated with the
// given type.
//
// "orig" is the generic opcode and "tag" the type of interest; tags
// that share a representation are first folded together (bool/enum to
// int, port to count, time/interval to double).
//
// If there's no flavor for the combination, then "strict" governs what
// happens: if true, this is an assertion failure; in either case (i.e.,
// also when assertions are compiled out) OP_NOP is returned, and the
// flavor tables are left unmodified.
ZOp AssignmentFlavor(ZOp orig, TypeTag tag, bool strict)
	{
	static bool did_init = false;

	if ( ! did_init )
		{
		// Run the generated initialization code exactly once.
		// NOTE(review): "empty_map" isn't used directly here;
		// presumably the generated code refers to it - confirm.
		std::unordered_map<TypeTag, ZOp> empty_map;
#include "zeek/ZAM-AssignFlavorsDefs.h"

		did_init = true;
		}

	// Map type tag to equivalent, as needed.
	switch ( tag ) {
	case TYPE_BOOL:
	case TYPE_ENUM:
		tag = TYPE_INT;
		break;

	case TYPE_PORT:
		tag = TYPE_COUNT;
		break;

	case TYPE_TIME:
	case TYPE_INTERVAL:
		tag = TYPE_DOUBLE;
		break;

	default:
		break;
	}

	// Use find() rather than operator[] so that lookups neither copy
	// the per-operation map nor insert default entries on a miss.
	auto flavors = assignment_flavor.find(orig);

	if ( flavors != assignment_flavor.end() )
		{
		auto flavor = flavors->second.find(tag);

		if ( flavor != flavors->second.end() )
			return flavor->second;
		}

	// No flavor for this operation/type combination.
	if ( strict )
		{
		ASSERT(false);
		}

	return OP_NOP;
	}
} // zeek::detail

65
src/script_opt/ZAM/ZOp.h Normal file
View file

@ -0,0 +1,65 @@
// See the file "COPYING" in the main distribution directory for copyright.
// ZAM instruction opcodes and associated information.
#pragma once
namespace zeek::detail {
// Opcodes associated with ZAM instructions.  All but the last come from
// a generated file; OP_NOP is deliberately the final enumerator, and
// the tables defined in terms of opcodes likewise end with an OP_NOP
// entry.
typedef enum {
#include "zeek/ZAM-OpsDefs.h"
OP_NOP,
} ZOp;
// The ways in which an instruction can use its fields as operands.
// These drive low-level optimization (so it's important that they're
// correct), and are also used when dumping instructions.
//
//	V: one of the instruction's integer values, treated as a frame slot
//	C: the instruction's associated constant
//	I1/I2/I3/I4: the instruction's integer value, used directly (not as a slot)
//	FRAME: a slot in the (interpreter) Frame object
//	X: no operands
enum ZAMOpType {
	OP_X,
	OP_C,
	OP_V,
	OP_V_I1,
	OP_VC_I1,

	OP_VC,

	OP_VV,
	OP_VV_I2,
	OP_VV_I1_I2,
	OP_VV_FRAME,

	OP_VVC,
	OP_VVC_I2,

	OP_VVV,
	OP_VVV_I3,
	OP_VVV_I2_I3,

	OP_VVVC,
	OP_VVVC_I3,
	OP_VVVC_I2_I3,
	OP_VVVC_I1_I2_I3,

	OP_VVVV,
	OP_VVVV_I4,
	OP_VVVV_I3_I4,
	OP_VVVV_I2_I3_I4,
};
// The different ways ("flavors") in which an operator can treat its
// first slot.
enum ZAMOp1Flavor {
	// The slot is read, not modified.
	OP1_READ,

	// The slot is modified, not read - the most common.
	OP1_WRITE,

	// The slot is both read and then modified, e.g. "++".
	OP1_READ_WRITE,

	// We're doing some internal manipulation of the slot.
	OP1_INTERNAL,
};
// Maps an operation to the flavor of its first slot.
extern ZAMOp1Flavor op1_flavor[];
// Maps an operation to whether it has side effects.
extern bool op_side_effects[];
} // namespace zeek::detail