the bulk of the compiler

This commit is contained in:
Vern Paxson 2021-04-19 16:32:04 -07:00
parent 158e82a2c1
commit 863be9436b
40 changed files with 7730 additions and 0 deletions

View file

@ -327,6 +327,27 @@ set(MAIN_SRCS
plugin/Manager.cc plugin/Manager.cc
plugin/Plugin.cc plugin/Plugin.cc
script_opt/CPP/Attrs.cc
script_opt/CPP/Consts.cc
script_opt/CPP/DeclFunc.cc
script_opt/CPP/Driver.cc
script_opt/CPP/Emit.cc
script_opt/CPP/Exprs.cc
script_opt/CPP/Func.cc
script_opt/CPP/GenFunc.cc
script_opt/CPP/HashMgr.cc
script_opt/CPP/Inits.cc
script_opt/CPP/RuntimeInit.cc
script_opt/CPP/RuntimeOps.cc
script_opt/CPP/RuntimeVec.cc
script_opt/CPP/Stmts.cc
script_opt/CPP/Tracker.cc
script_opt/CPP/Types.cc
script_opt/CPP/Util.cc
script_opt/CPP/Vars.cc
script_opt/CPP/CPP-gen.cc
script_opt/DefItem.cc script_opt/DefItem.cc
script_opt/DefSetsMgr.cc script_opt/DefSetsMgr.cc
script_opt/Expr.cc script_opt/Expr.cc

176
src/script_opt/CPP/Attrs.cc Normal file
View file

@ -0,0 +1,176 @@
// See the file "COPYING" in the main distribution directory for copyright.
#include "zeek/script_opt/CPP/Compile.h"
namespace zeek::detail {
// Registers a set of attributes for initialization, along with any
// non-simple expressions those attributes use.  Does nothing for a nil
// pointer or for attributes that have already been registered.
void CPPCompile::RegisterAttributes(const AttributesPtr& attrs)
	{
	if ( ! attrs )
		return;

	if ( attributes.HasKey(attrs) )
		// Already registered.
		return;

	attributes.AddKey(attrs);
	AddInit(attrs);

	auto attrs_rep = attributes.GetRep(attrs);
	if ( attrs_rep != attrs.get() )
		{
		// A different object serves as the representative for
		// these attributes, so all we need is a dependency on it.
		NoteInitDependency(attrs.get(), attrs_rep);
		return;
		}

	for ( const auto& attr : attrs->GetAttrs() )
		{
		const auto& expr = attr->GetExpr();
		if ( ! expr )
			continue;

		if ( IsSimpleInitExpr(expr) )
			{
			// Generate it (discarding the result) solely so that
			// any dependencies it has get noted.
			(void) GenExpr(expr, GEN_VAL_PTR);
			continue;
			}

		init_exprs.AddKey(expr);
		AddInit(expr);
		NoteInitDependency(attrs, expr);

		auto expr_rep = init_exprs.GetRep(expr);
		if ( expr_rep != expr.get() )
			NoteInitDependency(expr.get(), expr_rep);
		}
	}
// Appends to attr_tags / attr_vals comma-separated lists of the numeric
// tags and generated expression values (or "nullptr") of the given
// attributes, and then wraps each of the two strings in braces.  A nil
// "attrs" simply yields the braces around whatever the strings held.
void CPPCompile::BuildAttrs(const AttributesPtr& attrs, std::string& attr_tags,
                            std::string& attr_vals)
	{
	if ( attrs )
		for ( const auto& attr : attrs->GetAttrs() )
			{
			if ( ! attr_tags.empty() )
				{
				// Not the first entry, so separate from the previous one.
				attr_tags += ", ";
				attr_vals += ", ";
				}

			attr_tags += Fmt(int(attr->Tag()));

			const auto& expr = attr->GetExpr();

			if ( ! expr )
				attr_vals += "nullptr";
			else
				attr_vals += GenExpr(expr, GEN_VAL_PTR, false);
			}

	attr_tags = "{" + attr_tags + "}";
	attr_vals = "{" + attr_vals + "}";
	}
// Emits a C++ function (named per AttrsName()) that constructs and
// returns the run-time Attributes object corresponding to "attrs".
// Each attribute with an expression also gets an initialization
// dependency recorded on that expression.
void CPPCompile::GenAttrs(const AttributesPtr& attrs)
	{
	NL();

	Emit("AttributesPtr %s", AttrsName(attrs));

	StartBlock();

	const auto& avec = attrs->GetAttrs();
	Emit("auto attrs = std::vector<AttrPtr>();");

	AddInit(attrs);

	// Range-based iteration avoids comparing a signed index against
	// the container's unsigned size.
	for ( const auto& attr : avec )
		{
		const auto& e = attr->GetExpr();

		if ( ! e )
			{
			// Attribute has no associated expression.
			Emit("attrs.emplace_back(make_intrusive<Attr>(%s));",
			     AttrName(attr));
			continue;
			}

		NoteInitDependency(attrs, e);
		AddInit(e);

		// Simple expressions are generated inline; others are
		// referred to via their separately generated initializer.
		std::string e_arg;
		if ( IsSimpleInitExpr(e) )
			e_arg = GenAttrExpr(e);
		else
			e_arg = InitExprName(e);

		Emit("attrs.emplace_back(make_intrusive<Attr>(%s, %s));",
		     AttrName(attr), e_arg);
		}

	Emit("return make_intrusive<Attributes>(attrs, nullptr, true, false);");

	EndBlock();
	}
// Returns C++ code that constructs, at run-time, the (simple) expression
// "e" used by an attribute.  Supports constants, names (looked up in
// "globals"), and record coercions; any other expression tag is an
// internal error.
std::string CPPCompile::GenAttrExpr(const ExprPtr& e)
	{
	switch ( e->Tag() ) {
	case EXPR_CONST:
		return std::string("make_intrusive<ConstExpr>(") +
		       GenExpr(e, GEN_VAL_PTR) + ")";

	case EXPR_NAME:
		NoteInitDependency(e, e->AsNameExpr()->IdPtr());
		return std::string("make_intrusive<NameExpr>(") +
		       globals[e->AsNameExpr()->Id()->Name()] + ")";

	case EXPR_RECORD_COERCE:
		NoteInitDependency(e, TypeRep(e->GetType()));
		return std::string("make_intrusive<RecordCoerceExpr>(make_intrusive<RecordConstructorExpr>(make_intrusive<ListExpr>()), cast_intrusive<RecordType>(") +
		       GenTypeName(e->GetType()) + "))";

	default:
		// The message names this function (it previously named
		// GenAttrs), so the diagnostic points at the right place.
		reporter->InternalError("bad expr tag in CPPCompile::GenAttrExpr");
		return "###";
	}
	}
// Returns the C++ expression used to obtain the given attributes: the
// name tracked for them in "attributes", followed by "()" to form a call.
std::string CPPCompile::AttrsName(const AttributesPtr& a)
	{
	std::string call = attributes.KeyName(a);
	call += "()";
	return call;
	}
// Returns the spelling of the C++ enumerator corresponding to the tag
// of the given attribute, for use in generated code.  Tags that don't
// name a real attribute yield "<busted>".
const char* CPPCompile::AttrName(const AttrPtr& attr)
	{
	// Deliberately no "default:" here, so that compilers can still warn
	// if a new attribute tag is added without a corresponding case.
	switch ( attr->Tag() ) {
	case ATTR_OPTIONAL:	return "ATTR_OPTIONAL";
	case ATTR_DEFAULT:	return "ATTR_DEFAULT";
	case ATTR_REDEF:	return "ATTR_REDEF";
	case ATTR_ADD_FUNC:	return "ATTR_ADD_FUNC";
	case ATTR_DEL_FUNC:	return "ATTR_DEL_FUNC";
	case ATTR_EXPIRE_FUNC:	return "ATTR_EXPIRE_FUNC";
	case ATTR_EXPIRE_READ:	return "ATTR_EXPIRE_READ";
	case ATTR_EXPIRE_WRITE:	return "ATTR_EXPIRE_WRITE";
	case ATTR_EXPIRE_CREATE:	return "ATTR_EXPIRE_CREATE";
	case ATTR_RAW_OUTPUT:	return "ATTR_RAW_OUTPUT";
	case ATTR_PRIORITY:	return "ATTR_PRIORITY";
	case ATTR_GROUP:	return "ATTR_GROUP";
	case ATTR_LOG:	return "ATTR_LOG";
	case ATTR_ERROR_HANDLER:	return "ATTR_ERROR_HANDLER";
	case ATTR_TYPE_COLUMN:	return "ATTR_TYPE_COLUMN";
	case ATTR_TRACKED:	return "ATTR_TRACKED";
	case ATTR_ON_CHANGE:	return "ATTR_ON_CHANGE";
	case ATTR_BROKER_STORE:	return "ATTR_BROKER_STORE";
	case ATTR_BROKER_STORE_ALLOW_COMPLEX:	return "ATTR_BROKER_STORE_ALLOW_COMPLEX";
	case ATTR_BACKEND:	return "ATTR_BACKEND";
	case ATTR_DEPRECATED:	return "ATTR_DEPRECATED";
	case ATTR_IS_ASSIGNED:	return "ATTR_IS_ASSIGNED";
	case ATTR_IS_USED:	return "ATTR_IS_USED";
	case NUM_ATTRS:	return "<busted>";
	}

	// Reached only if the tag holds a value outside of the enumerators
	// above.  Without this, control would flow off the end of a
	// non-void function, which is undefined behavior.
	return "<busted>";
	}
} // zeek::detail

View file

@ -0,0 +1,40 @@
##! Definitions of built-in functions related to loading compiled-to-C++
##! scripts.
%%{ // C segment
#include "zeek/Reporter.h"
#include "zeek/script_opt/ScriptOpt.h"
#include "zeek/script_opt/CPP/Func.h"
%%}
## Activates the compile-to-C++ scripts associated with the given hash.
##
## h: Hash of the set of C++ scripts.
##
## Returns: True if it was present and loaded, false if not.
##
function load_CPP%(h: count%): bool
	%{
	auto cb = detail::standalone_callbacks.find(h);

	if ( cb == detail::standalone_callbacks.end() )
		{
		// Cast explicitly so the argument matches "%llu" no matter
		// how the platform defines the integer type underlying
		// "count".
		reporter->Error("load of non-existing C++ code (%llu)",
		                static_cast<unsigned long long>(h));
		return zeek::val_mgr->False();
		}

	// Ensure that any compiled scripts are used.  If instead
	// the AST is used, then when we activate the standalone
	// scripts, they won't be able to avoid installing redundant
	// event handlers.
	detail::analysis_options.use_CPP = true;

	// Mark this script as one we should activate after loading
	// compiled scripts.
	detail::standalone_activations.push_back(cb->second);

	return zeek::val_mgr->True();
	%}

1022
src/script_opt/CPP/Compile.h Normal file

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,292 @@
// See the file "COPYING" in the main distribution directory for copyright.
#include "zeek/File.h"
#include "zeek/RE.h"
#include "zeek/script_opt/CPP/Compile.h"
namespace zeek::detail {
// Returns C++ code for accessing the constant value "vp" on behalf of
// "parent".  A nil value yields "nullptr".  Values that get their own
// C++ global are referred to by that global's name, with "parent" made
// dependent on its initialization; other values are generated inline.
std::string CPPCompile::BuildConstant(const Obj* parent, const ValPtr& vp)
	{
	if ( ! vp )
		return "nullptr";

	if ( ! AddConstant(vp) )
		// No separate global for this one; generate it directly.
		return NativeToGT(GenVal(vp), vp->GetType(), GEN_VAL_PTR);

	auto raw_val = vp.get();

	AddInit(parent);
	NoteInitDependency(parent, raw_val);

	// Hold a reference to the value - it might otherwise be transient
	// during construction - so that we can keep tracking it.
	cv_indices.push_back(vp);

	return const_vals[raw_val];
	}
// Tracks the value of the given constant expression.  If the value is
// represented by a C++ global, the expression is registered for
// initialization with a dependency on that value.
void CPPCompile::AddConstant(const ConstExpr* c)
	{
	auto val = c->ValuePtr();

	if ( ! AddConstant(val) )
		// Instantiated directly, nothing further to track.
		return;

	AddInit(c);
	NoteInitDependency(c, val.get());
	}
// Tracks the given constant value, creating a C++ global for it if one
// is needed and doesn't already exist.
//
// Returns false if the value has a native type (in which case nothing
// is tracked, as such values are instantiated directly), and true
// otherwise.
//
// Constants are de-duplicated by a textual key formed from the value's
// description plus its type.  A value matching a previously seen key
// shares that earlier constant's global and is made to depend on the
// earlier value's initialization.
bool CPPCompile::AddConstant(const ValPtr& vp)
{
auto v = vp.get();
if ( IsNativeType(v->GetType()) )
// These we instantiate directly.
return false;
if ( const_vals.count(v) > 0 )
// Already did this one.
return true;
// Formulate a key that's unique per distinct constant.
const auto& t = v->GetType();
std::string c_desc;
if ( t->Tag() == TYPE_STRING )
{
// We can't rely on these to render with consistent
// escaping, sigh. Just use the raw string.
auto s = v->AsString();
auto b = (const char*)(s->Bytes());
// The "string" suffix plays the role that the type description
// plays for non-string constants below.
c_desc = std::string(b, s->Len()) + "string";
}
else
{
ODesc d;
v->Describe(&d);
// Don't confuse constants of different types that happen to
// render the same.
t->Describe(&d);
c_desc = d.Description();
}
if ( constants.count(c_desc) > 0 )
{
// An equivalent constant already exists: reuse its global, and
// make this value's initialization depend on the original's.
const_vals[v] = constants[c_desc];
auto orig_v = constants_to_vals[c_desc];
ASSERT(v != orig_v);
AddInit(v);
NoteInitDependency(v, orig_v);
return true;
}
// Need a C++ global for this constant.
auto const_name = std::string("CPP__const__") +
Fmt(int(constants.size()));
const_vals[v] = constants[c_desc] = const_name;
constants_to_vals[c_desc] = v;
// Emit the global's declaration and record its initialization, in a
// manner that depends on the constant's type.
auto tag = t->Tag();
switch ( tag ) {
case TYPE_STRING:
AddStringConstant(vp, const_name);
break;
case TYPE_PATTERN:
AddPatternConstant(vp, const_name);
break;
case TYPE_LIST:
AddListConstant(vp, const_name);
break;
case TYPE_RECORD:
AddRecordConstant(vp, const_name);
break;
case TYPE_TABLE:
AddTableConstant(vp, const_name);
break;
case TYPE_VECTOR:
AddVectorConstant(vp, const_name);
break;
case TYPE_ADDR:
case TYPE_SUBNET:
{
// Both are constructed at run-time from the value's textual
// description.
auto prefix = (tag == TYPE_ADDR) ? "Addr" : "SubNet";
Emit("%sValPtr %s;", prefix, const_name);
ODesc d;
v->Describe(&d);
AddInit(v, const_name,
std::string("make_intrusive<") + prefix +
"Val>(\"" + d.Description() + "\")");
}
break;
case TYPE_FUNC:
Emit("FuncValPtr %s;", const_name);
// We can't generate the initialization now because it
// depends on first having compiled the associated body,
// so we know its hash. So for now we just note it
// to deal with later.
func_vars[v->AsFuncVal()] = const_name;
break;
case TYPE_FILE:
{
Emit("FileValPtr %s;", const_name);
auto f = cast_intrusive<FileVal>(vp)->Get();
// The generated code constructs the File from the original's
// name, using mode "w".
AddInit(v, const_name,
std::string("make_intrusive<FileVal>(") +
"make_intrusive<File>(\"" + f->Name() + "\", \"w\"))");
}
break;
default:
reporter->InternalError("bad constant type in CPPCompile::AddConstant");
}
return true;
}
// Declares the C++ global "const_name" for the string constant "v" and
// records its initialization from the string's raw bytes and length.
void CPPCompile::AddStringConstant(const ValPtr& v, std::string& const_name)
	{
	Emit("StringValPtr %s;", const_name);

	auto str = v->AsString();
	auto bytes = (const char*)(str->Bytes());

	AddInit(v, const_name, GenString(bytes, str->Len()));
	}
// Declares the C++ global "const_name" for the pattern constant "v" and
// records a braced sequence of initialization statements that build an
// RE_Matcher from the pattern's original text (made case-insensitive
// if the original is), compile it, and assign the resulting PatternVal.
void CPPCompile::AddPatternConstant(const ValPtr& v, std::string& const_name)
	{
	Emit("PatternValPtr %s;", const_name);

	auto matcher = v->AsPatternVal()->Get();

	// Opening brace: scopes the temporary "re" in the generated code.
	auto open_and_construct = std::string("{ auto re = new RE_Matcher(") +
	                          CPPEscape(matcher->OrigText()) + ");";
	AddInit(v, open_and_construct);

	if ( matcher->IsCaseInsensitive() )
		AddInit(v, "re->MakeCaseInsensitive();");

	AddInit(v, "re->Compile();");
	AddInit(v, const_name, "make_intrusive<PatternVal>(re)");

	// Matching closing brace.
	AddInit(v, "}");
	}
void CPPCompile::AddListConstant(const ValPtr& v, std::string& const_name)
{
Emit("ListValPtr %s;", const_name);
// No initialization dependency on the main type since we don't
// use the underlying TypeList. However, we *do* use the types of
// the elements.
AddInit(v, const_name, std::string("make_intrusive<ListVal>(TYPE_ANY)"));
auto lv = cast_intrusive<ListVal>(v);
auto n = lv->Length();
for ( auto i = 0; i < n; ++i )
{
const auto& l_i = lv->Idx(i);
auto l_i_c = BuildConstant(v, l_i);
AddInit(v, const_name + "->Append(" + l_i_c + ");");
NoteInitDependency(v, TypeRep(l_i->GetType()));
}
}
// Declares the C++ global "const_name" for the record constant "v" and
// records initializations that create the record and assign each of
// its fields that has a value.
void CPPCompile::AddRecordConstant(const ValPtr& v, std::string& const_name)
	{
	const auto& rec_type = v->GetType();

	Emit("RecordValPtr %s;", const_name);

	NoteInitDependency(v, TypeRep(rec_type));

	auto construct = std::string("make_intrusive<RecordVal>(") +
	                 "cast_intrusive<RecordType>(" + GenTypeName(rec_type) + "))";
	AddInit(v, const_name, construct);

	auto rec = cast_intrusive<RecordVal>(v);
	auto num_fields = rec->NumFields();

	for ( auto field = 0; field < num_fields; ++field )
		{
		const auto& field_val = rec->GetField(field);

		if ( ! field_val )
			// Field has no value, so there's nothing to assign.
			continue;

		auto field_code = BuildConstant(v, field_val);
		AddInit(v, const_name + "->Assign(" + Fmt(field) +
		           ", " + field_code + ");");
		}
	}
// Declares the C++ global "const_name" for the table constant "v" and
// records initializations that create the table and assign each of
// its index/value entries.
void CPPCompile::AddTableConstant(const ValPtr& v, std::string& const_name)
	{
	const auto& tbl_type = v->GetType();

	Emit("TableValPtr %s;", const_name);

	NoteInitDependency(v, TypeRep(tbl_type));

	auto construct = std::string("make_intrusive<TableVal>(") +
	                 "cast_intrusive<TableType>(" + GenTypeName(tbl_type) + "))";
	AddInit(v, const_name, construct);

	auto tbl = cast_intrusive<TableVal>(v);
	auto entries = tbl->ToMap();

	for ( auto& entry : entries )
		{
		// Build the index before the value, each of which may in turn
		// register further constants.
		auto index_code = BuildConstant(v, entry.first);
		auto value_code = BuildConstant(v, entry.second);

		auto assign_stmt = const_name + "->Assign(" + index_code + ", " +
		                   value_code + ");";
		AddInit(v, assign_stmt);
		}
	}
// Declares the C++ global "const_name" for the vector constant "v" and
// records initializations that create the vector and append each of
// its elements in order.
void CPPCompile::AddVectorConstant(const ValPtr& v, std::string& const_name)
	{
	const auto& vec_type = v->GetType();

	Emit("VectorValPtr %s;", const_name);

	NoteInitDependency(v, TypeRep(vec_type));

	auto construct = std::string("make_intrusive<VectorVal>(") +
	                 "cast_intrusive<VectorType>(" + GenTypeName(vec_type) + "))";
	AddInit(v, const_name, construct);

	auto vec = cast_intrusive<VectorVal>(v);
	auto num_elems = vec->Size();

	for ( auto idx = 0; idx < num_elems; ++idx )
		{
		const auto& elem = vec->ValAt(idx);
		auto elem_code = BuildConstant(v, elem);

		auto append_stmt = const_name + "->Append(" + elem_code + ");";
		AddInit(v, append_stmt);
		}
	}
} // zeek::detail

View file

@ -0,0 +1,320 @@
// See the file "COPYING" in the main distribution directory for copyright.
#include <errno.h>
#include <unistd.h>
#include <sys/stat.h>
#include "zeek/script_opt/CPP/Compile.h"
namespace zeek::detail {
// Generates the declarations for a compilable function body.  Bodies
// that IsCompilable() rejects are skipped.
void CPPCompile::DeclareFunc(const FuncInfo& func)
	{
	if ( ! IsCompilable(func) )
		return;

	// C++ name of the body: the canonicalized body name plus a suffix
	// marking it as coming from a function (vs. a lambda).
	auto cpp_name = Canonicalize(BodyName(func).c_str()) + "_zf";

	auto profile = func.Profile();
	auto script_func = func.Func();
	auto func_body = func.Body();
	auto func_priority = func.Priority();

	DeclareSubclass(script_func->GetType(), profile, cpp_name, func_body,
	                func_priority, nullptr, script_func->Flavor());
	}
// Generates the declarations for a lambda, first recording the C++
// names to use for each of its captured ("outer") identifiers.
void CPPCompile::DeclareLambda(const LambdaExpr* l, const ProfileFunc* pf)
	{
	ASSERT(is_CPP_compilable(pf));

	// C++ name of the body: the canonicalized lambda name plus a
	// suffix marking it as coming from a lambda.
	auto cpp_name = Canonicalize(l->Name().c_str()) + "_lb";

	auto lambda_body = l->Ingredients().body;
	auto lambda_id = l->Ingredients().id;

	auto& captures = l->OuterIDs();
	for ( auto capture : captures )
		lambda_names[capture] = LocalName(capture);

	// Lambdas are declared with priority 0 and function flavor.
	DeclareSubclass(lambda_id->GetType<FuncType>(), pf, cpp_name,
	                lambda_body, 0, l, FUNC_FLAVOR_FUNCTION);
	}
// Emits the forward declaration of the C++ function implementing a
// script body, plus the definition of the CPPStmt subclass ("<fname>_cl")
// whose Exec() method invokes that function with arguments bound from
// the frame.
//
// ft: type of the function.
// pf: profile of the function's body.
// fname: C++ name to use for the body.
// body: the body itself.
// priority: the body's priority.
// l: if non-nil, the lambda the body belongs to (nil for regular
//    functions/events/hooks).
// flavor: whether the body is for a function, event or hook.
//
// Also records the body's hash, priority and name mappings, and folds
// the hash into the compilation's total hash.
void CPPCompile::DeclareSubclass(const FuncTypePtr& ft, const ProfileFunc* pf,
                                 const std::string& fname,
                                 const StmtPtr& body, int priority,
                                 const LambdaExpr* l, FunctionFlavor flavor)
	{
	const auto& yt = ft->Yield();
	in_hook = flavor == FUNC_FLAVOR_HOOK;
	const IDPList* lambda_ids = l ? &l->OuterIDs() : nullptr;

	// Hooks are generated as returning a bool rather than their
	// declared yield type.
	auto yt_decl = in_hook ? "bool" : FullTypeName(yt);

	NL();
	Emit("static %s %s(%s);", yt_decl, fname, ParamDecl(ft, lambda_ids, pf));

	Emit("class %s_cl : public CPPStmt", fname);
	StartBlock();

	Emit("public:");

	std::string addl_args;	// captures passed in on construction
	std::string inits;	// initializers for corresponding member vars

	if ( lambda_ids )
		{
		for ( auto& id : *lambda_ids )
			{
			auto name = lambda_names[id];
			auto tn = FullTypeName(id->GetType());
			addl_args = addl_args + ", " + tn + " _" + name;

			inits = inits + ", " + name + "(_" + name + ")";
			}
		}

	Emit("%s_cl(const char* name%s) : CPPStmt(name)%s { }",
	     fname, addl_args.c_str(), inits.c_str());

	// An additional constructor just used to generate place-holder
	// instances, due to the mis-design that lambdas are identified
	// by their Func objects rather than their FuncVal objects.
	if ( lambda_ids && lambda_ids->length() > 0 )
		Emit("%s_cl(const char* name) : CPPStmt(name) { }", fname);

	Emit("ValPtr Exec(Frame* f, StmtFlowType& flow) override final");
	StartBlock();

	Emit("flow = FLOW_RETURN;");

	if ( in_hook )
		{
		// A false return from the hook's function translates into
		// a "break" flow.
		Emit("if ( ! %s(%s) )", fname, BindArgs(ft, lambda_ids));
		StartBlock();
		Emit("flow = FLOW_BREAK;");
		EndBlock();
		Emit("return nullptr;");
		}

	else if ( IsNativeType(yt) )
		GenInvokeBody(fname, yt, BindArgs(ft, lambda_ids));

	else
		Emit("return %s(%s);", fname, BindArgs(ft, lambda_ids));

	EndBlock();

	if ( lambda_ids )
		BuildLambda(ft, pf, fname, body, l, lambda_ids);
	else
		{
		// Track this function as known to have been compiled.
		// We don't track lambda bodies as compiled because they
		// can't be instantiated directly without also supplying
		// the captures.  In principle we could make an exception
		// for lambdas that don't take any arguments, but that
		// seems potentially more confusing than beneficial.
		compiled_funcs.emplace(fname);

		auto loc_f = script_specific_filename(body);
		cf_locs[fname] = loc_f;

		// Some guidance for those looking through the generated code.
		Emit("// compiled body for: %s", loc_f);
		}

	EndBlock(true);

	auto h = pf->HashVal();

	body_hashes[fname] = h;
	body_priorities[fname] = priority;
	body_names.emplace(body.get(), fname);

	// Note, no std::move() here: "fname" is a const reference, so a
	// move would silently degrade to a copy anyway.
	names_to_bodies.emplace(fname, body.get());

	total_hash = merge_p_hashes(total_hash, h);
	}
// Emits the lambda-specific members of a body's CPPStmt subclass: the
// member variables holding the captures, and the SetLambdaCaptures(),
// SerializeLambdaCaptures() and Clone() methods.  Also records the
// initialization that registers the lambda at run-time.
//
// ft: type of the lambda.
// pf: profile of the lambda's body.
// fname: C++ name used for the body.
// body: the body itself.
// l: the lambda expression.
// lambda_ids: the lambda's captures; must not be nil (it is
//             dereferenced unconditionally).
void CPPCompile::BuildLambda(const FuncTypePtr& ft, const ProfileFunc* pf,
const std::string& fname, const StmtPtr& body,
const LambdaExpr* l, const IDPList* lambda_ids)
{
// Declare the member variables for holding the captures.
for ( auto& id : *lambda_ids )
{
auto name = lambda_names[id];
auto tn = FullTypeName(id->GetType());
Emit("%s %s;", tn, name.c_str());
}
// Generate initialization to create and register the lambda.
auto literal_name = std::string("\"") + l->Name() + "\"";
// The instance is created via the name-only constructor, i.e.,
// without supplying any captures.
auto instantiate = std::string("make_intrusive<") + fname + "_cl>(" +
literal_name + ")";
int nl = lambda_ids->length();
auto h = Fmt(pf->HashVal());
auto has_captures = nl > 0 ? "true" : "false";
auto l_init = std::string("register_lambda__CPP(") + instantiate +
", " + h + ", \"" + l->Name() + "\", " +
GenTypeName(ft) + ", " + has_captures + ");";
AddInit(l, l_init);
NoteInitDependency(l, TypeRep(ft));
// Make the lambda's body's initialization depend on the lambda's
// initialization. That way GenFuncVarInits() can generate
// initializations with the assurance that the associated body
// hashes will have been registered.
AddInit(body.get());
NoteInitDependency(body.get(), l);
// Generate method to extract the lambda captures from a deserialized
// Frame object.
Emit("void SetLambdaCaptures(Frame* f) override");
StartBlock();
for ( int i = 0; i < nl; ++i )
{
auto l_i = (*lambda_ids)[i];
const auto& t_i = l_i->GetType();
// The i'th capture is taken from the i'th frame element.
auto cap_i = std::string("f->GetElement(") + Fmt(i) + ")";
Emit("%s = %s;", lambda_names[l_i],
GenericValPtrToGT(cap_i, t_i, GEN_NATIVE));
}
EndBlock();
// Generate the method for serializing the captures.
Emit("std::vector<ValPtr> SerializeLambdaCaptures() const override");
StartBlock();
Emit("std::vector<ValPtr> vals;");
for ( int i = 0; i < nl; ++i )
{
auto l_i = (*lambda_ids)[i];
const auto& t_i = l_i->GetType();
Emit("vals.emplace_back(%s);",
NativeToGT(lambda_names[l_i], t_i, GEN_VAL_PTR));
}
Emit("return vals;");
EndBlock();
// Generate the Clone() method.
Emit("CPPStmtPtr Clone() override");
StartBlock();
auto arg_clones = GenLambdaClone(l, true);
Emit("return make_intrusive<%s_cl>(name.c_str()%s);", fname, arg_clones);
EndBlock();
}
// Returns the comma-separated argument list used by generated code to
// invoke a compiled body: one argument per function parameter, each
// extracted from the frame "f", then any lambda captures, and finally
// the frame itself.
std::string CPPCompile::BindArgs(const FuncTypePtr& ft, const IDPList* lambda_ids)
	{
	const auto& params = ft->Params();
	auto param_decls = params->Types();
	int num_params = param_decls ? param_decls->size() : 0;

	std::string args;

	for ( auto idx = 0; idx < num_params; ++idx )
		{
		auto frame_elem = std::string("f->GetElement(") + Fmt(idx) + ")";
		const auto& param_type = params->GetFieldType(idx);

		if ( IsNativeType(param_type) )
			// Pull the native value directly out of the frame element.
			args += frame_elem + NativeAccessor(param_type);
		else
			args += GenericValPtrToGT(frame_elem, param_type, GEN_VAL_PTR);

		args += ", ";
		}

	if ( lambda_ids )
		for ( auto& capture : *lambda_ids )
			args += lambda_names[capture] + ", ";

	// The frame always comes last.
	return args + "f";
	}
// Returns the C++ parameter declarations for the function implementing
// a compiled body: one per script-level parameter, then one per lambda
// capture (if any), and finally the frame.
std::string CPPCompile::ParamDecl(const FuncTypePtr& ft,
                                  const IDPList* lambda_ids,
                                  const ProfileFunc* pf)
	{
	const auto& params = ft->Params();
	int num_params = params->NumFields();

	std::string decl;

	for ( auto idx = 0; idx < num_params; ++idx )
		{
		const auto& param_type = params->GetFieldType(idx);
		auto type_name = FullTypeName(param_type);
		auto param_id = FindParam(idx, pf);

		std::string param_name;

		if ( ! param_id )
			// Unused parameters don't wind up in the ProfileFunc.
			// Rather than dig their name out of the function's
			// declaration, explicitly name them to reflect that
			// they're unused.
			param_name = std::string("unused_param__CPP_") + Fmt(idx);

		else if ( param_type->Tag() == TYPE_ANY &&
		          param_id->GetType()->Tag() != TYPE_ANY )
			// The parameter will need translating from its current
			// representation to type "any".
			param_name = std::string("any_param__CPP_") + Fmt(idx);

		else
			param_name = LocalName(param_id);

		// Native types are always passed by value, as are non-native
		// parameters that the body modifies.  Anything else is passed
		// by const reference.
		bool by_value = IsNativeType(param_type) ||
		                (param_id && pf->Assignees().count(param_id) > 0);

		if ( ! by_value )
			decl += "const ";

		decl += type_name;
		decl += by_value ? " " : "& ";
		decl += param_name;
		decl += ", ";
		}

	if ( lambda_ids )
		// Captures come in as additional parameters, by non-const
		// reference so that the body can modify them.
		for ( auto& capture : *lambda_ids )
			{
			auto capture_name = lambda_names[capture];
			const auto& capture_type = capture->GetType();
			auto type_name = FullTypeName(capture_type);

			decl += type_name;
			decl += "& ";
			decl += capture_name;
			decl += ", ";
			}

	// The frame is always the final parameter.
	return decl + "Frame* f__CPP";
	}
// Returns the parameter in the given profile whose offset is "i", or
// nil if the profile has no such parameter.
const ID* CPPCompile::FindParam(int i, const ProfileFunc* pf)
	{
	const ID* match = nullptr;

	for ( const auto& candidate : pf->Params() )
		{
		if ( candidate->Offset() != i )
			continue;

		match = candidate;
		break;
		}

	return match;
	}
} // zeek::detail

View file

@ -0,0 +1,329 @@
// See the file "COPYING" in the main distribution directory for copyright.
#include <errno.h>
#include <unistd.h>
#include <sys/stat.h>
#include "zeek/script_opt/CPP/Compile.h"
namespace zeek::detail {
// Opens the file that will receive the generated C++ (appending to it
// if the hash manager is in append mode, otherwise overwriting it) and
// then runs the compilation.  Exits the process if the file can't be
// opened or, when appending, if its size can't be determined.
CPPCompile::CPPCompile(std::vector<FuncInfo>& _funcs, ProfileFuncs& _pfs,
                       const char* gen_name, CPPHashManager& _hm,
                       bool _update, bool _standalone)
: funcs(_funcs), pfs(_pfs), hm(_hm), update(_update), standalone(_standalone)
	{
	const char* open_mode = hm.IsAppend() ? "a" : "w";

	write_file = fopen(gen_name, open_mode);
	if ( write_file == nullptr )
		{
		reporter->Error("can't open C++ target file %s", gen_name);
		exit(1);
		}

	if ( hm.IsAppend() )
		{
		// The code being added needs a unique number to associate
		// with its namespace.  The present size of the file we're
		// appending to is a convenient, safe source for one: every
		// incremental compilation will wind up with a different
		// number.
		struct stat file_info;
		int stat_result = fstat(fileno(write_file), &file_info);

		if ( stat_result != 0 )
			{
			char err_msg[256];
			util::zeek_strerror_r(errno, err_msg, sizeof(err_msg));
			reporter->Error("fstat failed on %s: %s", gen_name, err_msg);
			exit(1);
			}

		// A tag of 0 means "generating from scratch rather than
		// appending", so offset by one to stay distinct from that.
		addl_tag = file_info.st_size + 1;
		}

	Compile();
	}
// Closes the file holding the generated C++ code.
CPPCompile::~CPPCompile()
{
fclose(write_file);
}
void CPPCompile::Compile()
{
// Get the working directory so we can use it in diagnostic messages
// as a way to identify this compilation. Only germane when doing
// incremental compilation (particularly of the test suite).
char buf[8192];
getcwd(buf, sizeof buf);
working_dir = buf;
if ( update && addl_tag > 0 && CheckForCollisions() )
// Inconsistent compilation environment.
exit(1);
GenProlog();
// Determine which functions we can call directly, and reuse
// previously compiled instances of those if present.
for ( const auto& func : funcs )
{
if ( func.Func()->Flavor() != FUNC_FLAVOR_FUNCTION )
// Can't be called directly.
continue;
if ( IsCompilable(func) )
compilable_funcs.insert(BodyName(func));
auto h = func.Profile()->HashVal();
if ( hm.HasHash(h) )
{
// Track the previously compiled instance
// of this function.
auto n = func.Func()->Name();
hashed_funcs[n] = hm.FuncBodyName(h);
}
}
// Track all of the types we'll be using.
for ( const auto& t : pfs.RepTypes() )
{
TypePtr tp{NewRef{}, (Type*)(t)};
types.AddKey(tp, pfs.HashType(t));
}
for ( const auto& t : types.DistinctKeys() )
if ( ! types.IsInherited(t) )
// Type is new to this compilation, so we'll
// be generating it.
Emit("TypePtr %s;", types.KeyName(t));
NL();
for ( const auto& c : pfs.Constants() )
AddConstant(c);
NL();
for ( auto& g : pfs.AllGlobals() )
CreateGlobal(g);
// Now that the globals are created, register their attributes,
// if any, and generate their initialization for use in standalone
// scripts. We can't do these in CreateGlobal() because at that
// point it's possible that some of the globals refer to other
// globals not-yet-created.
for ( auto& g : pfs.AllGlobals() )
{
RegisterAttributes(g->GetAttrs());
if ( g->HasVal() )
{
auto gn = std::string(g->Name());
GenGlobalInit(g, globals[gn], g->GetVal());
}
}
for ( const auto& e : pfs.Events() )
if ( AddGlobal(e, "gl", false) )
Emit("EventHandlerPtr %s_ev;", globals[std::string(e)]);
for ( const auto& t : pfs.RepTypes() )
{
ASSERT(types.HasKey(t));
TypePtr tp{NewRef{}, (Type*)(t)};
RegisterType(tp);
}
// The scaffolding is now in place to go ahead and generate
// the functions & lambdas. First declare them ...
for ( const auto& func : funcs )
DeclareFunc(func);
// We track lambdas by their internal names, because two different
// LambdaExpr's can wind up referring to the same underlying lambda
// if the bodies happen to be identical. In that case, we don't
// want to generate the lambda twice.
std::unordered_set<std::string> lambda_names;
for ( const auto& l : pfs.Lambdas() )
{
const auto& n = l->Name();
if ( lambda_names.count(n) > 0 )
// Skip it.
continue;
DeclareLambda(l, pfs.ExprProf(l).get());
lambda_names.insert(n);
}
NL();
// ... and now generate their bodies.
for ( const auto& func : funcs )
CompileFunc(func);
lambda_names.clear();
for ( const auto& l : pfs.Lambdas() )
{
const auto& n = l->Name();
if ( lambda_names.count(n) > 0 )
continue;
CompileLambda(l, pfs.ExprProf(l).get());
lambda_names.insert(n);
}
for ( const auto& f : compiled_funcs )
RegisterCompiledBody(f);
GenFuncVarInits();
GenEpilog();
}
// Emits the opening of the generated code: when generating from
// scratch (rather than appending), the runtime #include and the outer
// namespace; then, always, the namespace specific to this compilation
// and the mapping vectors the generated code may use.
void CPPCompile::GenProlog()
	{
	const bool from_scratch = (addl_tag == 0);

	if ( from_scratch )
		{
		Emit("#include \"zeek/script_opt/CPP/Runtime.h\"\n");
		Emit("namespace zeek::detail { //\n");
		}

	// The working directory goes in a trailing comment, to identify
	// the compilation.
	Emit("namespace CPP_%s { // %s\n", Fmt(addl_tag), working_dir.c_str());

	// These might-or-might-not wind up being populated/used.
	Emit("std::vector<int> field_mapping;");
	Emit("std::vector<int> enum_mapping;");

	NL();
	}
void CPPCompile::RegisterCompiledBody(const std::string& f)
{
auto h = body_hashes[f];
auto p = body_priorities[f];
// Build up an initializer of the events relevant to the function.
std::string events;
if ( body_events.count(f) > 0 )
for ( auto e : body_events[f] )
{
if ( events.size() > 0 )
events += ", ";
events = events + "\"" + e + "\"";
}
events = std::string("{") + events + "}";
if ( addl_tag > 0 )
// Hash in the location associated with this compilation
// pass, to get a final hash that avoids conflicts with
// identical-but-in-a-different-context function bodies
// when compiling potentially conflicting additional code
// (which we want to support to enable quicker test suite
// runs by enabling multiple tests to be compiled into the
// same binary).
h = merge_p_hashes(h, p_hash(cf_locs[f]));
auto init = std::string("register_body__CPP(make_intrusive<") +
f + "_cl>(\"" + f + "\"), " + Fmt(p) + ", " +
Fmt(h) + ", " + events + ");";
AddInit(names_to_bodies[f], init);
if ( update )
{
fprintf(hm.HashFile(), "func\n%s%s\n",
scope_prefix(addl_tag).c_str(), f.c_str());
fprintf(hm.HashFile(), "%llu\n", h);
}
}
// Emits the closing portion of the generated code: the initialization
// expressions, attributes and expanded types (logging new ones to the
// hash file when updating), the initialization functions along with
// the init__CPP() function that invokes them, and the closing of the
// namespace(s) opened by the prolog.
void CPPCompile::GenEpilog()
{
NL();
for ( const auto& e : init_exprs.DistinctKeys() )
{
GenInitExpr(e);
if ( update )
init_exprs.LogIfNew(e, addl_tag, hm.HashFile());
}
for ( const auto& a : attributes.DistinctKeys() )
{
GenAttrs(a);
if ( update )
attributes.LogIfNew(a, addl_tag, hm.HashFile());
}
// Generate the guts of compound types, and preserve type names
// if present.
for ( const auto& t : types.DistinctKeys() )
{
ExpandTypeVar(t);
if ( update )
types.LogIfNew(t, addl_tag, hm.HashFile());
}
InitializeEnumMappings();
GenPreInits();
// Collect every object that has initializations associated with it.
std::unordered_set<const Obj*> to_do;
for ( const auto& oi : obj_inits )
to_do.insert(oi.first);
CheckInitConsistency(to_do);
// "nc" is used below as the number of init_<N>__CPP() functions
// to invoke.
auto nc = GenDependentInits(to_do);
NL();
Emit("void init__CPP()");
StartBlock();
Emit("enum_mapping.resize(%s);\n", Fmt(int(enum_names.size())));
Emit("pre_init__CPP();");
NL();
// The generated initialization functions are numbered starting at 1.
for ( auto i = 1; i <= nc; ++i )
Emit("init_%s__CPP();", Fmt(i));
// Populate mappings for dynamic offsets.
NL();
InitializeFieldMappings();
EndBlock(true);
GenInitHook();
// Close the namespace specific to this compilation.
Emit("} // %s\n\n", scope_prefix(addl_tag).c_str());
if ( update )
UpdateGlobalHashes();
// When appending (non-zero tag), the outer namespace was not opened
// by the prolog, so there's nothing further to close.
if ( addl_tag > 0 )
return;
Emit("#include \"zeek/script_opt/CPP/CPP-gen-addl.h\"\n");
Emit("} // zeek::detail");
}
// Returns true if the given function should be compiled: the caller
// hasn't marked it as one to skip, it hasn't already been compiled
// (per the hash manager), and its profile is compilable to C++.
bool CPPCompile::IsCompilable(const FuncInfo& func)
	{
	const bool skip_requested = func.ShouldSkip();
	if ( skip_requested )
		return false;

	const bool already_compiled = hm.HasHash(func.Profile()->HashVal());
	if ( already_compiled )
		return false;

	return is_CPP_compilable(func.Profile());
	}
} // zeek::detail

View file

@ -0,0 +1,73 @@
// See the file "COPYING" in the main distribution directory for copyright.
#include <errno.h>
#include <unistd.h>
#include <sys/stat.h>
#include "zeek/script_opt/CPP/Compile.h"
namespace zeek::detail {
// Begins a new block in the generated code.  Note that the nesting
// level is incremented *before* the opening brace is emitted.
void CPPCompile::StartBlock()
{
++block_level;
Emit("{");
}
// Ends the current block in the generated code, following the closing
// brace with a semicolon if "needs_semi" is set.  The nesting level is
// decremented only *after* the brace has been emitted.
void CPPCompile::EndBlock(bool needs_semi)
	{
	const char* terminator = "";
	if ( needs_semi )
		terminator = ";";

	Emit("}%s", terminator);
	--block_level;
	}
// Returns C++ code that constructs a StringVal from the "len" bytes at
// "b", passing the length explicitly followed by the bytes rendered as
// an escaped C++ string literal.
std::string CPPCompile::GenString(const char* b, int len) const
	{
	std::string ctor_call("make_intrusive<StringVal>(");

	ctor_call += Fmt(len);
	ctor_call += ", ";
	ctor_call += CPPEscape(b, len);
	ctor_call += ")";

	return ctor_call;
	}
// Returns the "len" bytes at "b" rendered as a double-quoted C++ string
// literal.  Characters with a standard single-letter escape, plus
// backslash and double-quote, use their escape; other printable
// characters (per isprint()) appear as-is; and everything else is
// rendered as a 3-digit octal escape.
std::string CPPCompile::CPPEscape(const char* b, int len) const
	{
	std::string res = "\"";

	for ( int i = 0; i < len; ++i )
		{
		// Work with the byte as unsigned so that values >= 0x80 can
		// be safely passed to isprint() and formatted in octal.
		unsigned char c = b[i];

		switch ( c ) {
		case '\a':	res += "\\a"; break;
		case '\b':	res += "\\b"; break;
		case '\f':	res += "\\f"; break;
		case '\n':	res += "\\n"; break;
		case '\r':	res += "\\r"; break;
		case '\t':	res += "\\t"; break;
		case '\v':	res += "\\v"; break;

		case '\\':	res += "\\\\"; break;
		case '"':	res += "\\\""; break;

		default:
			if ( isprint(c) )
				res += c;
			else
				{
				// Always exactly 3 octal digits, so that a following
				// digit can't be absorbed into the escape.  A byte
				// needs at most 3 digits plus the terminating NUL,
				// so a small buffer suffices.
				char buf[8];
				snprintf(buf, sizeof buf, "%03o",
				         static_cast<unsigned int>(c));
				res += "\\";
				res += buf;
				}
			break;
		}
		}

	return res + "\"";
	}
// Writes one tab to the output file per level of block nesting.
void CPPCompile::Indent() const
	{
	auto remaining = block_level;

	while ( remaining > 0 )
		{
		fprintf(write_file, "%s", "\t");
		--remaining;
		}
	}
} // zeek::detail

1226
src/script_opt/CPP/Exprs.cc Normal file

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,66 @@
// See the file "COPYING" in the main distribution directory for copyright.
#include <broker/error.hh>
#include "zeek/Desc.h"
#include "zeek/broker/Data.h"
#include "zeek/script_opt/CPP/Func.h"
namespace zeek::detail {
// Definitions of the globals declared in Func.h.

// Compiled script bodies, indexed by hash.
std::unordered_map<p_hash_type, CompiledScript> compiled_scripts;
// Initialization callbacks for standalone scripts, indexed by hash.
std::unordered_map<p_hash_type, void (*)()> standalone_callbacks;
// Callbacks that the load_CPP() BiF has marked for activation.
std::vector<void (*)()> standalone_activations;
// Describes the function as the text "compiled function" followed by
// the function's name.
void CPPFunc::Describe(ODesc* d) const
{
d->AddSP("compiled function");
d->Add(name);
}
// Constructs a lambda whose sole body is the given compiled statement
// (at priority 0), while also directly retaining that body for use in
// dealing with the lambda's captures.
CPPLambdaFunc::CPPLambdaFunc(std::string _name, FuncTypePtr ft,
                             CPPStmtPtr _l_body)
: ScriptFunc(std::move(_name), std::move(ft), {_l_body}, {0}),
  // The base class is always initialized first, so it receives its
  // copy of the body before we move from the argument here.
  l_body(std::move(_l_body))
	{
	}
// Serializes the lambda's captures for sending via Broker.  The result
// is a vector holding the string "CopyFrame" followed by a vector with
// one (value, type tag) tuple per capture.  Returns
// broker::ec::invalid_data if any capture can't be converted to Broker
// data.
broker::expected<broker::data> CPPLambdaFunc::SerializeClosure() const
	{
	auto vals = l_body->SerializeLambdaCaptures();

	broker::vector rval;
	rval.emplace_back(std::string("CopyFrame"));

	broker::vector body;

	// Range-based iteration avoids comparing a signed index against
	// the container's unsigned size.
	for ( const auto& val : vals )
		{
		auto expected = Broker::detail::val_to_data(val.get());
		if ( ! expected )
			return broker::ec::invalid_data;

		TypeTag tag = val->GetType()->Tag();
		broker::vector val_tuple {std::move(*expected),
		                          static_cast<broker::integer>(tag)};
		body.emplace_back(std::move(val_tuple));
		}

	rval.emplace_back(std::move(body));

	return {std::move(rval)};
	}
// Has the lambda's compiled body set its captures from the given frame.
void CPPLambdaFunc::SetCaptures(Frame* f)
{
l_body->SetLambdaCaptures(f);
}
// Returns a new CPPLambdaFunc with the same name and type as this one,
// and with a body obtained by calling Clone() on this one's body.
FuncPtr CPPLambdaFunc::DoClone()
{
return make_intrusive<CPPLambdaFunc>(name, type, l_body->Clone());
}
} // zeek::detail

120
src/script_opt/CPP/Func.h Normal file
View file

@ -0,0 +1,120 @@
// See the file "COPYING" in the main distribution directory for copyright.
// Subclasses of Func and Stmt to support C++-generated code, along
// with tracking of that code to enable hooking into it at run-time.
#pragma once
#include "zeek/Func.h"
#include "zeek/script_opt/ProfileFunc.h"
namespace zeek {
namespace detail {
// A Func subclass for the lambdas that the compiler itself creates in
// order to support complex initializations (expressions used in type
// attributes).  It's not instantiated directly; instead, classes derive
// from it.
class CPPFunc : public Func {
public:
	// Whether the function is pure, as specified upon construction.
	bool IsPure() const override	{ return is_pure; }

	void Describe(ODesc* d) const override;

protected:
	// Only for use by derived classes.
	CPPFunc(const char* _name, bool _is_pure)
	: name(_name), is_pure(_is_pure)
		{
		}

	std::string name;
	bool is_pure;
};
// A subclass of Stmt used to replace a function/event handler/hook body.
class CPPStmt : public Stmt {
public:
	CPPStmt(const char* _name) : Stmt(STMT_CPP), name(_name)	{ }

	// Returns the name supplied at construction.  Const-qualified so
	// that it can also be used on const objects.
	const std::string& Name() const	{ return name; }

	// Sets/returns a hash associated with this statement.  A value
	// of 0 means "not set".
	p_hash_type GetHash() const	{ return hash; }
	void SetHash(p_hash_type h)	{ hash = h; }

	// The following only get defined by lambda bodies.  The versions
	// here do nothing / return no captures.
	virtual void SetLambdaCaptures(Frame* f)	{ }
	virtual std::vector<ValPtr> SerializeLambdaCaptures() const
		{ return std::vector<ValPtr>{}; }

	// By default, "cloning" just yields another reference to this
	// same object.
	virtual IntrusivePtr<CPPStmt> Clone()
		{
		return {NewRef{}, this};
		}

protected:
	// This method being called means that the inliner is running
	// on compiled code, which shouldn't happen.
	StmtPtr Duplicate() override	{ ASSERT(0); return ThisPtr(); }

	// Nothing inside a compiled body for a traversal to visit.
	TraversalCode Traverse(TraversalCallback* cb) const override
		{ return TC_CONTINUE; }

	std::string name;
	p_hash_type hash = 0ULL;
};

using CPPStmtPtr = IntrusivePtr<CPPStmt>;
// A ScriptFunc subclass for lambdas that appear explicitly in scripts,
// which knows how to deal with its captures for serialization.  This
// differs from CPPFunc, which is for lambdas that the compiler itself
// generates.
class CPPLambdaFunc : public ScriptFunc {
public:
	CPPLambdaFunc(std::string name, FuncTypePtr ft, CPPStmtPtr l_body);

	bool HasCopySemantics() const override	{ return true; }

protected:
	// Support for sending lambdas via Broker.
	broker::expected<broker::data> SerializeClosure() const override;
	void SetCaptures(Frame* f) override;

	FuncPtr DoClone() override;

	// The compiled body of the lambda.
	CPPStmtPtr l_body;
};
// Information associated with a given compiled script body: its
// Stmt subclass, priority, and any events that should be registered
// upon instantiating the body.
struct CompiledScript {
	CPPStmtPtr body;

	// Defaulted so that an entry that gets default-constructed (for
	// example by operator[] on a map of these) does not carry an
	// indeterminate priority.  Brace-initialization with all three
	// members continues to work as before.
	int priority = 0;

	std::vector<std::string> events;
};
// Maps hashes to compiled information (body, priority, and associated
// events) for each compiled script body.
extern std::unordered_map<p_hash_type, CompiledScript> compiled_scripts;
// Maps hashes to standalone script initialization callbacks.  Each
// callback takes no arguments and returns nothing.
extern std::unordered_map<p_hash_type, void (*)()> standalone_callbacks;
// Standalone callbacks marked for activation by calls to the
// load_CPP() BiF.
extern std::vector<void (*)()> standalone_activations;
} // namespace detail
} // namespace zeek

View file

@ -0,0 +1,247 @@
// See the file "COPYING" in the main distribution directory for copyright.
#include <errno.h>
#include <unistd.h>
#include <sys/stat.h>
#include "zeek/script_opt/CPP/Compile.h"
namespace zeek::detail {
// Generates the C++ definition for the given function body, provided
// that the body is compilable.
void CPPCompile::CompileFunc(const FuncInfo& func)
	{
	if ( IsCompilable(func) )
		{
		// Function bodies get a "_zf" suffix (lambdas instead
		// use "_lb").
		auto cpp_name = Canonicalize(BodyName(func).c_str()) + "_zf";

		auto profile = func.Profile();
		auto script_func = func.Func();
		auto stmts = func.Body();

		DefineBody(script_func->GetType(), profile, cpp_name, stmts,
		           nullptr, script_func->Flavor());
		}
	}
// Generates the C++ definition for the body of the given lambda.  The
// lambda's outer IDs are passed along so that they can be treated as
// parameters rather than locals.
void CPPCompile::CompileLambda(const LambdaExpr* l, const ProfileFunc* pf)
	{
	// Lambda bodies get a "_lb" suffix.
	auto cpp_name = Canonicalize(l->Name().c_str()) + "_lb";

	const auto& ingredients = l->Ingredients();
	auto stmts = ingredients.body;
	auto lambda_id = ingredients.id;

	auto& outer_ids = l->OuterIDs();

	// Lambdas are always compiled with function flavor.
	DefineBody(lambda_id->GetType<FuncType>(), pf, cpp_name, stmts,
	           &outer_ids, FUNC_FLAVOR_FUNCTION);
	}
// Emits the statements that call "fname" with the given argument text
// and return the result.  If there's no return type, or it's void, the
// call is made for its side effects and a nil pointer is returned;
// otherwise the call's native result is converted per GEN_VAL_PTR.
void CPPCompile::GenInvokeBody(const std::string& fname, const TypePtr& t,
                               const std::string& args)
	{
	auto invocation = fname + "(" + args + ")";

	const bool yields_value = t && t->Tag() != TYPE_VOID;

	if ( yields_value )
		{
		Emit("return %s;", NativeToGT(invocation, t, GEN_VAL_PTR));
		return;
		}

	Emit("%s;", invocation);
	Emit("return nullptr;");
	}
// Emits the full C++ function definition for a script body: the
// signature, any preamble (translation of "any" parameters, event
// initialization, local declarations), the translated statements, and
// fall-through return statements.
//
// "lambda_ids" is non-nil only for lambdas, in which case it gives the
// IDs that are passed in as additional parameters.
void CPPCompile::DefineBody(const FuncTypePtr& ft, const ProfileFunc* pf,
                            const std::string& fname, const StmtPtr& body,
                            const IDPList* lambda_ids, FunctionFlavor flavor)
	{
	// Start afresh with respect to per-body state.
	locals.clear();
	params.clear();

	body_name = fname;
	ret_type = ft->Yield();
	in_hook = (flavor == FUNC_FLAVOR_HOOK);

	// Hooks are generated as returning "bool" regardless of their
	// yield type.
	auto ret_type_str = in_hook ? "bool" : FullTypeName(ret_type);

	for ( const auto& param : pf->Params() )
		params.emplace(param);

	NL();

	Emit("%s %s(%s)", ret_type_str, fname, ParamDecl(ft, lambda_ids, pf));

	StartBlock();

	// Deal with "any" parameters, if any.
	TranslateAnyParams(ft, pf);

	// Make sure that any events referred to in this function have
	// been initialized.
	InitializeEvents(pf);

	// Create the local variables.
	DeclareLocals(pf, lambda_ids);

	GenStmt(body);

	if ( in_hook )
		{
		// Running off the end of a hook yields true.
		Emit("return true;");
		in_hook = false;
		}

	// Seatbelts for running off the end of a function that's supposed
	// to return a non-native type.
	if ( ! IsNativeType(ret_type) )
		Emit("return nullptr;");

	EndBlock();
	}
// For each formal parameter declared as "any" whose corresponding
// parameter identifier has a more specific type, emits a declaration
// that converts the incoming "any_param__CPP_<N>" value to that type.
void CPPCompile::TranslateAnyParams(const FuncTypePtr& ft, const ProfileFunc* pf)
	{
	const auto& formals = ft->Params();
	const int num_formals = formals->NumFields();

	for ( int idx = 0; idx < num_formals; ++idx )
		{
		const auto& formal_t = formals->GetFieldType(idx);

		// Only "any" formals are relevant.
		if ( formal_t->Tag() != TYPE_ANY )
			continue;

		auto param_id = FindParam(idx, pf);

		if ( param_id )
			{ // The parameter is actually used.
			const auto& actual_t = param_id->GetType();

			if ( actual_t->Tag() != TYPE_ANY )
				{ // Needs conversion from "any".
				auto any_name = std::string("any_param__CPP_") + Fmt(idx);

				Emit("%s %s = %s;", FullTypeName(actual_t),
				     LocalName(param_id),
				     GenericValPtrToGT(any_name, actual_t, GEN_NATIVE));
				}
			}
		}
	}
// Emits, for each event referred to in the function being compiled,
// a one-time run-time check that sets the event's "<global>_ev"
// variable if the event is present in the event registry.  This has
// to happen dynamically because it depends on whether the final script
// using the compiled code happens to load the associated event handler.
void CPPCompile::InitializeEvents(const ProfileFunc* pf)
	{
	for ( const auto& ev : pf->Events() )
		{
		auto handler_var = globals[ev] + "_ev";

		// The enclosing scope means the generated variables
		// don't need to be given distinct names.
		Emit("{");
		Emit("static bool did_init = false;");

		Emit("if ( ! did_init )");
		StartBlock();

		// Both a Lookup and a Register are generated because
		// only the latter returns an EventHandlerPtr.
		Emit("if ( event_registry->Lookup(\"%s\") )", ev);
		StartBlock();
		Emit("%s = event_registry->Register(\"%s\");", handler_var.c_str(), ev);
		EndBlock();

		Emit("did_init = true;");
		EndBlock();

		Emit("}");
		}
	}
void CPPCompile::DeclareLocals(const ProfileFunc* pf, const IDPList* lambda_ids)
{
// It's handy to have a set of the lambda captures rather than a list.
std::unordered_set<const ID*> lambda_set;
if ( lambda_ids )
for ( auto li : *lambda_ids )
lambda_set.insert(li);
const auto& ls = pf->Locals();
// Track whether we generated a declaration. This is just for
// tidiness in the output.
bool did_decl = false;
for ( const auto& l : ls )
{
auto ln = LocalName(l);
if ( lambda_set.count(l) > 0 )
// No need to declare these, they're passed in as
// parameters.
ln = lambda_names[l];
else if ( params.count(l) == 0 )
{ // Not a parameter, so must be a local.
Emit("%s %s;", FullTypeName(l->GetType()), ln);
did_decl = true;
}
locals.emplace(l, ln);
}
if ( did_decl )
NL();
}
// Returns the name to use for the given function body.  If the function
// has exactly one body, that's just the function's name; otherwise it's
// the name followed by "__" and the index of the body within the
// function's list of bodies.
std::string CPPCompile::BodyName(const FuncInfo& func)
	{
	const auto& f = func.Func();
	const auto& bodies = f->GetBodies();

	std::string fname = f->Name();

	if ( bodies.size() != 1 )
		{
		// Find which of the bodies this is, to make the name
		// distinct-per-body.
		const auto& body = func.Body();

		int idx = 0;
		while ( idx < bodies.size() && ! (bodies[idx].stmts == body) )
			++idx;

		if ( idx >= bodies.size() )
			reporter->InternalError("can't find body in CPPCompile::BodyName");

		fname = fname + "__" + Fmt(idx);
		}

	return fname;
	}
// Returns the comma-separated C++ text for the arguments in the list
// expression "e", as matched against the parameter types in "params".
// Arguments are generated natively, other than converting to/from
// "any" when just one of the parameter and the argument has that type.
std::string CPPCompile::GenArgs(const RecordTypePtr& params, const Expr* e)
	{
	const auto& args = e->AsListExpr()->Exprs();
	const int num_args = args.size();

	std::string result;

	for ( int idx = 0; idx < num_args; ++idx )
		{
		auto arg = args[idx];
		const auto& param_t = params->GetFieldType(idx);

		const bool param_any = param_t->Tag() == TYPE_ANY;
		const bool arg_any = arg->GetType()->Tag() == TYPE_ANY;

		// A non-"any" argument passed to an "any" parameter needs
		// to be in ValPtr form.
		const auto gt = (param_any && ! arg_any) ? GEN_VAL_PTR : GEN_NATIVE;

		auto arg_text = GenExpr(arg, gt);

		// Conversely, an "any" argument passed to a non-"any"
		// parameter needs converting to the parameter's type.
		if ( arg_any && ! param_any )
			arg_text = GenericValPtrToGT(arg_text, param_t, GEN_NATIVE);

		if ( idx > 0 )
			result += ", ";

		result += arg_text;
		}

	return result;
	}
} // zeek::detail

View file

@ -0,0 +1,166 @@
// See the file "COPYING" in the main distribution directory for copyright.
#include "zeek/script_opt/CPP/HashMgr.h"
#include "zeek/script_opt/CPP/Func.h"
#include "zeek/script_opt/CPP/Util.h"
namespace zeek::detail {
// Definition of the global hash-to-(index, scope) map.  Entries get
// added when loading "hash" records from an existing hash file.
VarMapper compiled_items;
// Opens "<hash_name_base>.dat".  When appending, the existing file is
// first opened for reading, locked, and its contents loaded; it's then
// opened for appending.  Otherwise the file is opened for writing
// afresh.  Failure to open the file is fatal.
CPPHashManager::CPPHashManager(const char* hash_name_base, bool _append)
	: append(_append), hash_name(std::string(hash_name_base) + ".dat")
	{
	const char* path = hash_name.c_str();

	if ( append )
		{
		hf_r = fopen(path, "r");

		if ( ! hf_r )
			{
			reporter->Error("can't open auxiliary C++ hash file %s for reading",
			                hash_name.c_str());
			exit(1);
			}

		lock_file(hash_name, hf_r);
		LoadHashes(hf_r);
		}

	hf_w = fopen(path, append ? "a" : "w");

	if ( ! hf_w )
		{
		reporter->Error("can't open auxiliary C++ hash file %s for writing",
		                hash_name.c_str());
		exit(1);
		}
	}
// Closes the write handle and then, if the file was also opened for
// reading, releases the lock taken on the read handle and closes it.
CPPHashManager::~CPPHashManager()
	{
	fclose(hf_w);

	if ( ! hf_r )
		return;

	unlock_file(hash_name, hf_r);
	fclose(hf_r);
	}
void CPPHashManager::LoadHashes(FILE* f)
{
std::string key;
// The hash file format is inefficient but simple to scan.
// It doesn't appear to pose a bottleneck, so until it does
// it makes sense for maintainability to keep it dead simple.
while ( GetLine(f, key) )
{
std::string line;
RequireLine(f, line);
p_hash_type hash;
if ( key == "func" )
{
auto func = line;
RequireLine(f, line);
if ( sscanf(line.c_str(), "%llu", &hash) != 1 || hash == 0 )
BadLine(line);
previously_compiled[hash] = func;
}
else if ( key == "global" )
{
auto gl = line;
RequireLine(f, line);
p_hash_type gl_t_h, gl_v_h;
if ( sscanf(line.c_str(), "%llu %llu",
&gl_t_h, &gl_v_h) != 2 )
BadLine(line);
gl_type_hashes[gl] = gl_t_h;
gl_val_hashes[gl] = gl_v_h;
// Eat the location info. It's there just for
// maintainers to be able to track down peculiarities
// in the hash file.
(void) RequireLine(f, line);
}
else if ( key == "global-var" )
{
auto gl = line;
RequireLine(f, line);
int scope;
if ( sscanf(line.c_str(), "%d", &scope) != 1 )
BadLine(line);
gv_scopes[gl] = scope;
}
else if ( key == "hash" )
{
int index;
int scope;
if ( sscanf(line.c_str(), "%llu %d %d", &hash, &index,
&scope) != 3 || hash == 0 )
BadLine(line);
compiled_items[hash] = CompiledItemPair{index, scope};
}
else if ( key == "record" )
record_type_globals.insert(line);
else if ( key == "enum" )
enum_type_globals.insert(line);
else
BadLine(line);
}
}
// Reads the next line of the hash file into "line", treating its
// absence as a fatal error.
void CPPHashManager::RequireLine(FILE* f, std::string& line)
	{
	if ( GetLine(f, line) )
		return;

	reporter->Error("missing final %s hash file entry", hash_name.c_str());
	exit(1);
	}
// Reads the next line from "f" into "line", without its trailing
// newline (if any).  Returns false, leaving "line" untouched, if
// there's nothing more to read.
//
// Lines can be arbitrarily long: the line is read in chunks, which are
// accumulated until a newline (or the end of the input) is seen.  This
// matters because treating the remainder of an over-long line as a
// separate line would put the parser out of step with the file's
// entries.
bool CPPHashManager::GetLine(FILE* f, std::string& line)
	{
	char buf[8192];
	std::string accum;
	bool got_any = false;

	while ( fgets(buf, sizeof buf, f) )
		{
		got_any = true;

		size_t n = strlen(buf);

		if ( n > 0 && buf[n-1] == '\n' )
			{ // Reached the end of the line.
			accum.append(buf, n - 1);
			break;
			}

		// Either the line didn't fit into the buffer, or this is
		// a final line lacking a newline.  In the latter case the
		// next fgets() fails, ending the loop.
		accum.append(buf, n);
		}

	if ( ! got_any )
		return false;

	line = accum;
	return true;
	}
void CPPHashManager::BadLine(std::string& line)
{
reporter->Error("bad %s hash file entry: %s",
hash_name.c_str(), line.c_str());
exit(1);
}
} // zeek::detail

View file

@ -0,0 +1,122 @@
// See the file "COPYING" in the main distribution directory for copyright.
// C++ compiler support class for managing information about compiled
// objects across compilations. The objects are identified via hashes,
// hence the term "hash manager". Objects can exist in different scopes.
// The information mapping hashes to objects and scopes is tracked
// across multiple compilations using intermediary file(s).
#pragma once
#include <stdio.h>
#include "zeek/script_opt/ProfileFunc.h"
namespace zeek::detail {
// Manages the hash file used to track compiled objects across
// compilations: loads any existing entries, answers queries about
// them, and provides access to the file for adding new ones.
class CPPHashManager {
public:
	// Create a hash manager that uses the given name for
	// referring to hash file(s).  It's a "base" rather than
	// a full name in case the manager winds up managing multiple
	// distinct files (not currently the case).
	//
	// If "append" is true then new hashes will be added to the
	// end of the file (and the hash file will be locked, to prevent
	// overlapping updates from concurrent compilation/appends).
	// Otherwise, the file will be generated afresh.
	CPPHashManager(const char* hash_name_base, bool append);
	~CPPHashManager();

	// The manager owns its file handles and closes them upon
	// destruction, so a copy would wind up closing the same
	// handles a second time.  Accordingly, copying is disallowed.
	CPPHashManager(const CPPHashManager&) = delete;
	CPPHashManager& operator=(const CPPHashManager&) = delete;

	bool IsAppend() const	{ return append; }

	// True if the given hash has already been generated.
	bool HasHash(p_hash_type h) const
		{ return previously_compiled.count(h) > 0; }

	// The internal (C++) name of a previously compiled function,
	// as identified by its hash.
	const std::string& FuncBodyName(p_hash_type h)
		{ return previously_compiled[h]; }

	// Whether the given global has already been generated;
	// and, if so, the hashes of its type and initialization
	// value (used for consistency checking).  Here the name
	// is that used at the script level.
	bool HasGlobal(const std::string& gl) const
		{ return gl_type_hashes.count(gl) > 0; }
	p_hash_type GlobalTypeHash(const std::string& gl)
		{ return gl_type_hashes[gl]; }
	p_hash_type GlobalValHash(const std::string& gl)
		{ return gl_val_hashes[gl]; }

	// Whether the given C++ global already exists, and, if so,
	// in what scope.
	bool HasGlobalVar(const std::string& gv) const
		{ return gv_scopes.count(gv) > 0; }
	int GlobalVarScope(const std::string& gv)
		{ return gv_scopes[gv]; }

	// True if the given global corresponds to a record type
	// or an enum type.  Used to suppress complaints about
	// definitional inconsistencies for extensible types.
	bool HasRecordTypeGlobal(const std::string& rt) const
		{ return record_type_globals.count(rt) > 0; }
	bool HasEnumTypeGlobal(const std::string& et) const
		{ return enum_type_globals.count(et) > 0; }

	// Access to the file we're writing hashes to, so that the
	// compiler can add new entries to it.
	FILE* HashFile() const	{ return hf_w; }

protected:
	// Parses an existing file with hash information.
	void LoadHashes(FILE* f);

	// Helper routines to load lines from hash file.
	// The first complains if the line isn't present;
	// the second merely indicates whether it was.
	void RequireLine(FILE* f, std::string& line);
	bool GetLine(FILE* f, std::string& line);

	// Generates an error message for an ill-formatted hash file line.
	void BadLine(std::string& line);

	// Tracks previously compiled bodies based on hashes, mapping them
	// to fully qualified (in terms of scoping) C++ names.
	std::unordered_map<p_hash_type, std::string> previously_compiled;

	// Tracks globals that are record or enum types, indexed using
	// script-level names.
	std::unordered_set<std::string> record_type_globals;
	std::unordered_set<std::string> enum_type_globals;

	// Tracks globals seen in previously compiled bodies, mapping
	// script-level names to hashes of their types and their values.
	std::unordered_map<std::string, p_hash_type> gl_type_hashes;
	std::unordered_map<std::string, p_hash_type> gl_val_hashes;

	// Information about globals in terms of their internal variable
	// names, rather than their script-level names.
	std::unordered_map<std::string, int> gv_scopes;

	// Whether we're appending to existing hash file(s), or starting
	// afresh.
	bool append;

	// Full name of the hash file (the base name plus ".dat").
	std::string hash_name;

	// Handles for reading from and writing to the hash file.
	// The read handle is only used when appending, and is the one
	// on which the file lock is held.
	FILE* hf_r = nullptr;
	FILE* hf_w = nullptr;
};
// An index into C++ globals (like "types_N__CPP"), along with a
// namespace scope.
struct CompiledItemPair {
	int index;
	int scope;
};

// Maps hashes to the above.
using VarMapper = std::unordered_map<p_hash_type, CompiledItemPair>;

extern VarMapper compiled_items;
} // zeek::detail

33
src/script_opt/CPP/ISSUES Normal file
View file

@ -0,0 +1,33 @@
conditional code:
- top-level conditionals okay due to hash protection
- but lower-level, directly called, won't translate
possible approaches:
- warn when compiling such functions
- an option to always do Invoke's rather than direct calls
- rewrite scripts to use run-time conditionals
(in base scripts, it's almost all regarding clusters)
lambdas: not supported if they refer to events that are otherwise not registered
(not all that hard to fix)
standalone code won't execute global statements
standalone code needs to deal with field_mapping initializations the
same as enum_mapping
type switches:
- easy to support by some sort of hash on the type
when's:
- need to understand "return when" semantics
slow compilation:
- analyze whether there's a bunch of unneeded stuff (e.g. orphan types)
efficiency:
- leverage ZVal's directly
- directly calling BiFs
- best done by supplanting bifcl
- event handlers directly called, using vector<ZVal> arguments
- import custom BiFs (e.g. network_time()) from ZAM

554
src/script_opt/CPP/Inits.cc Normal file
View file

@ -0,0 +1,554 @@
// See the file "COPYING" in the main distribution directory for copyright.
#include <errno.h>
#include <unistd.h>
#include <sys/stat.h>
#include "zeek/script_opt/ProfileFunc.h"
#include "zeek/script_opt/CPP/Compile.h"
namespace zeek::detail {
// Generates the C++ needed to evaluate initialization expression 'e'
// at run-time: a static wrapper function that computes it, a CPPFunc
// subclass whose Invoke() calls the wrapper, and a CallExprPtr global
// that gets initialized to a call of an instance of that subclass.
void CPPCompile::GenInitExpr(const ExprPtr& e)
	{
	NL();

	const auto& t = e->GetType();
	auto expr_name = InitExprName(e);

	// Name of the function that will compute 'e'.
	auto wrapper = std::string("wrapper_") + expr_name;

	// Forward declaration of that function.
	Emit("static %s %s(Frame* f__CPP);", FullTypeName(t), wrapper);

	// The Func subclass that can be used in a CallExpr to
	// evaluate 'e'.
	Emit("class %s_cl : public CPPFunc", wrapper);
	StartBlock();

	Emit("public:");

	// Its constructor, which sets up the function's type.
	Emit("%s_cl() : CPPFunc(\"%s\", %s)", wrapper, wrapper, e->IsPure() ? "true" : "false");
	StartBlock();
	Emit("type = make_intrusive<FuncType>(make_intrusive<RecordType>(new type_decl_list()), %s, FUNC_FLAVOR_FUNCTION);", GenTypeName(t));
	NoteInitDependency(e, TypeRep(t));
	EndBlock();

	// Its Invoke() method, which calls the wrapper.
	Emit("ValPtr Invoke(zeek::Args* args, Frame* parent) const override final");
	StartBlock();

	if ( ! IsNativeType(t) )
		Emit("return %s(parent);", wrapper);
	else
		GenInvokeBody(wrapper, t, "parent");

	EndBlock();
	EndBlock(true);

	// The definition of the function that computes 'e'.
	Emit("static %s %s(Frame* f__CPP)", FullTypeName(t), wrapper);
	StartBlock();
	Emit("return %s;", GenExpr(e, GEN_NATIVE));
	EndBlock();

	// Finally, the global holding the call expression.
	Emit("CallExprPtr %s;", expr_name);

	NoteInitDependency(e, TypeRep(t));

	auto call_init =
		std::string("make_intrusive<CallExpr>(make_intrusive<ConstExpr>(make_intrusive<FuncVal>(make_intrusive<") +
		wrapper + "_cl>())), make_intrusive<ListExpr>(), false)";

	AddInit(e, expr_name, call_init);
	}
// True if 'e' is a simple initialization expression: a constant, a
// name, or the coercion of an empty record constructor.
bool CPPCompile::IsSimpleInitExpr(const ExprPtr& e) const
	{
	const auto tag = e->Tag();

	if ( tag == EXPR_CONST || tag == EXPR_NAME )
		return true;

	if ( tag != EXPR_RECORD_COERCE )
		return false;

	// It's a record coercion, which only qualifies if what's being
	// coerced is an empty record constructor.
	auto operand = e->GetOp1();

	if ( operand->Tag() != EXPR_RECORD_CONSTRUCTOR )
		return false;

	auto constructor = static_cast<const RecordConstructorExpr*>(operand.get());
	const auto& fields = constructor->Op()->AsListExpr()->Exprs();

	return fields.length() == 0;
	}
// Returns the name that the init_exprs tracker associates with 'e'.
std::string CPPCompile::InitExprName(const ExprPtr& e)
	{
	auto expr_name = init_exprs.KeyName(e);
	return expr_name;
	}
// Adds initializations for global 'g', whose C++ variable is 'gl' and
// whose value is 'v'.  The generated code only sets the value (and
// attributes, if the global has any) if at run-time the global does
// not already have a value.  Nothing is generated for function values.
void CPPCompile::GenGlobalInit(const ID* g, std::string& gl, const ValPtr& v)
	{
	const auto& t = v->GetType();
	const auto tag = t->Tag();

	if ( tag == TYPE_FUNC )
		// This should get initialized by recognizing hash of
		// the function's body.
		return;

	std::string init_val;

	if ( tag != TYPE_OPAQUE )
		init_val = BuildConstant(g, v);

	else
		{
		// We can only generate these by reproducing the expression
		// (presumably a function call) used to create the value.
		// That isn't fully sound, since if the global's value
		// was redef'd in terms of its original value (e.g.,
		// "redef x = f(x)"), then we'll wind up with a broken
		// expression.  It's difficult to detect that in full
		// generality, so um Don't Do That.  (Note that this
		// only affects execution of standalone compiled code,
		// where the original scripts are replaced by load-stubs.
		// If the scripts are available, then the HasVal() test
		// we generate will mean we don't wind up using this
		// expression anyway.)

		// Use the final initialization expression.
		auto& g_init_exprs = g->GetInitExprs();
		init_val = GenExpr(g_init_exprs.back(), GEN_VAL_PTR, false);
		}

	auto& attrs = g->GetAttrs();

	// The statement that sets the global's value.
	const std::string set_val = "\t" + gl + "->SetVal(" + init_val + ");";

	AddInit(g, std::string("if ( ! ") + gl + "->HasVal() )");

	if ( ! attrs )
		{
		AddInit(g, set_val);
		return;
		}

	// With attributes, two statements are needed, so they go
	// in a block.
	RegisterAttributes(attrs);

	AddInit(g, "\t{");
	AddInit(g, set_val);
	AddInit(g, "\t" + gl + "->SetAttrs(" + AttrsName(attrs) + ");");
	AddInit(g, "\t}");
	}
// For each tracked function value, adds an initialization of its C++
// constant to a call to lookup_func__CPP(), passing the function's
// name, the hashes of its bodies, and its type.
void CPPCompile::GenFuncVarInits()
	{
	for ( const auto& entry : func_vars )
		{
		auto& fv = entry.first;
		auto& cpp_const = entry.second;

		auto f = fv->AsFunc();
		const auto& fn = f->Name();
		const auto& ft = f->GetType();

		NoteInitDependency(fv, TypeRep(ft));

		// Build up a brace-enclosed list of the bodies' hashes.
		std::string hash_list;

		for ( const auto& b : f->GetBodies() )
			{
			auto stmts = b.stmts.get();

			ASSERT(body_names.count(stmts) > 0);
			auto& b_name = body_names[stmts];
			ASSERT(body_hashes.count(b_name) > 0);

			NoteInitDependency(fv, stmts);

			if ( ! hash_list.empty() )
				hash_list += ", ";

			hash_list += Fmt(body_hashes[b_name]);
			}

		hash_list = "{" + hash_list + "}";

		auto init = std::string("lookup_func__CPP(\"") + fn +
		            "\", " + hash_list + ", " + GenTypeName(ft) + ")";

		AddInit(fv, cpp_const, init);
		}
	}
// Adds to pre_inits the statement (if any) needed to pre-initialize
// the C++ variable corresponding to type 't'.  Vector, table, function
// and "type" types don't require any pre-initialization.  An
// unrecognized type tag is an internal error.
void CPPCompile::GenPreInit(const Type* t)
	{
	std::string pre_init;

	switch ( t->Tag() ) {
	case TYPE_ADDR:
	case TYPE_ANY:
	case TYPE_BOOL:
	case TYPE_COUNT:
	case TYPE_DOUBLE:
	case TYPE_ERROR:
	case TYPE_INT:
	case TYPE_INTERVAL:
	case TYPE_PATTERN:
	case TYPE_PORT:
	case TYPE_STRING:
	case TYPE_TIME:
	case TYPE_TIMER:
	case TYPE_VOID:
		// These all come directly from base_type().
		pre_init = std::string("base_type(") + TypeTagName(t->Tag()) + ")";
		break;

	case TYPE_ENUM:
		// Looked up (or created) by name.
		pre_init = std::string("get_enum_type__CPP(\"") +
		           t->GetName() + "\")";
		break;

	case TYPE_SUBNET:
		pre_init = std::string("make_intrusive<SubNetType>()");
		break;

	case TYPE_FILE:
		pre_init = std::string("make_intrusive<FileType>(") +
		           GenTypeName(t->AsFileType()->Yield()) + ")";
		break;

	case TYPE_OPAQUE:
		pre_init = std::string("make_intrusive<OpaqueType>(\"") +
		           t->AsOpaqueType()->Name() + "\")";
		break;

	case TYPE_RECORD:
		{
		// Looked up (or created) by name, with a nil pointer
		// used for record types that don't have a name.
		std::string name;

		if ( t->GetName() != "" )
			name = std::string("\"") + t->GetName() + std::string("\"");
		else
			name = "nullptr";

		pre_init = std::string("get_record_type__CPP(") + name + ")";
		}
		break;

	case TYPE_LIST:
		pre_init = std::string("make_intrusive<TypeList>()");
		break;

	case TYPE_TYPE:
	case TYPE_VECTOR:
	case TYPE_TABLE:
	case TYPE_FUNC:
		// Nothing to do for these, pre-initialization-wise.
		return;

	default:
		// The message names this function, so that the failure
		// can be tracked down to the right place.
		reporter->InternalError("bad type in CPPCompile::GenPreInit");
	}

	pre_inits.emplace_back(GenTypeName(t) + " = " + pre_init + ";");
	}
// Emits the definition of pre_init__CPP(), whose body consists of
// the accumulated pre-initialization statements.
void CPPCompile::GenPreInits()
	{
	NL();

	Emit("void pre_init__CPP()");
	StartBlock();

	for ( const auto& stmt : pre_inits )
		{
		Emit(stmt);
		}

	EndBlock();
	}
// Appends the given statement to the list of initializations
// associated with object 'o'.
void CPPCompile::AddInit(const Obj* o, const std::string& init)
	{
	auto& inits_for_o = obj_inits[o];
	inits_for_o.emplace_back(init);
	}
// Makes sure that object 'o' has an entry in obj_inits, creating an
// empty one if it doesn't.  An existing entry is left as is.
void CPPCompile::AddInit(const Obj* o)
	{
	if ( obj_inits.count(o) > 0 )
		return;

	obj_inits[o] = std::vector<std::string>{};
	}
// Records 'o2' among the objects that 'o1' depends on.
void CPPCompile::NoteInitDependency(const Obj* o1, const Obj* o2)
	{
	auto& deps_of_o1 = obj_deps[o1];
	deps_of_o1.emplace(o2);
	}
// Verifies that every object with recorded dependencies, and every
// object depended upon, is present in 'to_do'.  On finding one that
// isn't, prints a complaint to stderr and exits.
void CPPCompile::CheckInitConsistency(std::unordered_set<const Obj*>& to_do)
	{
	for ( const auto& entry : obj_deps )
		{
		const auto& obj = entry.first;
		const auto& deps = entry.second;

		if ( to_do.count(obj) == 0 )
			{
			fprintf(stderr, "object not in to_do: %s\n",
			        obj_desc(obj).c_str());
			exit(1);
			}

		for ( const auto& dep : deps )
			{
			if ( to_do.count(dep) > 0 )
				continue;

			fprintf(stderr, "dep object for %s not in to_do: %s\n",
			        obj_desc(obj).c_str(), obj_desc(dep).c_str());
			exit(1);
			}
		}
	}
// Generates the initialization functions for the objects in 'to_do',
// grouped into "cohorts" such that each object's dependencies are
// initialized by an earlier cohort.  Empties 'to_do' in the process.
// Returns the number of cohorts generated.
//
// The basic approach is fairly brute force: find elements of to_do
// that don't have any pending dependencies; generate those; and remove
// them from the to_do list, freeing up other to_do entries to now not
// having any pending dependencies.  Iterate until there are no more
// to-do items.
int CPPCompile::GenDependentInits(std::unordered_set<const Obj*>& to_do)
	{
	int n = 0;

	while ( to_do.size() > 0 )
		{
		std::unordered_set<const Obj*> cohort;

		for ( const auto& o : to_do )
			{
			const auto& od = obj_deps.find(o);

			bool has_pending_dep = false;

			if ( od != obj_deps.end() )
				{
				for ( const auto& d : od->second )
					if ( to_do.count(d) > 0 )
						{
						has_pending_dep = true;
						break;
						}
				}

			if ( has_pending_dep )
				continue;

			cohort.insert(o);
			}

		// An empty cohort means that every remaining object is
		// waiting on some other remaining object, i.e., there's
		// a dependency cycle.  Since to_do then can't shrink,
		// continuing would loop forever, so this needs to be
		// caught even in builds where ASSERT compiles to nothing.
		if ( cohort.empty() )
			reporter->InternalError("circular initialization dependencies in CPPCompile::GenDependentInits");

		GenInitCohort(++n, cohort);

		for ( const auto& o : cohort )
			{
			ASSERT(to_do.count(o) > 0);
			to_do.erase(o);
			}
		}

	return n;
	}
// Emits the definition of init_<nc>__CPP(), whose body consists of
// the initializations for each of the objects in the given cohort.
void CPPCompile::GenInitCohort(int nc, std::unordered_set<const Obj*>& cohort)
	{
	NL();
	Emit("void init_%s__CPP()", Fmt(nc));
	StartBlock();

	// If any script/BiF functions are used for initializing globals,
	// the code generated from that will expect the presence of a
	// frame pointer, even if nil.
	Emit("Frame* f__CPP = nullptr;");

	// Purely for readable output: put a blank line after the
	// initializations for an object, but only for objects that
	// actually have some.
	bool prev_had_inits = false;

	for ( auto obj : cohort )
		{
		if ( prev_had_inits )
			NL();

		const auto& inits = obj_inits.find(obj)->second;

		for ( const auto& stmt : inits )
			Emit("%s", stmt);

		prev_had_inits = ! inits.empty();
		}

	EndBlock();
	}
// Emits code that, for each tracked record field, looks up the field's
// offset in its record type at run-time (first adding the field to the
// type if it isn't there), and appends the offset to field_mapping.
void CPPCompile::InitializeFieldMappings()
	{
	Emit("int fm_offset;");

	for ( const auto& fd : field_decls )
		{
		auto rec_type = fd.first;
		auto decl = fd.second;
		auto field_name = decl->id;

		auto rec_name = GenTypeName(rec_type) + "->AsRecordType()";

		Emit("fm_offset = %s->FieldOffset(\"%s\");", rec_name, field_name);

		// Generate the handling of the field being missing.
		Emit("if ( fm_offset < 0 )");
		StartBlock();
		Emit("// field does not exist, create it");
		Emit("fm_offset = %s->NumFields();", rec_name);
		Emit("type_decl_list tl;");
		Emit(GenTypeDecl(decl));
		Emit("%s->AddFieldsDirectly(tl);", rec_name);
		EndBlock();

		Emit("field_mapping.push_back(fm_offset);");
		}
	}
// Generates the enum mapping initialization for each tracked enum
// name, numbering them consecutively starting from zero.
void CPPCompile::InitializeEnumMappings()
	{
	int next_index = 0;

	for ( const auto& en : enum_names )
		{
		InitializeEnumMappings(en.first, en.second, next_index);
		++next_index;
		}
	}
void CPPCompile::InitializeEnumMappings(const EnumType* et,
const std::string& e_name, int index)
{
AddInit(et, "{");
auto et_name = GenTypeName(et) + "->AsEnumType()";
AddInit(et, "int em_offset = " + et_name +
"->Lookup(\"" + e_name + "\");");
AddInit(et, "if ( em_offset < 0 )");
AddInit(et, "\t{");
AddInit(et, "\tem_offset = " + et_name + "->Names().size();");
// The following is to catch the case where the offset is already
// in use due to it being specified explicitly for an existing enum.
AddInit(et, "\tif ( " + et_name + "->Lookup(em_offset) )");
AddInit(et, "\t\treporter->InternalError(\"enum inconsistency while initializing compiled scripts\");");
AddInit(et, "\t" + et_name +
"->AddNameInternal(\"" + e_name + "\", em_offset);");
AddInit(et, "\t}");
AddInit(et, "enum_mapping[" + Fmt(index) + "] = em_offset;");
AddInit(et, "}");
}
void CPPCompile::GenInitHook()
{
NL();
if ( standalone )
GenStandaloneActivation();
Emit("int hook_in_init()");
StartBlock();
Emit("CPP_init_funcs.push_back(init__CPP);");
if ( standalone )
GenLoad();
Emit("return 0;");
EndBlock();
// Trigger the activation of the hook at run-time.
NL();
Emit("static int dummy = hook_in_init();\n");
}
// Emits standalone_init__CPP(), which for each event handler and hook
// with compiled bodies calls activate_bodies__CPP() with the hashes of
// those bodies.
//
// For events and hooks, we need to add each compiled body *unless*
// it's already there (which could be the case if the standalone
// code wasn't run standalone but instead with the original scripts).
// For events, we also register them in order to activate the
// associated scripts.
void CPPCompile::GenStandaloneActivation()
	{
	Emit("void standalone_init__CPP()");
	StartBlock();

	// Collect, per hook/event handler, the hashes of its compiled
	// bodies.
	std::unordered_map<const Func*, std::vector<p_hash_type>> hashes_per_func;

	for ( const auto& func : funcs )
		{
		auto f = func.Func();

		if ( f->Flavor() != FUNC_FLAVOR_FUNCTION )
			{
			auto fname = BodyName(func);
			auto bname = Canonicalize(fname.c_str()) + "_zf";

			if ( compiled_funcs.count(bname) > 0 )
				{
				ASSERT(body_hashes.count(bname) > 0);
				hashes_per_func[f].push_back(body_hashes[bname]);
				}

			// Otherwise, we didn't wind up compiling it.
			}

		// Otherwise, it's a regular function, for which there's
		// no need to explicitly add bodies.
		}

	// Now generate the activation call for each of them.
	for ( auto& entry : hashes_per_func )
		{
		auto f = entry.first;
		const auto fn = f->Name();
		const auto& ft = f->GetType();

		std::string hashes = "{";

		for ( auto h : entry.second )
			{
			if ( hashes.size() > 1 )
				hashes += ", ";

			hashes += Fmt(h);
			}

		hashes += "}";

		Emit("activate_bodies__CPP(\"%s\", %s, %s);",
		     fn, GenTypeName(ft), hashes);
		}

	EndBlock();
	NL();
	}
// Emits the registration of standalone_init__CPP under a hash unique
// to this compilation, and prints to stdout a script statement that
// calls load_CPP() with that hash.
void CPPCompile::GenLoad()
	{
	// Mixing in the current time makes the hash unique to this
	// compilation.
	auto now = util::current_time();
	auto time_hash = std::hash<double>{}(now);

	total_hash = merge_p_hashes(total_hash, time_hash);

	Emit("register_scripts__CPP(%s, standalone_init__CPP);", Fmt(total_hash));

	// The placeholder script.
	printf("global init_CPP_%llu = load_CPP(%llu);\n",
	       total_hash, total_hash);
	}
} // zeek::detail

View file

@ -0,0 +1,37 @@
// See the file "COPYING" in the main distribution directory for copyright.
#pragma once
#include "zeek/module_util.h"
#include "zeek/ZeekString.h"
#include "zeek/Func.h"
#include "zeek/File.h"
#include "zeek/Frame.h"
#include "zeek/Scope.h"
#include "zeek/RE.h"
#include "zeek/IPAddr.h"
#include "zeek/Val.h"
#include "zeek/OpaqueVal.h"
#include "zeek/Expr.h"
#include "zeek/Event.h"
#include "zeek/EventRegistry.h"
#include "zeek/RunState.h"
#include "zeek/script_opt/ScriptOpt.h"
#include "zeek/script_opt/CPP/Func.h"
#include "zeek/script_opt/CPP/RuntimeInit.h"
#include "zeek/script_opt/CPP/RuntimeOps.h"
#include "zeek/script_opt/CPP/RuntimeVec.h"
namespace zeek {

// Shorthands for IntrusivePtr's to various Val subclasses.
using BoolValPtr = IntrusivePtr<BoolVal>;
using CountValPtr = IntrusivePtr<CountVal>;
using DoubleValPtr = IntrusivePtr<DoubleVal>;
using StringValPtr = IntrusivePtr<StringVal>;
using IntervalValPtr = IntrusivePtr<IntervalVal>;
using PatternValPtr = IntrusivePtr<PatternVal>;
using FuncValPtr = IntrusivePtr<FuncVal>;
using FileValPtr = IntrusivePtr<FileVal>;
using SubNetValPtr = IntrusivePtr<SubNetVal>;

} // namespace zeek

View file

@ -0,0 +1,209 @@
// See the file "COPYING" in the main distribution directory for copyright.
#include "zeek/module_util.h"
#include "zeek/EventRegistry.h"
#include "zeek/script_opt/CPP/RuntimeInit.h"
namespace zeek::detail {
// The registered initialization hooks, which init_CPPs() calls in the
// order in which they appear here.
std::vector<CPP_init_func> CPP_init_funcs;
// Calls all of the initialization hooks, in the order they were added.
void init_CPPs()
{
for ( auto f : CPP_init_funcs )
f();
}
// A trick for registering the presence of compiled code: initializing
// the static variable below causes this function to run, which makes
// CPP_init_hook non-null.  The main part of Zeek uses that to tell
// that there's CPP code available.
static int flag_init_CPP()
	{
	CPP_init_hook = init_CPPs;

	// The value itself doesn't matter.
	return 0;
	}

static int dummy = flag_init_CPP();
// Records the given compiled body, along with its priority and
// associated events, under the given hash.  Any previous entry for the
// hash is replaced.
void register_body__CPP(CPPStmtPtr body, int priority, p_hash_type hash,
                        std::vector<std::string> events)
	{
	CompiledScript script{ std::move(body), priority, std::move(events) };
	compiled_scripts[hash] = std::move(script);
	}
// Installs a global identifier with the given name whose value is a
// CPPLambdaFunc built from the given body, and whose type is 't'.
// If the lambda has no captures, also registers the body under the
// given hash.
void register_lambda__CPP(CPPStmtPtr body, p_hash_type hash, const char* name,
                          TypePtr t, bool has_captures)
	{
	auto ft = cast_intrusive<FuncType>(t);

	// Create the quasi-global.
	auto id = install_ID(name, GLOBAL_MODULE_NAME, true, false);

	auto lambda_func = make_intrusive<CPPLambdaFunc>(name, ft, body);
	lambda_func->SetName(name);

	id->SetVal(make_intrusive<FuncVal>(std::move(lambda_func)));
	id->SetType(ft);

	// Lambdas used in initializing global functions need to
	// be registered, so that the initialization can find them.
	// We do not, however, want to register *all* lambdas, because
	// the ones that use captures cannot be used as regular
	// function bodies.
	if ( has_captures )
		return;

	// Note, no support for lambdas that themselves refer
	// to events.
	register_body__CPP(body, 0, hash, {});
	}
// Associates the given callback with hash 'h' in standalone_callbacks.
// The hash is expected to not already be present.
void register_scripts__CPP(p_hash_type h, void (*callback)())
	{
	const bool already_present = standalone_callbacks.count(h) != 0;
	ASSERT(! already_present);

	standalone_callbacks[h] = callback;
	}
void activate_bodies__CPP(const char* fn, TypePtr t,
std::vector<p_hash_type> hashes)
{
auto ft = cast_intrusive<FuncType>(t);
auto fg = lookup_ID(fn, GLOBAL_MODULE_NAME, false, false, false);
if ( ! fg )
{
fg = install_ID(fn, GLOBAL_MODULE_NAME, true, false);
fg->SetType(ft);
}
auto f = fg->GetVal()->AsFunc();
const auto& bodies = f->GetBodies();
// Track hashes of compiled bodies already associated with f.
std::unordered_set<p_hash_type> existing_CPP_bodies;
for ( auto& b : bodies )
{
auto s = b.stmts;
if ( s->Tag() != STMT_CPP )
continue;
const auto& cpp_s = cast_intrusive<CPPStmt>(s);
existing_CPP_bodies.insert(cpp_s->GetHash());
}
// Events we need to register.
std::unordered_set<std::string> events;
if ( ft->Flavor() == FUNC_FLAVOR_EVENT )
events.insert(fn);
std::vector<detail::IDPtr> no_inits; // empty initialization vector
int num_params = ft->Params()->NumFields();
for ( auto h : hashes )
{
if ( existing_CPP_bodies.count(h) > 0 )
// We're presumably running with the original script,
// and have already incorporated this compiled body
// into f.
continue;
// Add in the new body.
ASSERT(compiled_scripts.count(h) > 0);
auto cs = compiled_scripts[h];
f->AddBody(cs.body, no_inits, num_params, cs.priority);
events.insert(cs.events.begin(), cs.events.end());
}
for ( const auto& e : events )
{
auto eh = event_registry->Register(e);
eh->SetUsed();
}
}
// Returns the global identifier named "g". If there isn't one yet,
// installs a new global with that name and gives it type "t".
IDPtr lookup_global__CPP(const char* g, const TypePtr& t)
	{
	auto global_id = lookup_ID(g, GLOBAL_MODULE_NAME, false, false, false);

	if ( global_id )
		return global_id;

	global_id = install_ID(g, GLOBAL_MODULE_NAME, true, false);
	global_id->SetType(t);

	return global_id;
	}
// Returns the function associated with the global named "bif", or nil
// if there's no such global or it doesn't (yet) have a value.
Func* lookup_bif__CPP(const char* bif)
	{
	auto b = lookup_ID(bif, GLOBAL_MODULE_NAME, false, false, false);

	if ( ! b )
		return nullptr;

	// An identifier can exist without having a value assigned;
	// don't dereference a nil value in that case.
	const auto& v = b->GetVal();
	return v ? v->AsFunc() : nullptr;
	}
// Builds a function value named "name", of type "t", whose bodies are the
// compiled bodies associated with "hashes". Every hash must already be
// present in compiled_scripts. The events used by those bodies are
// registered and marked as used.
FuncValPtr lookup_func__CPP(std::string name, std::vector<p_hash_type> hashes,
                            const TypePtr& t)
	{
	std::vector<StmtPtr> func_bodies;
	std::vector<int> body_priorities;

	for ( auto hash : hashes )
		{
		ASSERT(compiled_scripts.count(hash) > 0);
		const auto& compiled = compiled_scripts[hash];

		func_bodies.push_back(compiled.body);
		body_priorities.push_back(compiled.priority);

		// The same event might get registered more than once if
		// multiple bodies use it, but that's fine since the
		// semantics for Register explicitly allow it.
		for ( auto& ev : compiled.events )
			{
			auto handler = event_registry->Register(ev);
			handler->SetUsed();
			}
		}

	auto func_type = cast_intrusive<FuncType>(t);
	auto script_func = make_intrusive<ScriptFunc>(std::move(name),
	                                              std::move(func_type),
	                                              std::move(func_bodies),
	                                              std::move(body_priorities));

	return make_intrusive<FuncVal>(std::move(script_func));
	}
// Returns the existing record type with the given name, if the name is
// non-nil and refers to a global whose type is a record. Otherwise
// returns a newly created record type with no fields.
RecordTypePtr get_record_type__CPP(const char* record_type_name)
	{
	if ( record_type_name )
		{
		IDPtr known = global_scope()->Find(record_type_name);

		if ( known && known->GetType()->Tag() == TYPE_RECORD )
			return cast_intrusive<RecordType>(known->GetType());
		}

	return make_intrusive<RecordType>(new type_decl_list());
	}
// Returns the existing enum type with the given name, if there's a global
// of that name whose type is an enum. Otherwise returns a newly created
// enum type with that name.
EnumTypePtr get_enum_type__CPP(const std::string& enum_type_name)
	{
	auto known = global_scope()->Find(enum_type_name);
	const bool is_enum = known && known->GetType()->Tag() == TYPE_ENUM;

	if ( ! is_enum )
		return make_intrusive<EnumType>(enum_type_name);

	return cast_intrusive<EnumType>(known->GetType());
	}
// Creates an enum value with low-level value "i" for the enum type "t".
EnumValPtr make_enum__CPP(TypePtr t, int i)
	{
	auto enum_type = cast_intrusive<EnumType>(std::move(t));
	auto enum_val = make_intrusive<EnumVal>(enum_type, i);
	return enum_val;
	}
} // namespace zeek::detail

View file

@ -0,0 +1,76 @@
// See the file "COPYING" in the main distribution directory for copyright.
// Run-time support for initializing C++-compiled scripts.
#pragma once
#include "zeek/Val.h"
#include "zeek/script_opt/CPP/Func.h"
namespace zeek {
using FuncValPtr = IntrusivePtr<zeek::FuncVal>;
namespace detail {
// An initialization hook for a collection of compiled-to-C++ functions
// (the result of a single invocation of the compiler on a set of scripts).
typedef void (*CPP_init_func)();
// Tracks the initialization hooks for different compilation runs.
extern std::vector<CPP_init_func> CPP_init_funcs;
// Registers the given compiled function body as associated with the
// given priority and hash. "events" is a list of event handlers
// relevant for the function body, which should be registered if the
// function body is going to be used.
extern void register_body__CPP(CPPStmtPtr body, int priority, p_hash_type hash,
std::vector<std::string> events);
// Registers a lambda body as associated with the given hash. Includes
// the name of the lambda (so it can be made available as a quasi-global
// identifier), its type, and whether it needs captures.
extern void register_lambda__CPP(CPPStmtPtr body, p_hash_type hash,
const char* name, TypePtr t,
bool has_captures);
// Registers a callback for activating a set of scripts associated with
// the given hash.
extern void register_scripts__CPP(p_hash_type h, void (*callback)());
// Activates the event handler/hook with the given name (which is created
// if it doesn't exist) and type, using (at least) the bodies associated
// with the given hashes.
extern void activate_bodies__CPP(const char* fn, TypePtr t,
std::vector<p_hash_type> hashes);
// Looks for a global with the given name. If not present, creates it
// with the given type.
extern IDPtr lookup_global__CPP(const char* g, const TypePtr& t);
// Looks for a BiF with the given name. Returns nil if not present.
extern Func* lookup_bif__CPP(const char* bif);
// For the function bodies associated with the given hashes, creates and
// returns an associated FuncVal with the given name and type. It's a
// fatal error for any of the hashes not to exist, because this function
// should only be called by compiled code that has ensured their existence.
extern FuncValPtr lookup_func__CPP(std::string name, std::vector<p_hash_type> h,
const TypePtr& t);
// Returns the record type corresponding to the given name, as long as the
// name is indeed a record type. Otherwise (or if the name is nil)
// creates a new empty record type.
extern RecordTypePtr get_record_type__CPP(const char* record_type_name);
// Returns the "enum" type corresponding to the given name, as long as
// the name is indeed an enum type. Otherwise, creates a new enum
// type with the given name.
extern EnumTypePtr get_enum_type__CPP(const std::string& enum_type_name);
// Returns an enum value corresponding to the given low-level value 'i'
// in the context of the given enum type 't'.
extern EnumValPtr make_enum__CPP(TypePtr t, int i);
} // namespace zeek::detail
} // namespace zeek

View file

@ -0,0 +1,232 @@
// See the file "COPYING" in the main distribution directory for copyright.
#include "zeek/ZeekString.h"
#include "zeek/RunState.h"
#include "zeek/EventRegistry.h"
#include "zeek/IPAddr.h"
#include "zeek/script_opt/CPP/RuntimeOps.h"
namespace zeek::detail {
// Returns a new string value holding s1 followed by s2.
StringValPtr str_concat__CPP(const String* s1, const String* s2)
	{
	std::vector<const String*> pieces;
	pieces.push_back(s1);
	pieces.push_back(s2);

	auto joined = concatenate(pieces);
	return make_intrusive<StringVal>(joined);
	}
// Returns true if the bytes of s1 occur somewhere within s2.
bool str_in__CPP(const String* s1, const String* s2)
	{
	const auto* needle =
		reinterpret_cast<const unsigned char*>(s1->CheckString());

	const auto pos = util::strstr_n(s2->Len(), s2->Bytes(), s1->Len(), needle);
	return pos != -1;
	}
// Packages up the given individual index values into a single list value
// (of type "any") suitable for indexing an aggregate.
ListValPtr index_val__CPP(std::vector<ValPtr> indices)
	{
	auto ind_v = make_intrusive<ListVal>(TYPE_ANY);

	// In the future, we could provide N versions of this that
	// unroll the loop.
	//
	// "indices" is our own copy, so hand each element over rather
	// than copying it (and bumping its reference count) twice.
	for ( auto& i : indices )
		ind_v->Append(std::move(i));

	return ind_v;
	}
// Returns the element of table "t" at the given indices (per the table's
// FindOrDefault()). Reports a run-time error if that yields nothing.
ValPtr index_table__CPP(const TableValPtr& t, std::vector<ValPtr> indices)
	{
	auto key = index_val__CPP(std::move(indices));
	auto elem = t->FindOrDefault(key);

	if ( ! elem )
		reporter->CPPRuntimeError("no such index");

	return elem;
	}
// Returns the element of "vec" at position "index", reporting a run-time
// error if the vector has no value there.
ValPtr index_vec__CPP(const VectorValPtr& vec, int index)
	{
	auto elem = vec->ValAt(index);

	if ( elem )
		return elem;

	reporter->CPPRuntimeError("no such index");
	return elem;
	}
// Returns the result of indexing the string "svp" with the given indices,
// as computed by index_string().
ValPtr index_string__CPP(const StringValPtr& svp, std::vector<ValPtr> indices)
	{
	auto idx_list = index_val__CPP(std::move(indices));
	return index_string(svp->AsString(), idx_list.get());
	}
// Assigns the value "v" to the global "g", which corresponds to an event
// handler, and updates "gh" to refer to the handler registered under g's
// name. Returns the assigned value, for use in assignment cascades.
ValPtr set_event__CPP(IDPtr g, ValPtr v, EventHandlerPtr& gh)
	{
	// Pass a copy rather than moving: we still need "v" for the return
	// value, and a moved-from pointer would come back as nil.
	g->SetVal(v);
	gh = event_registry->Register(g->Name());
	return v;
	}
// Returns "v" cast to type "t" via cast_value_to_type(), reporting a
// run-time error if that doesn't produce a value.
ValPtr cast_value_to_type__CPP(const ValPtr& v, const TypePtr& t)
	{
	auto converted = cast_value_to_type(v.get(), t.get());

	if ( converted )
		return converted;

	reporter->CPPRuntimeError("invalid cast of value with type '%s' to type '%s'",
	                          type_name(v->GetType()->Tag()), type_name(t->Tag()));
	return converted;
	}
// Returns the subnet formed from address "a" and prefix length "mask".
// Reports a run-time error for prefix lengths beyond 32 (IPv4 addresses)
// or 128 (all other addresses).
SubNetValPtr addr_mask__CPP(const IPAddr& a, uint32_t mask)
	{
	const bool is_v4 = a.GetFamily() == IPv4;

	if ( is_v4 && mask > 32 )
		reporter->CPPRuntimeError("bad IPv4 subnet prefix length: %d", int(mask));

	else if ( ! is_v4 && mask > 128 )
		reporter->CPPRuntimeError("bad IPv6 subnet prefix length: %d", int(mask));

	return make_intrusive<SubNetVal>(a, mask);
	}
// Helper function for reporting invalidation of iterators: issues a
// warning if "invalid" is set, and otherwise does nothing.
static void check_iterators__CPP(bool invalid)
	{
	if ( ! invalid )
		return;

	reporter->Warning("possible loop/iterator invalidation in compiled code");
	}
// Shared implementation of aggregate assignments of the form
// "v1[v2] = v3", for aggregate pointer type T. Warns about iterator
// invalidation, reports any error message produced by the assignment
// as a run-time error, and returns the assigned value.
template <typename T>
ValPtr assign_to_index__CPP(T v1, ValPtr v2, ValPtr v3)
	{
	bool invalidated = false;
	const auto problem = assign_to_index(std::move(v1), std::move(v2), v3,
	                                     invalidated);

	check_iterators__CPP(invalidated);

	if ( problem )
		reporter->CPPRuntimeError("%s", problem);

	return v3;
	}
// Table version of "v1[v2] = v3".
ValPtr assign_to_index__CPP(TableValPtr v1, ValPtr v2, ValPtr v3)
	{
	return assign_to_index__CPP<TableValPtr>(std::move(v1), std::move(v2),
	                                         std::move(v3));
	}

// Vector version of "v1[v2] = v3".
ValPtr assign_to_index__CPP(VectorValPtr v1, ValPtr v2, ValPtr v3)
	{
	return assign_to_index__CPP<VectorValPtr>(std::move(v1), std::move(v2),
	                                          std::move(v3));
	}

// String version of "v1[v2] = v3".
ValPtr assign_to_index__CPP(StringValPtr v1, ValPtr v2, ValPtr v3)
	{
	return assign_to_index__CPP<StringValPtr>(std::move(v1), std::move(v2),
	                                          std::move(v3));
	}
// Adds the given index (with no associated value) to "aggr", warning if
// doing so may have invalidated iterators.
void add_element__CPP(TableValPtr aggr, ListValPtr indices)
	{
	bool invalidated = false;
	aggr->Assign(std::move(indices), nullptr, true, &invalidated);
	check_iterators__CPP(invalidated);
	}
// Removes the given index from "aggr", warning if doing so may have
// invalidated iterators.
void remove_element__CPP(TableValPtr aggr, ListValPtr indices)
	{
	bool invalidated = false;
	const auto& index_list = *indices.get();

	aggr->Remove(index_list, true, &invalidated);
	check_iterators__CPP(invalidated);
	}
// A helper function that takes parallel vectors of attribute tags
// and values and returns a collective AttributesPtr corresponding to
// those instantiated attributes. For attributes that don't have
// associated expressions, the corresponding value should be nil.
static AttributesPtr build_attrs__CPP(std::vector<int> attr_tags,
                                      std::vector<ValPtr> attr_vals)
	{
	std::vector<AttrPtr> attrs;
	const int num_attrs = attr_tags.size();

	for ( int idx = 0; idx < num_attrs; ++idx )
		{
		const auto tag = AttrTag(attr_tags[idx]);
		const auto& val = attr_vals[idx];

		// Only attributes that come with a value get an expression,
		// in the form of a constant wrapping that value.
		ExprPtr val_expr;
		if ( val )
			val_expr = make_intrusive<ConstExpr>(val);

		attrs.emplace_back(make_intrusive<Attr>(tag, val_expr));
		}

	return make_intrusive<Attributes>(std::move(attrs), nullptr, false, false);
	}
// Constructs a set of type "t" holding the given elements, with
// attributes built from the parallel attr_tags/attr_vals vectors.
TableValPtr set_constructor__CPP(std::vector<ValPtr> elements, TableTypePtr t,
                                 std::vector<int> attr_tags,
                                 std::vector<ValPtr> attr_vals)
	{
	auto attrs = build_attrs__CPP(std::move(attr_tags), std::move(attr_vals));
	auto aggr = make_intrusive<TableVal>(std::move(t), std::move(attrs));

	// Iterate by non-const reference so that the std::move() really
	// does hand the element over; moving from a const reference
	// silently degrades to a copy.
	for ( auto& elem : elements )
		aggr->Assign(std::move(elem), nullptr);

	return aggr;
	}
// Constructs a table of type "t" whose entries are given by the parallel
// "indices" and "vals" vectors, with attributes built from the parallel
// attr_tags/attr_vals vectors. Each value is first run through
// check_and_promote() against the table's yield type; entries for which
// that returns nil are skipped.
TableValPtr table_constructor__CPP(std::vector<ValPtr> indices,
                                   std::vector<ValPtr> vals, TableTypePtr t,
                                   std::vector<int> attr_tags,
                                   std::vector<ValPtr> attr_vals)
	{
	// Grab the yield type before "t" gets moved into the new table.
	const auto* yield_type = t->Yield().get();
	const auto num_entries = indices.size();

	auto attrs = build_attrs__CPP(std::move(attr_tags), std::move(attr_vals));
	auto aggr = make_intrusive<TableVal>(std::move(t), std::move(attrs));

	for ( decltype(indices.size()) idx = 0; idx < num_entries; ++idx )
		{
		auto promoted = check_and_promote(vals[idx], yield_type, true);

		if ( ! promoted )
			continue;

		aggr->Assign(std::move(indices[idx]), std::move(promoted));
		}

	return aggr;
	}
// Constructs a record of type "t", assigning the i'th element of "vals"
// to the record's i'th field.
RecordValPtr record_constructor__CPP(std::vector<ValPtr> vals, RecordTypePtr t)
	{
	auto rv = make_intrusive<RecordVal>(std::move(t));
	rv->Reserve(vals.size());

	int field = 0;
	for ( const auto& val : vals )
		rv->Assign(field++, val);

	return rv;
	}
// Constructs a vector of type "t" whose elements are, in order, those
// of "vals".
VectorValPtr vector_constructor__CPP(std::vector<ValPtr> vals, VectorTypePtr t)
	{
	auto vv = make_intrusive<VectorVal>(std::move(t));

	int slot = 0;
	for ( const auto& val : vals )
		vv->Assign(slot++, val);

	return vv;
	}
// Adds a timer for generating "event" with the given arguments at time
// "dt", unless Zeek is terminating. Always returns nil.
ValPtr schedule__CPP(double dt, EventHandlerPtr event, std::vector<ValPtr> args)
	{
	if ( run_state::terminating )
		return nullptr;

	timer_mgr->Add(new ScheduleTimer(event, std::move(args), dt));
	return nullptr;
	}
} // namespace zeek::detail

View file

@ -0,0 +1,198 @@
// See the file "COPYING" in the main distribution directory for copyright.
// Run-time support for (non-vector) operations in C++-compiled scripts.
#pragma once
#include "zeek/Val.h"
#include "zeek/script_opt/CPP/Func.h"
namespace zeek {
using SubNetValPtr = IntrusivePtr<zeek::SubNetVal>;
namespace detail {
// Returns the concatenation of the given strings.
extern StringValPtr str_concat__CPP(const String* s1, const String* s2);
// Returns true if string "s1" is in string "s2".
extern bool str_in__CPP(const String* s1, const String* s2);
// Converts a vector of individual ValPtr's into a single ListValPtr
// suitable for indexing an aggregate.
extern ListValPtr index_val__CPP(std::vector<ValPtr> indices);
// Returns the value corresponding to indexing the given table/vector/string
// with the given set of indices. These are functions rather than something
// generated directly so that they can package up the error handling for
// the case where there's no such index.
extern ValPtr index_table__CPP(const TableValPtr& t, std::vector<ValPtr> indices);
extern ValPtr index_vec__CPP(const VectorValPtr& vec, int index);
extern ValPtr index_string__CPP(const StringValPtr& svp, std::vector<ValPtr> indices);
// Invokes the given script or BiF function with the given arguments.
// This is a separate function because the generated code needs to
// (1) build the "args" vector using {} initializers, yet (2) pass
// along the address of that vector.
inline ValPtr invoke__CPP(Func* f, std::vector<ValPtr> args, Frame* frame)
	{
	auto* call_args = &args;
	return f->Invoke(call_args, frame);
	}
// Sets the global "g" to the value "v". Done via a function so that
// the value can also be returned, for use in assignment cascades.
inline ValPtr set_global__CPP(IDPtr g, ValPtr v)
	{
	ValPtr assigned = v;
	g->SetVal(std::move(v));
	return assigned;
	}
// Assigns the given value to the given global, which corresponds to an
// event handler; "gh" is updated to refer to that (registered) handler.
extern ValPtr set_event__CPP(IDPtr g, ValPtr v, EventHandlerPtr& gh);
// Convert (in terms of the Zeek language) the given value to the given type.
// A separate function in order to package up the error handling.
extern ValPtr cast_value_to_type__CPP(const ValPtr& v, const TypePtr& t);
// Returns the subnet corresponding to the given mask of the given address.
// A separate function in order to package up the error handling.
extern SubNetValPtr addr_mask__CPP(const IPAddr& a, uint32_t mask);
// Sets field number "field" of record "rec" to the value "v". Done via
// a function so that the value can also be returned, allowing for
// assignment cascades.
inline ValPtr assign_field__CPP(RecordValPtr rec, int field, ValPtr v)
	{
	ValPtr assigned = v;
	rec->Assign(field, std::move(v));
	return assigned;
	}
// Returns field number "field" of record "rec" (or the field's default,
// per GetFieldOrDefault()). A separate function in order to report a
// run-time error if there's no value to return.
inline ValPtr field_access__CPP(const RecordValPtr& rec, int field)
	{
	auto field_val = rec->GetFieldOrDefault(field);

	if ( field_val )
		return field_val;

	reporter->CPPRuntimeError("field value missing");
	return field_val;
	}
// Each of the following executes the assignment "v1[v2] = v3" for
// tables/vectors/strings.
extern ValPtr assign_to_index__CPP(TableValPtr v1, ValPtr v2, ValPtr v3);
extern ValPtr assign_to_index__CPP(VectorValPtr v1, ValPtr v2, ValPtr v3);
extern ValPtr assign_to_index__CPP(StringValPtr v1, ValPtr v2, ValPtr v3);
// Executes an "add" statement for the given set.
extern void add_element__CPP(TableValPtr aggr, ListValPtr indices);
// Executes a "delete" statement for the given set.
extern void remove_element__CPP(TableValPtr aggr, ListValPtr indices);
// Returns a new, empty table/set of Zeek type "t" that carries over the
// attributes of "v". "v" should itself be an empty table/set; a
// run-time error is reported if it isn't. A separate function in order
// to deal with the error handling, and inlined because it gets invoked
// a lot.
inline TableValPtr table_coerce__CPP(const ValPtr& v, const TypePtr& t)
	{
	auto* src = v->AsTableVal();
	const bool has_elements = src->Size() > 0;

	if ( has_elements )
		reporter->CPPRuntimeError("coercion of non-empty table/set");

	auto target_type = cast_intrusive<TableType>(t);
	return make_intrusive<TableVal>(std::move(target_type), src->GetAttrs());
	}
// Returns a new, empty vector of Zeek type "t". "v" should itself be
// an empty vector; a run-time error is reported if it isn't.
inline VectorValPtr vector_coerce__CPP(const ValPtr& v, const TypePtr& t)
	{
	auto* src = v->AsVectorVal();
	const bool has_elements = src->Size() > 0;

	if ( has_elements )
		reporter->CPPRuntimeError("coercion of non-empty vector");

	auto target_type = cast_intrusive<VectorType>(t);
	return make_intrusive<VectorVal>(std::move(target_type));
	}
// Constructs a set of the given type, containing the given elements, and
// with the associated attributes.
extern TableValPtr set_constructor__CPP(std::vector<ValPtr> elements,
TableTypePtr t,
std::vector<int> attr_tags,
std::vector<ValPtr> attr_vals);
// Constructs a table of the given type, containing the given elements
// (specified as parallel index/value vectors), and with the associated
// attributes.
extern TableValPtr table_constructor__CPP(std::vector<ValPtr> indices,
std::vector<ValPtr> vals,
TableTypePtr t,
std::vector<int> attr_tags,
std::vector<ValPtr> attr_vals);
// Constructs a record of the given type, whose (ordered) fields are
// assigned to the corresponding elements of the given vector of values.
extern RecordValPtr record_constructor__CPP(std::vector<ValPtr> vals,
RecordTypePtr t);
// Constructs a vector of the given type, populated with the given values.
extern VectorValPtr vector_constructor__CPP(std::vector<ValPtr> vals,
VectorTypePtr t);
// Schedules an event to occur at the given absolute time, parameterized
// with the given set of values. A separate function to facilitate avoiding
// the scheduling if Zeek is terminating.
extern ValPtr schedule__CPP(double dt, EventHandlerPtr event,
std::vector<ValPtr> args);
// Simple helper functions for supporting absolute value.

// Returns the magnitude of "v" as an unsigned value. The negation is
// done using unsigned arithmetic, since negating the most negative
// bro_int_t as a signed value overflows (its magnitude isn't
// representable as a bro_int_t).
// NOTE(review): assumes bro_uint_t is the unsigned counterpart of
// bro_int_t, with the same width - confirm.
inline bro_uint_t iabs__CPP(bro_int_t v)
	{
	const auto magnitude = static_cast<bro_uint_t>(v);
	return v < 0 ? bro_uint_t(0) - magnitude : magnitude;
	}
// Returns the absolute value of the given double.
inline double fabs__CPP(double v)
	{
	if ( v < 0.0 )
		return -v;

	return v;
	}
// The following operations are provided using functions to support
// error checking/reporting. Each one reports a run-time error if the
// second operand is zero.

// Signed integer division.
inline bro_int_t idiv__CPP(bro_int_t v1, bro_int_t v2)
	{
	const bool zero_divisor = (v2 == 0);
	if ( zero_divisor )
		reporter->CPPRuntimeError("division by zero");

	return v1 / v2;
	}

// Signed integer modulus.
inline bro_int_t imod__CPP(bro_int_t v1, bro_int_t v2)
	{
	const bool zero_divisor = (v2 == 0);
	if ( zero_divisor )
		reporter->CPPRuntimeError("modulo by zero");

	return v1 % v2;
	}

// Unsigned integer division.
inline bro_uint_t udiv__CPP(bro_uint_t v1, bro_uint_t v2)
	{
	const bool zero_divisor = (v2 == 0);
	if ( zero_divisor )
		reporter->CPPRuntimeError("division by zero");

	return v1 / v2;
	}

// Unsigned integer modulus.
inline bro_uint_t umod__CPP(bro_uint_t v1, bro_uint_t v2)
	{
	const bool zero_divisor = (v2 == 0);
	if ( zero_divisor )
		reporter->CPPRuntimeError("modulo by zero");

	return v1 % v2;
	}

// Floating-point division.
inline double fdiv__CPP(double v1, double v2)
	{
	const bool zero_divisor = (v2 == 0.0);
	if ( zero_divisor )
		reporter->CPPRuntimeError("division by zero");

	return v1 / v2;
	}
} // namespace zeek::detail
} // namespace zeek

View file

@ -0,0 +1,442 @@
// See the file "COPYING" in the main distribution directory for copyright.
#include "zeek/ZeekString.h"
#include "zeek/script_opt/CPP/RuntimeVec.h"
namespace zeek::detail {
// Helper function for ensuring that two vectors have matching sizes.
// Returns true if they do; otherwise reports a run-time error (and
// returns false, should that return).
static bool check_vec_sizes__CPP(const VectorValPtr& v1, const VectorValPtr& v2)
	{
	const bool same_size = v1->Size() == v2->Size();

	if ( ! same_size )
		reporter->CPPRuntimeError("vector operands are of different sizes");

	return same_size;
	}
// Helper function that returns a VectorTypePtr apt for use with the
// given vector type's yield type. We don't just use the yield type
// directly because here we're supporting low-level arithmetic operations
// (for example, adding one vector of "interval" to another), which
// we want to do using the low-level representations. We'll later
// convert the vector to the high-level representation if needed.
//
// Returns nil if the yield type's internal representation isn't one of
// int, unsigned, or double.
static VectorTypePtr base_vector_type__CPP(const VectorTypePtr& vt)
	{
	const auto internal = vt->Yield()->InternalType();

	if ( internal == TYPE_INTERNAL_INT )
		return make_intrusive<VectorType>(base_type(TYPE_INT));

	if ( internal == TYPE_INTERNAL_UNSIGNED )
		return make_intrusive<VectorType>(base_type(TYPE_COUNT));

	if ( internal == TYPE_INTERNAL_DOUBLE )
		return make_intrusive<VectorType>(base_type(TYPE_DOUBLE));

	return nullptr;
	}
// The kernel used for unary vector operations. For each element of the
// vector "v", extracts its low-level value using "accessor", applies the
// prefix operator "op" to it, and stores the result (wrapped in a new
// value of class "type") at the same position in "v_result". Both "v"
// and "v_result" need to be in scope where the macro is expanded.
//
// NOTE(review): the result of ValAt() is dereferenced without checking
// for nil, so this presumably relies on the vector having no holes -
// confirm.
#define VEC_OP1_KERNEL(accessor, type, op) \
for ( unsigned int i = 0; i < v->Size(); ++i ) \
{ \
auto v_i = v->ValAt(i)->accessor(); \
v_result->Assign(i, make_intrusive<type>(op v_i)); \
}
// A macro (since it's beyond my templating skillz to deal with the
// "op" operator) for unary vector operations, invoking the kernel
// per the underlying representation used by the vector. "double_kernel"
// is an optional kernel to use for vectors whose underlying type
// is "double". It needs to be optional because C++ will (rightfully)
// complain about applying certain C++ unary operations to doubles.
//
// Each expansion defines a function vec_op_<name>__CPP() that takes a
// vector and returns a new vector, of the type produced by
// base_vector_type__CPP(), holding the result of applying "op" to each
// element. If the underlying representation isn't one that's handled,
// the returned vector is left empty.
//
// NOTE(review): base_vector_type__CPP() returns nil for yield types that
// aren't int/unsigned/double, and "vt" is dereferenced unconditionally
// below, so callers presumably only pass vectors of such types - confirm.
#define VEC_OP1(name, op, double_kernel) \
VectorValPtr vec_op_ ## name ## __CPP(const VectorValPtr& v) \
{ \
auto vt = base_vector_type__CPP(v->GetType<VectorType>()); \
auto v_result = make_intrusive<VectorVal>(vt); \
\
switch ( vt->Yield()->InternalType() ) { \
case TYPE_INTERNAL_INT: \
{ \
VEC_OP1_KERNEL(AsInt, IntVal, op) \
break; \
} \
\
case TYPE_INTERNAL_UNSIGNED: \
{ \
VEC_OP1_KERNEL(AsCount, CountVal, op) \
break; \
} \
\
double_kernel \
\
default: \
break; \
} \
\
return v_result; \
}
// Instantiates a double_kernel for a given operation: expands VEC_OP1
// with an additional "case" that handles vectors whose underlying
// representation is "double".
#define VEC_OP1_WITH_DOUBLE(name, op) \
VEC_OP1(name, op, case TYPE_INTERNAL_DOUBLE: { VEC_OP1_KERNEL(AsDouble, DoubleVal, op) break; })
// The unary operations supported for vectors. These define
// vec_op_pos__CPP(), vec_op_neg__CPP(), vec_op_not__CPP() and
// vec_op_comp__CPP(). The latter two pass an empty double_kernel,
// so they have no case for vectors of doubles.
VEC_OP1_WITH_DOUBLE(pos, +)
VEC_OP1_WITH_DOUBLE(neg, -)
VEC_OP1(not, !,)
VEC_OP1(comp, ~,)
// A kernel for applying a binary operation element-by-element to two
// vectors of a given low-level type. The i'th elements of "v1" and "v2"
// are extracted using "accessor", combined as "v1_i op v2_i", and the
// result (wrapped in a new value of class "type") is stored at position
// i of "v_result". All three vectors need to be in scope where the
// macro is expanded. The loop bound comes from v1 alone, so the sizes
// of v1 and v2 need to have been checked beforehand.
//
// NOTE(review): as with the unary kernel, the results of ValAt() are
// dereferenced without a nil check - presumably the vectors have no
// holes; confirm.
#define VEC_OP2_KERNEL(accessor, type, op) \
for ( unsigned int i = 0; i < v1->Size(); ++i ) \
{ \
auto v1_i = v1->ValAt(i)->accessor(); \
auto v2_i = v2->ValAt(i)->accessor(); \
v_result->Assign(i, make_intrusive<type>(v1_i op v2_i)); \
}
// Analogous to VEC_OP1, instantiates a function for a given binary operation,
// which might-or-might-not be supported for low-level "double" types.
// This version is for operations whose result type is the same as the
// operand type.
//
// Each expansion defines a function vec_op_<name>__CPP() taking two
// vectors. It returns nil if their sizes differ (after a run-time error
// has been reported), and otherwise a new vector whose type is derived
// from that of the first operand.
//
// NOTE(review): "vt" comes from base_vector_type__CPP(), which only ever
// builds vectors of int, count, or double, so the TYPE_BOOL test below
// looks like it can never succeed - confirm whether boolean vectors are
// meant to go through the AsBool/BoolVal kernel.
//
// NOTE(review): as with VEC_OP1, a nil "vt" (unsupported yield type) is
// dereferenced unconditionally - confirm callers rule that out.
#define VEC_OP2(name, op, double_kernel) \
VectorValPtr vec_op_ ## name ## __CPP(const VectorValPtr& v1, const VectorValPtr& v2) \
{ \
if ( ! check_vec_sizes__CPP(v1, v2) ) \
return nullptr; \
\
auto vt = base_vector_type__CPP(v1->GetType<VectorType>()); \
auto v_result = make_intrusive<VectorVal>(vt); \
\
switch ( vt->Yield()->InternalType() ) { \
case TYPE_INTERNAL_INT: \
{ \
if ( vt->Yield()->Tag() == TYPE_BOOL ) \
VEC_OP2_KERNEL(AsBool, BoolVal, op) \
else \
VEC_OP2_KERNEL(AsInt, IntVal, op) \
break; \
} \
\
case TYPE_INTERNAL_UNSIGNED: \
{ \
VEC_OP2_KERNEL(AsCount, CountVal, op) \
break; \
} \
\
double_kernel \
\
default: \
break; \
} \
\
return v_result; \
}
// Instantiates a double_kernel for a binary operation: expands VEC_OP2
// with an additional "case" that handles vectors whose underlying
// representation is "double".
#define VEC_OP2_WITH_DOUBLE(name, op) \
VEC_OP2(name, op, case TYPE_INTERNAL_DOUBLE: { VEC_OP2_KERNEL(AsDouble, DoubleVal, op) break; })
// The binary operations supported for vectors. The arithmetic
// operations other than "mod" are also available for vectors of doubles;
// the remaining operations pass an empty double_kernel.
//
// NOTE(review): "div" and "mod" apply the C++ operator directly to each
// pair of elements, with no check for a zero divisor (in contrast to
// the scalar idiv__CPP()/imod__CPP() helpers) - confirm that's intended.
VEC_OP2_WITH_DOUBLE(add, +)
VEC_OP2_WITH_DOUBLE(sub, -)
VEC_OP2_WITH_DOUBLE(mul, *)
VEC_OP2_WITH_DOUBLE(div, /)
VEC_OP2(mod, %,)
VEC_OP2(and, &,)
VEC_OP2(or, |,)
VEC_OP2(xor, ^,)
VEC_OP2(andand, &&,)
VEC_OP2(oror, ||,)
// A version of VEC_OP2 that instead supports relational operations, so
// the result type is always vector-of-bool.
//
// Each expansion defines a function vec_op_<name>__CPP() taking two
// vectors. It returns nil if their sizes differ (after a run-time error
// has been reported). Otherwise, the elements are compared using the
// low-level representation of the first operand's yield type, with
// each comparison's outcome stored as a BoolVal. If that representation
// isn't int, unsigned, or double, the returned vector is left empty.
#define VEC_REL_OP(name, op) \
VectorValPtr vec_op_ ## name ## __CPP(const VectorValPtr& v1, const VectorValPtr& v2) \
{ \
if ( ! check_vec_sizes__CPP(v1, v2) ) \
return nullptr; \
\
auto vt = v1->GetType<VectorType>(); \
auto res_type = make_intrusive<VectorType>(base_type(TYPE_BOOL)); \
auto v_result = make_intrusive<VectorVal>(res_type); \
\
switch ( vt->Yield()->InternalType() ) { \
case TYPE_INTERNAL_INT: \
{ \
VEC_OP2_KERNEL(AsInt, BoolVal, op) \
break; \
} \
\
case TYPE_INTERNAL_UNSIGNED: \
{ \
VEC_OP2_KERNEL(AsCount, BoolVal, op) \
break; \
} \
\
case TYPE_INTERNAL_DOUBLE: \
{ \
VEC_OP2_KERNEL(AsDouble, BoolVal, op) \
break; \
} \
\
default: \
break; \
} \
\
return v_result; \
}
// The relational operations supported for vectors, which define
// vec_op_lt__CPP(), vec_op_gt__CPP(), vec_op_eq__CPP(), vec_op_ne__CPP(),
// vec_op_le__CPP() and vec_op_ge__CPP().
VEC_REL_OP(lt, <)
VEC_REL_OP(gt, >)
VEC_REL_OP(eq, ==)
VEC_REL_OP(ne, !=)
VEC_REL_OP(le, <=)
VEC_REL_OP(ge, >=)
// Adds "incr" to every element of the vector "v", in place, and returns
// "v". Elements are treated as signed integers if the yield type's
// internal representation is int, and otherwise as counts.
VectorValPtr vec_op_add__CPP(VectorValPtr v, int incr)
	{
	const bool signed_elems =
		v->GetType()->Yield()->InternalType() == TYPE_INTERNAL_INT;
	const auto num_elems = v->Size();

	for ( unsigned int idx = 0; idx < num_elems; ++idx )
		{
		auto elem = v->ValAt(idx);

		if ( signed_elems )
			v->Assign(idx, val_mgr->Int(elem->AsInt() + incr));
		else
			v->Assign(idx, val_mgr->Count(elem->AsCount() + incr));
		}

	return v;
	}
// Subtracts "i" from every element of the vector "v", in place, by
// adding its negation.
VectorValPtr vec_op_sub__CPP(VectorValPtr v, int i)
	{
	const int negated = -i;
	return vec_op_add__CPP(std::move(v), negated);
	}
// This function provides the core functionality for string-vector
// concatenation. The arguments are applied as though they appeared
// left-to-right in a statement "s1 + v2 + v3 + s4". For any invocation,
// v2 will always be non-nil, and one-and-only-one of s1, v3, or s4 will
// be non-nil. Positions where v2 (or v3, if present) has no element are
// left unassigned in the result.
static VectorValPtr str_vec_op_str_vec_add__CPP(const StringValPtr& s1,
                                                const VectorValPtr& v2, const VectorValPtr& v3,
                                                const StringValPtr& s4)
	{
	auto v_result = make_intrusive<VectorVal>(v2->GetType<VectorType>());
	const auto num_elems = v2->Size();

	for ( unsigned int idx = 0; idx < num_elems; ++idx )
		{
		auto left_elem = v2->ValAt(idx);
		if ( ! left_elem )
			continue;

		ValPtr right_elem;
		if ( v3 )
			{
			right_elem = v3->ValAt(idx);
			if ( ! right_elem )
				continue;
			}

		// Gather the pieces in left-to-right order.
		std::vector<const String*> pieces;

		if ( s1 )
			pieces.push_back(s1->AsString());

		pieces.push_back(left_elem->AsString());

		if ( right_elem )
			pieces.push_back(right_elem->AsString());

		if ( s4 )
			pieces.push_back(s4->AsString());

		v_result->Assign(idx, make_intrusive<StringVal>(concatenate(pieces)));
		}

	return v_result;
	}
// Element-wise concatenation of two string vectors.
VectorValPtr str_vec_op_add__CPP(const VectorValPtr& v1, const VectorValPtr& v2)
	{
	const StringValPtr no_string = nullptr;
	return str_vec_op_str_vec_add__CPP(no_string, v1, v2, no_string);
	}

// Appends the string s2 to each element of the string vector v1.
VectorValPtr str_vec_op_add__CPP(const VectorValPtr& v1, const StringValPtr& s2)
	{
	const StringValPtr no_string = nullptr;
	const VectorValPtr no_vector = nullptr;
	return str_vec_op_str_vec_add__CPP(no_string, v1, no_vector, s2);
	}

// Prepends the string s1 to each element of the string vector v2.
VectorValPtr str_vec_op_add__CPP(const StringValPtr& s1, const VectorValPtr& v2)
	{
	const StringValPtr no_string = nullptr;
	const VectorValPtr no_vector = nullptr;
	return str_vec_op_str_vec_add__CPP(s1, v2, no_vector, no_string);
	}
// Kernel for element-by-element string relationals. "rel1" and "rel2"
// codify which relational (</<=/==/!=/>=/>) we're aiming to support,
// in terms of how a Bstr_cmp() comparison should be assessed: the result
// for a pair of elements is true if Bstr_cmp() yields either of the two
// values. Positions where either vector has no element are left
// unassigned in the result.
static VectorValPtr str_vec_op_kernel__CPP(const VectorValPtr& v1,
                                           const VectorValPtr& v2,
                                           int rel1, int rel2)
	{
	auto bool_vec_type = make_intrusive<VectorType>(base_type(TYPE_BOOL));
	auto v_result = make_intrusive<VectorVal>(bool_vec_type);

	const auto num_elems = v1->Size();

	for ( unsigned int idx = 0; idx < num_elems; ++idx )
		{
		auto lhs = v1->ValAt(idx);
		auto rhs = v2->ValAt(idx);

		if ( ! lhs || ! rhs )
			continue;

		const auto cmp = Bstr_cmp(lhs->AsString(), rhs->AsString());
		const bool matches = (cmp == rel1) || (cmp == rel2);

		v_result->Assign(idx, val_mgr->Bool(matches));
		}

	return v_result;
	}
// The string-vector relationals. Each names the Bstr_cmp() outcomes
// (-1, 0, 1) for which the relational holds, and hands them to the
// shared kernel.

// v1 < v2
VectorValPtr str_vec_op_lt__CPP(const VectorValPtr& v1, const VectorValPtr& v2)
	{
	constexpr int less = -1;
	return str_vec_op_kernel__CPP(v1, v2, less, less);
	}

// v1 <= v2
VectorValPtr str_vec_op_le__CPP(const VectorValPtr& v1, const VectorValPtr& v2)
	{
	constexpr int less = -1;
	constexpr int equal = 0;
	return str_vec_op_kernel__CPP(v1, v2, less, equal);
	}

// v1 == v2
VectorValPtr str_vec_op_eq__CPP(const VectorValPtr& v1, const VectorValPtr& v2)
	{
	constexpr int equal = 0;
	return str_vec_op_kernel__CPP(v1, v2, equal, equal);
	}

// v1 != v2
VectorValPtr str_vec_op_ne__CPP(const VectorValPtr& v1, const VectorValPtr& v2)
	{
	constexpr int less = -1;
	constexpr int greater = 1;
	return str_vec_op_kernel__CPP(v1, v2, less, greater);
	}

// v1 > v2
VectorValPtr str_vec_op_gt__CPP(const VectorValPtr& v1, const VectorValPtr& v2)
	{
	constexpr int greater = 1;
	return str_vec_op_kernel__CPP(v1, v2, greater, greater);
	}

// v1 >= v2
VectorValPtr str_vec_op_ge__CPP(const VectorValPtr& v1, const VectorValPtr& v2)
	{
	constexpr int equal = 0;
	constexpr int greater = 1;
	return str_vec_op_kernel__CPP(v1, v2, equal, greater);
	}
// Implements vector conditionals: returns a new vector, of v2's type,
// whose i'th element comes from v2 if the i'th element of the boolean
// vector v1 is true, and from v3 otherwise. Returns nil if the three
// vectors don't all have the same size.
VectorValPtr vector_select__CPP(const VectorValPtr& v1, VectorValPtr v2,
                                VectorValPtr v3)
	{
	auto v_result = make_intrusive<VectorVal>(v2->GetType<VectorType>());

	const bool sizes_match =
		check_vec_sizes__CPP(v1, v2) && check_vec_sizes__CPP(v1, v3);

	if ( ! sizes_match )
		return nullptr;

	const auto num_elems = v1->Size();

	for ( unsigned int idx = 0; idx < num_elems; ++idx )
		{
		const auto& source = v1->BoolAt(idx) ? v2 : v3;
		v_result->Assign(idx, source->ValAt(idx));
		}

	return v_result;
	}
// Returns a new vector of type "targ" whose elements are those of "v"
// converted from their low-level representation to values of targ's
// yield type. Supported yield types are bool, enum, port, interval and
// time; anything else is an internal error.
VectorValPtr vector_coerce_to__CPP(const VectorValPtr& v, const TypePtr& targ)
	{
	auto v_result = make_intrusive<VectorVal>(cast_intrusive<VectorType>(targ));

	auto yield_type = targ->Yield();
	const auto yield_tag = yield_type->Tag();
	const auto num_elems = v->Size();

	for ( unsigned int idx = 0; idx < num_elems; ++idx )
		{
		ValPtr elem = v->ValAt(idx);
		ValPtr coerced;

		switch ( yield_tag ) {
		case TYPE_BOOL:
			coerced = val_mgr->Bool(elem->AsBool());
			break;

		case TYPE_ENUM:
			coerced = yield_type->AsEnumType()->GetEnumVal(elem->AsInt());
			break;

		case TYPE_PORT:
			coerced = make_intrusive<PortVal>(elem->AsCount());
			break;

		case TYPE_INTERVAL:
			coerced = make_intrusive<IntervalVal>(elem->AsDouble());
			break;

		case TYPE_TIME:
			coerced = make_intrusive<TimeVal>(elem->AsDouble());
			break;

		default:
			reporter->InternalError("bad vector type in vector_coerce_to__CPP");
		}

		v_result->Assign(idx, std::move(coerced));
		}

	return v_result;
	}
// Returns a new vector of type "targ" whose elements are those of "v"
// as obtained via IntAt() and stored as "int" values.
VectorValPtr vec_coerce_to_bro_int_t__CPP(const VectorValPtr& v, TypePtr targ)
	{
	auto v_result = make_intrusive<VectorVal>(cast_intrusive<VectorType>(targ));
	const auto num_elems = v->Size();

	for ( unsigned int idx = 0; idx < num_elems; ++idx )
		{
		auto elem = val_mgr->Int(v->IntAt(idx));
		v_result->Assign(idx, elem);
		}

	return v_result;
	}

// Returns a new vector of type "targ" whose elements are those of "v"
// as obtained via CountAt() and stored as "count" values.
VectorValPtr vec_coerce_to_bro_uint_t__CPP(const VectorValPtr& v, TypePtr targ)
	{
	auto v_result = make_intrusive<VectorVal>(cast_intrusive<VectorType>(targ));
	const auto num_elems = v->Size();

	for ( unsigned int idx = 0; idx < num_elems; ++idx )
		{
		auto elem = val_mgr->Count(v->CountAt(idx));
		v_result->Assign(idx, elem);
		}

	return v_result;
	}

// Returns a new vector of type "targ" whose elements are those of "v"
// as obtained via DoubleAt() and stored as "double" values.
VectorValPtr vec_coerce_to_double__CPP(const VectorValPtr& v, TypePtr targ)
	{
	auto v_result = make_intrusive<VectorVal>(cast_intrusive<VectorType>(targ));
	const auto num_elems = v->Size();

	for ( unsigned int idx = 0; idx < num_elems; ++idx )
		{
		auto elem = make_intrusive<DoubleVal>(v->DoubleAt(idx));
		v_result->Assign(idx, elem);
		}

	return v_result;
	}
} // namespace zeek::detail

View file

@ -0,0 +1,96 @@
// See the file "COPYING" in the main distribution directory for copyright.
// Run-time support for vector-oriented operations in C++-compiled scripts.
// The scope is unary (including appending), binary, and conditional
// operations. It does not include operations common to other aggregates,
// such as indexing and explicit coercion (but does include low-level
// coercion needed to support unary and binary operations).
#pragma once
#include "zeek/Val.h"
namespace zeek::detail {
// Adds v2 as a new final element of the vector v1, and returns v2.
// Done via a function because of the need to support assignment
// cascades.
inline ValPtr vector_append__CPP(VectorValPtr v1, ValPtr v2)
	{
	const auto next_slot = v1->Size();
	v1->Assign(next_slot, v2);
	return v2;
	}
// Unary vector operations.
extern VectorValPtr vec_op_pos__CPP(const VectorValPtr& v);
extern VectorValPtr vec_op_neg__CPP(const VectorValPtr& v);
extern VectorValPtr vec_op_not__CPP(const VectorValPtr& v);
extern VectorValPtr vec_op_comp__CPP(const VectorValPtr& v);
// Binary vector operations.
extern VectorValPtr vec_op_add__CPP(const VectorValPtr& v1, const VectorValPtr& v2);
extern VectorValPtr vec_op_sub__CPP(const VectorValPtr& v1, const VectorValPtr& v2);
extern VectorValPtr vec_op_mul__CPP(const VectorValPtr& v1, const VectorValPtr& v2);
extern VectorValPtr vec_op_div__CPP(const VectorValPtr& v1, const VectorValPtr& v2);
extern VectorValPtr vec_op_mod__CPP(const VectorValPtr& v1, const VectorValPtr& v2);
extern VectorValPtr vec_op_and__CPP(const VectorValPtr& v1, const VectorValPtr& v2);
extern VectorValPtr vec_op_or__CPP(const VectorValPtr& v1, const VectorValPtr& v2);
extern VectorValPtr vec_op_xor__CPP(const VectorValPtr& v1, const VectorValPtr& v2);
extern VectorValPtr vec_op_andand__CPP(const VectorValPtr& v1, const VectorValPtr& v2);
extern VectorValPtr vec_op_oror__CPP(const VectorValPtr& v1, const VectorValPtr& v2);
// Vector relational operations.
extern VectorValPtr vec_op_lt__CPP(const VectorValPtr& v1, const VectorValPtr& v2);
extern VectorValPtr vec_op_gt__CPP(const VectorValPtr& v1, const VectorValPtr& v2);
extern VectorValPtr vec_op_eq__CPP(const VectorValPtr& v1, const VectorValPtr& v2);
extern VectorValPtr vec_op_ne__CPP(const VectorValPtr& v1, const VectorValPtr& v2);
extern VectorValPtr vec_op_le__CPP(const VectorValPtr& v1, const VectorValPtr& v2);
extern VectorValPtr vec_op_ge__CPP(const VectorValPtr& v1, const VectorValPtr& v2);
// The following are to support ++/-- operations on vectors ...
extern VectorValPtr vec_op_add__CPP(VectorValPtr v, int incr);
extern VectorValPtr vec_op_sub__CPP(VectorValPtr v, int i);
// ... and these for vector-plus-scalar and vector-plus-vector string
// operations.
extern VectorValPtr str_vec_op_add__CPP(const VectorValPtr& v1,
const VectorValPtr& v2);
extern VectorValPtr str_vec_op_add__CPP(const VectorValPtr& v1,
const StringValPtr& v2);
extern VectorValPtr str_vec_op_add__CPP(const StringValPtr& v1,
const VectorValPtr& v2);
// String vector relationals.
extern VectorValPtr str_vec_op_lt__CPP(const VectorValPtr& v1,
const VectorValPtr& v2);
extern VectorValPtr str_vec_op_le__CPP(const VectorValPtr& v1,
const VectorValPtr& v2);
extern VectorValPtr str_vec_op_eq__CPP(const VectorValPtr& v1,
const VectorValPtr& v2);
extern VectorValPtr str_vec_op_ne__CPP(const VectorValPtr& v1,
const VectorValPtr& v2);
extern VectorValPtr str_vec_op_gt__CPP(const VectorValPtr& v1,
const VectorValPtr& v2);
extern VectorValPtr str_vec_op_ge__CPP(const VectorValPtr& v1,
const VectorValPtr& v2);
// Support for vector conditional ('?:') expressions. Using the boolean
// vector v1 as a selector, returns a new vector populated with the
// elements selected out of v2 and v3.
extern VectorValPtr vector_select__CPP(const VectorValPtr& v1, VectorValPtr v2,
VectorValPtr v3);
// Returns a new vector reflecting the given vector coerced to the given
// type. Assumes v already has the correct internal type. This can go
// away after we finish migrating to ZVal's.
extern VectorValPtr vector_coerce_to__CPP(const VectorValPtr& v,
const TypePtr& targ);
// Similar coercions, but these also work when v might not already have
// the correct internal type.  There is one variant per target
// representation, as named in the function (bro_int_t, bro_uint_t,
// double); "targ" is the type for the resulting vector.
extern VectorValPtr vec_coerce_to_bro_int_t__CPP(const VectorValPtr& v,
TypePtr targ);
extern VectorValPtr vec_coerce_to_bro_uint_t__CPP(const VectorValPtr& v,
TypePtr targ);
extern VectorValPtr vec_coerce_to_double__CPP(const VectorValPtr& v,
TypePtr targ);
} // namespace zeek::detail

384
src/script_opt/CPP/Stmts.cc Normal file
View file

@ -0,0 +1,384 @@
// See the file "COPYING" in the main distribution directory for copyright.
// C++ compiler methods relating to generating code for Stmt's.
#include "zeek/script_opt/CPP/Compile.h"
namespace zeek::detail {
void CPPCompile::GenStmt(const Stmt* s)
{
switch ( s->Tag() ) {
case STMT_INIT:
GenInitStmt(s->AsInitStmt());
break;
case STMT_LIST:
{
// These always occur in contexts surrounded by {}'s,
// so no need to add them explicitly.
auto sl = s->AsStmtList();
const auto& stmts = sl->Stmts();
for ( const auto& stmt : stmts )
GenStmt(stmt);
}
break;
case STMT_EXPR:
if ( auto e = s->AsExprStmt()->StmtExpr() )
Emit("%s;", GenExpr(e, GEN_DONT_CARE, true));
break;
case STMT_IF:
GenIfStmt(s->AsIfStmt());
break;
case STMT_WHILE:
GenWhileStmt(s->AsWhileStmt());
break;
case STMT_NULL:
Emit(";");
break;
case STMT_RETURN:
GenReturnStmt(s->AsReturnStmt());
break;
case STMT_ADD:
GenAddStmt(static_cast<const ExprStmt*>(s));
break;
case STMT_DELETE:
GenDeleteStmt(static_cast<const ExprStmt*>(s));
break;
case STMT_EVENT:
GenEventStmt(static_cast<const EventStmt*>(s));
break;
case STMT_SWITCH:
GenSwitchStmt(static_cast<const SwitchStmt*>(s));
break;
case STMT_FOR:
GenForStmt(s->AsForStmt());
break;
case STMT_NEXT:
Emit("continue;");
break;
case STMT_BREAK:
if ( break_level > 0 )
Emit("break;");
else
Emit("return false;");
break;
case STMT_PRINT:
{
auto el = static_cast<const ExprListStmt*>(s)->ExprList();
Emit("do_print_stmt({%s});", GenExpr(el, GEN_VAL_PTR));
}
break;
case STMT_FALLTHROUGH:
break;
case STMT_WHEN:
ASSERT(0);
break;
default:
reporter->InternalError("bad statement type in CPPCompile::GenStmt");
}
}
void CPPCompile::GenInitStmt(const InitStmt* init)
{
auto inits = init->Inits();
for ( const auto& aggr : inits )
{
const auto& t = aggr->GetType();
if ( ! IsAggr(t->Tag()) )
continue;
auto type_name = IntrusiveVal(t);
auto type_type = TypeType(t);
auto type_ind = GenTypeName(t);
if ( locals.count(aggr.get()) == 0 )
{
// fprintf(stderr, "aggregate %s unused\n", obj_desc(aggr.get()).c_str());
continue;
}
Emit("%s = make_intrusive<%s>(cast_intrusive<%s>(%s));",
IDName(aggr), type_name,
type_type, type_ind);
}
}
// Generates an "if" statement, including its "else" arm unless that arm
// is just a null statement.
void CPPCompile::GenIfStmt(const IfStmt* i)
	{
	Emit("if ( %s )", GenExpr(i->StmtExpr(), GEN_NATIVE));

	StartBlock();
	GenStmt(i->TrueBranch());
	EndBlock();

	auto else_branch = i->FalseBranch();

	if ( else_branch->Tag() == STMT_NULL )
		// No meaningful "else" to generate.
		return;

	Emit("else");

	StartBlock();
	GenStmt(else_branch);
	EndBlock();
	}
// Generates a "while" loop.  break_level is raised for the duration of the
// body so that a nested "break" is generated as a C++ "break".
void CPPCompile::GenWhileStmt(const WhileStmt* w)
	{
	auto cond = GenExpr(w->Condition(), GEN_NATIVE);
	Emit("while ( %s )", cond);

	StartBlock();

	++break_level;
	GenStmt(w->Body());
	--break_level;

	EndBlock();
	}
void CPPCompile::GenReturnStmt(const ReturnStmt* r)
{
auto e = r->StmtExpr();
if ( ! ret_type || ! e || e->GetType()->Tag() == TYPE_VOID || in_hook )
{
if ( in_hook )
Emit("return true;");
else
Emit("return;");
return;
}
auto gt = ret_type->Tag() == TYPE_ANY ? GEN_VAL_PTR : GEN_NATIVE;
auto ret = GenExpr(e, gt);
if ( e->GetType()->Tag() == TYPE_ANY )
ret = GenericValPtrToGT(ret, ret_type, gt);
Emit("return %s;", ret);
}
// Generates an "add" statement: the statement's expression supplies the
// aggregate (first operand) and the indices to add (second operand).
void CPPCompile::GenAddStmt(const ExprStmt* es)
	{
	auto add_expr = es->StmtExpr();

	// Generate the aggregate before the indices, in operand order.
	auto aggr_code = GenExpr(add_expr->GetOp1(), GEN_DONT_CARE);
	auto index_code = GenExpr(add_expr->GetOp2(), GEN_VAL_PTR);

	Emit("add_element__CPP(%s, index_val__CPP({%s}));",
	     aggr_code, index_code);
	}
// Generates a "delete" statement, which either removes an indexed element
// from an aggregate or removes a field from a record.
void CPPCompile::GenDeleteStmt(const ExprStmt* es)
	{
	auto del_expr = es->StmtExpr();
	auto target = del_expr->GetOp1();
	auto target_code = GenExpr(target, GEN_VAL_PTR);

	if ( del_expr->Tag() != EXPR_INDEX )
		{
		// The only other form is deleting a record field.
		ASSERT(del_expr->Tag() == EXPR_FIELD);

		auto field_code = GenField(target, del_expr->AsFieldExpr()->Field());
		Emit("%s->Remove(%s);", target_code, field_code);
		return;
		}

	auto index_code = GenExpr(del_expr->GetOp2(), GEN_VAL_PTR);
	Emit("remove_element__CPP(%s, index_val__CPP({%s}));",
	     target_code, index_code);
	}
void CPPCompile::GenEventStmt(const EventStmt* ev)
{
auto ev_s = ev->StmtExprPtr();
auto ev_e = cast_intrusive<EventExpr>(ev_s);
auto ev_n = ev_e->Name();
RegisterEvent(ev_n);
if ( ev_e->Args()->Exprs().length() > 0 )
Emit("event_mgr.Enqueue(%s_ev, %s);",
globals[std::string(ev_n)],
GenExpr(ev_e->Args(), GEN_VAL_PTR));
else
Emit("event_mgr.Enqueue(%s_ev, Args{});",
globals[std::string(ev_n)]);
}
void CPPCompile::GenSwitchStmt(const SwitchStmt* sw)
{
auto e = sw->StmtExpr();
auto cases = sw->Cases();
auto e_it = e->GetType()->InternalType();
bool is_int = e_it == TYPE_INTERNAL_INT;
bool is_uint = e_it == TYPE_INTERNAL_UNSIGNED;
bool organic = is_int || is_uint;
std::string sw_val;
if ( organic )
sw_val = GenExpr(e, GEN_NATIVE);
else
sw_val = std::string("p_hash(") + GenExpr(e, GEN_VAL_PTR) + ")";
Emit("switch ( %s ) {", sw_val.c_str());
++break_level;
for ( const auto& c : *cases )
{
if ( c->ExprCases() )
{
const auto& c_e_s =
c->ExprCases()->AsListExpr()->Exprs();
for ( const auto& c_e : c_e_s )
{
auto c_v = c_e->Eval(nullptr);
ASSERT(c_v);
std::string c_v_rep;
if ( is_int )
c_v_rep = Fmt(int(c_v->AsInt()));
else if ( is_uint )
c_v_rep = Fmt(c_v->AsCount());
else
c_v_rep = Fmt(p_hash(c_v));
Emit("case %s:", c_v_rep);
}
}
else
Emit("default:");
StartBlock();
GenStmt(c->Body());
EndBlock();
}
--break_level;
Emit("}");
}
// Generates a "for" loop over a table, vector, or string.  The loop is
// wrapped in its own C++ scope to hold the internal iteration variables.
// The type-specific helpers each leave a block open for the loop body;
// the table helper opens two.
void CPPCompile::GenForStmt(const ForStmt* f)
	{
	Emit("{ // begin a new scope for the internal loop vars");

	++break_level;

	auto aggr = f->StmtExprPtr();
	auto aggr_tag = aggr->GetType()->Tag();
	auto loop_vars = f->LoopVars();

	switch ( aggr_tag ) {
	case TYPE_TABLE:
		GenForOverTable(aggr, f->ValueVar(), loop_vars);
		break;

	case TYPE_VECTOR:
		GenForOverVector(aggr, loop_vars);
		break;

	case TYPE_STRING:
		GenForOverString(aggr, loop_vars);
		break;

	default:
		reporter->InternalError("bad for statement in CPPCompile::GenStmt");
	}

	GenStmt(f->LoopBody());
	EndBlock();

	// Table iteration has an additional enclosing block to close.
	if ( aggr_tag == TYPE_TABLE )
		EndBlock();

	--break_level;

	Emit("} // end of for scope");
	}
// Generates the preamble of a "for" loop over a table: the iteration
// itself plus assignments to the (optional) value variable and to each of
// the index loop variables.  Leaves two blocks open (the non-empty check
// and the loop proper), which the caller closes.
void CPPCompile::GenForOverTable(const ExprPtr& tbl, const IDPtr& value_var,
                                 const IDPList* loop_vars)
	{
	Emit("auto tv__CPP = %s;", GenExpr(tbl, GEN_DONT_CARE));
	Emit("const PDict<TableEntryVal>* loop_vals__CPP = tv__CPP->AsTable();");

	Emit("if ( loop_vals__CPP->Length() > 0 )");
	StartBlock();

	Emit("for ( const auto& lve__CPP : *loop_vals__CPP )");
	StartBlock();

	Emit("auto k__CPP = lve__CPP.GetHashKey();");
	Emit("auto* current_tev__CPP = lve__CPP.GetValue<TableEntryVal*>();");
	Emit("auto ind_lv__CPP = tv__CPP->RecreateIndex(*k__CPP);");

	if ( value_var )
		{
		auto value_code =
			GenericValPtrToGT("current_tev__CPP->GetVal()",
			                  value_var->GetType(), GEN_NATIVE);
		Emit("%s = %s;", IDName(value_var), value_code);
		}

	const int num_vars = loop_vars->length();

	for ( int n = 0; n < num_vars; ++n )
		{
		auto lv = (*loop_vars)[n];
		const auto& lv_type = lv->GetType();
		auto accessor = NativeAccessor(lv_type);

		// Native values are assigned directly; others need a new
		// reference to the underlying object.
		const char* assign_fmt =
			IsNativeType(lv_type) ?
				"%s = ind_lv__CPP->Idx(%s)%s;" :
				"%s = {NewRef{}, ind_lv__CPP->Idx(%s)%s};";

		Emit(assign_fmt, IDName(lv), Fmt(n), accessor);
		}
	}
// Generates the preamble of a "for" loop over a vector.  The (single)
// loop variable is assigned each index for which the vector has an
// element.  Leaves one block open, which the caller closes.
void CPPCompile::GenForOverVector(const ExprPtr& vec, const IDPList* loop_vars)
	{
	auto vec_code = GenExpr(vec, GEN_DONT_CARE);
	Emit("auto vv__CPP = %s;", vec_code);

	Emit("for ( auto i__CPP = 0u; i__CPP < vv__CPP->Size(); ++i__CPP )");
	StartBlock();

	// Skip over holes in the vector.
	Emit("if ( ! vv__CPP->Has(i__CPP) ) continue;");

	auto index_var = (*loop_vars)[0];
	Emit("%s = i__CPP;", IDName(index_var));
	}
// Generates the preamble of a "for" loop over a string.  The (single)
// loop variable is assigned a new one-byte string for each position.
// Leaves one block open, which the caller closes.
void CPPCompile::GenForOverString(const ExprPtr& str, const IDPList* loop_vars)
	{
	auto str_code = GenExpr(str, GEN_DONT_CARE);
	Emit("auto sval__CPP = %s;", str_code);

	Emit("for ( auto i__CPP = 0u; i__CPP < sval__CPP->Len(); ++i__CPP )");
	StartBlock();

	Emit("auto sv__CPP = make_intrusive<StringVal>(1, (const char*) sval__CPP->Bytes() + i__CPP);");

	auto char_var = (*loop_vars)[0];
	Emit("%s = std::move(sv__CPP);", IDName(char_var));
	}
} // zeek::detail

View file

@ -0,0 +1,91 @@
// See the file "COPYING" in the main distribution directory for copyright.
#include "zeek/Desc.h"
#include "zeek/script_opt/CPP/Tracker.h"
#include "zeek/script_opt/CPP/Util.h"
#include "zeek/script_opt/ProfileFunc.h"
namespace zeek::detail {
// Enters "key" into the tracker (a no-op if it's already present).  "h" is
// the key's hash, or 0 to have it computed here.  The first key seen for a
// given hash becomes that hash's representative, and either picks up the
// index/scope recorded for it in the mapper (making it "inherited") or is
// assigned the next non-inherited index.
template<class T>
void CPPTracker<T>::AddKey(IntrusivePtr<T> key, p_hash_type h)
	{
	if ( HasKey(key) )
		return;

	if ( h == 0 )
		h = Hash(key);

	const bool first_for_hash = map2.count(h) == 0;

	if ( first_for_hash )
		{
		int index;
		const bool previously_generated = mapper && mapper->count(h) > 0;

		if ( previously_generated )
			{
			const auto& prev = (*mapper)[h];
			index = prev.index;
			scope2[h] = Fmt(prev.scope);
			inherited.insert(h);
			}
		else
			{
			index = num_non_inherited++;
			keys.push_back(key);
			}

		map2[h] = index;
		reps[h] = key.get();
		}

	ASSERT(h != 0);	// check for hash botches

	map[key.get()] = h;
	}
// Returns the C++ variable name for "key", which must already be in the
// tracker.  The name has the form <base_name>_<index>__CPP, qualified with
// the originating scope's prefix if the key is inherited.
template<class T>
std::string CPPTracker<T>::KeyName(const T* key)
	{
	ASSERT(HasKey(key));

	const auto h = map[key];
	ASSERT(h != 0);

	std::string name = base_name + "_" + Fmt(map2[h]) + "__CPP";

	if ( IsInherited(h) )
		name = scope_prefix(scope2[h]) + name;

	return name;
	}
// Writes a "hash" record (hash, index, scope) for "key" to log_file,
// unless the key is inherited, in which case nothing is written.
template<class T>
void CPPTracker<T>::LogIfNew(IntrusivePtr<T> key, int scope, FILE* log_file)
	{
	if ( ! IsInherited(key) )
		{
		auto h = map[key.get()];
		auto idx = map2[h];

		fprintf(log_file, "hash\n%llu %d %d\n", h, idx, scope);
		}
	}
// Computes the hash for "key" from its deterministic description, prefixed
// with the tracker's base name.
template<class T>
p_hash_type CPPTracker<T>::Hash(IntrusivePtr<T> key) const
	{
	ODesc desc;
	desc.SetDeterminism(true);
	key->Describe(&desc);

	const std::string tagged_desc = base_name + desc.Description();

	return p_hash_type(std::hash<std::string>{}(tagged_desc));
	}
// Instantiate the templates we'll need.  These explicit instantiations are
// required because the member functions above are defined in this source
// file rather than in the header.
template class CPPTracker<Type>;
template class CPPTracker<Attributes>;
template class CPPTracker<Expr>;
} // zeek::detail

View file

@ -0,0 +1,97 @@
// See the file "COPYING" in the main distribution directory for copyright.
// C++ compiler helper class that tracks distinct instances of a given key,
// where the key can have any IntrusivePtr type. The properties of a
// tracker are that it (1) supports a notion that two technically distinct
// keys in fact reflect the same underlying object, (2) provides an
// instance of such keys to consistently serve as their "representative",
// (3) provides names (suitable for use as C++ variables) for representative
// keys, and (4) has a notion of "inheritance" (the underlying object is
// already available from a previously generated namespace).
//
// Notions of "same" are taken from hash values ala those provided by
// ProfileFunc.
#pragma once
#include "zeek/script_opt/CPP/HashMgr.h"
namespace zeek::detail {
// T is a type that has an IntrusivePtr instantiation.
//
// Keys are tracked by raw pointer and grouped by hash value: all keys with
// the same hash share one representative, one index, and therefore one
// generated name.
template <class T>
class CPPTracker {
public:
// The base name is used to construct key names. The mapper,
// if present, maps hash values to information about the previously
// generated scope in which the value appears.
CPPTracker(const char* _base_name, VarMapper* _mapper = nullptr)
: base_name(_base_name), mapper(_mapper)
{
}
// True if the given key has already been entered.
bool HasKey(const T* key) const { return map.count(key) > 0; }
bool HasKey(IntrusivePtr<T> key) const { return HasKey(key.get()); }
// Only adds the key if it's not already present. If a hash
// is provided, then refrains from computing it.  A hash of 0
// means "not provided".
void AddKey(IntrusivePtr<T> key, p_hash_type h = 0);
// Returns the (C++ variable) name associated with the given key.
// The key must already have been added.
std::string KeyName(const T* key);
std::string KeyName(IntrusivePtr<T> key)
{ return KeyName(key.get()); }
// Returns all of the distinct keys entered into the tracker.
// A key is "distinct" if it's both (1) a representative and
// (2) not inherited.
const std::vector<IntrusivePtr<T>>& DistinctKeys() const
{ return keys; }
// For a given key, get its representative.  The key must already
// have been added.
const T* GetRep(const T* key)
{ ASSERT(HasKey(key)); return reps[map[key]]; }
const T* GetRep(IntrusivePtr<T> key) { return GetRep(key.get()); }
// True if the given key is represented by an inherited value.
// The pointer-based variants require that the key has already been
// added; the hash-based variant simply checks the inherited set.
bool IsInherited(const T* key)
{ ASSERT(HasKey(key)); return IsInherited(map[key]); }
bool IsInherited(const IntrusivePtr<T>& key)
{ ASSERT(HasKey(key)); return IsInherited(map[key.get()]); }
bool IsInherited(p_hash_type h) { return inherited.count(h) > 0; }
// If the given key is not inherited, logs it and its associated
// scope to the given file.
void LogIfNew(IntrusivePtr<T> key, int scope, FILE* log_file);
private:
// Compute a hash for the given key.
p_hash_type Hash(IntrusivePtr<T> key) const;
// Maps keys to internal representations (i.e., hashes).
std::unordered_map<const T*, p_hash_type> map;
// Maps internal representations to distinct values. These
// may-or-may-not be indices into an "inherited" namespace scope.
std::unordered_map<p_hash_type, int> map2;
std::unordered_map<p_hash_type, std::string> scope2; // only if inherited
std::unordered_set<p_hash_type> inherited; // which are inherited
int num_non_inherited = 0; // distinct non-inherited map2 entries
// Tracks the set of distinct keys, to facilitate iterating over them.
// Each such key also has an entry in map2.
std::vector<IntrusivePtr<T>> keys;
// Maps internal representations back to keys.  The key stored is
// the first one added for the given hash, i.e., the representative.
std::unordered_map<p_hash_type, const T*> reps;
// Used to construct key names.
std::string base_name;
// If non-nil, the mapper to consult for previous names.
VarMapper* mapper;
};
} // zeek::detail

570
src/script_opt/CPP/Types.cc Normal file
View file

@ -0,0 +1,570 @@
// See the file "COPYING" in the main distribution directory for copyright.
#include "zeek/script_opt/CPP/Compile.h"
namespace zeek::detail {
// True if values of type "t" are represented in the generated code by a
// native C++ type rather than by a pointer to a Val.  A nil type counts as
// native.
bool CPPCompile::IsNativeType(const TypePtr& t) const
	{
	if ( ! t )
		return true;

	switch ( t->Tag() ) {
	// Types held via pointers.
	case TYPE_ADDR:
	case TYPE_ANY:
	case TYPE_FILE:
	case TYPE_FUNC:
	case TYPE_OPAQUE:
	case TYPE_PATTERN:
	case TYPE_RECORD:
	case TYPE_STRING:
	case TYPE_SUBNET:
	case TYPE_TABLE:
	case TYPE_TYPE:
	case TYPE_VECTOR:
	case TYPE_LIST:	// these occur when initializing tables
		return false;

	// Types with a direct C++ counterpart.
	case TYPE_BOOL:
	case TYPE_COUNT:
	case TYPE_DOUBLE:
	case TYPE_ENUM:
	case TYPE_INT:
	case TYPE_INTERVAL:
	case TYPE_PORT:
	case TYPE_TIME:
	case TYPE_VOID:
		return true;

	default:
		reporter->InternalError("bad type in CPPCompile::IsNativeType");
	}
	}
// Given "expr", generated code yielding a value of type "t" in its usual
// representation, returns code yielding it in the representation
// requested by "gt".  The expression is returned as is unless a native
// value needs wrapping as a ValPtr.
std::string CPPCompile::NativeToGT(const std::string& expr, const TypePtr& t,
                                   GenType gt)
	{
	if ( gt == GEN_DONT_CARE || gt == GEN_NATIVE || ! IsNativeType(t) )
		// Already in an acceptable form.
		return expr;

	// Need to convert to a ValPtr, by applying the appropriate
	// constructor to the expression.
	auto construct = [&expr](const std::string& ctor)
		{
		return ctor + "(" + expr + ")";
		};

	switch ( t->Tag() ) {
	case TYPE_VOID:
		return expr;

	case TYPE_BOOL:
		return construct("val_mgr->Bool");

	case TYPE_COUNT:
		return construct("val_mgr->Count");

	case TYPE_INT:
		return construct("val_mgr->Int");

	case TYPE_PORT:
		return construct("val_mgr->Port");

	case TYPE_ENUM:
		return std::string("make_enum__CPP(") + GenTypeName(t) + ", " +
		       expr + ")";

	default:
		return construct(std::string("make_intrusive<") +
		                 IntrusiveVal(t) + ">");
	}
	}
// Given "expr", generated code yielding a generic ValPtr known to hold a
// value of type "t", returns code yielding it in the representation
// requested by "gt": either the native value (via the type's accessor) or
// a pointer cast to the type's specific Val class.
std::string CPPCompile::GenericValPtrToGT(const std::string& expr,
                                          const TypePtr& t, GenType gt)
	{
	const bool want_native = gt != GEN_VAL_PTR && IsNativeType(t);

	if ( ! want_native )
		return std::string("cast_intrusive<") + IntrusiveVal(t) +
		       ">(" + expr + ")";

	return expr + NativeAccessor(t);
	}
// Adds the initializations needed to construct the run-time variable
// corresponding to type "t".  Compound types get type-specific
// initializations; any type with a script-level name additionally has
// that name set.
void CPPCompile::ExpandTypeVar(const TypePtr& t)
	{
	auto tn = GenTypeName(t);
	const auto tag = t->Tag();

	if ( tag == TYPE_LIST )
		ExpandListTypeVar(t, tn);

	else if ( tag == TYPE_RECORD )
		ExpandRecordTypeVar(t, tn);

	else if ( tag == TYPE_ENUM )
		ExpandEnumTypeVar(t, tn);

	else if ( tag == TYPE_TABLE )
		ExpandTableTypeVar(t, tn);

	else if ( tag == TYPE_FUNC )
		ExpandFuncTypeVar(t, tn);

	else if ( tag == TYPE_TYPE )
		{
		auto inner = GenTypeName(t->AsTypeType()->GetType());
		AddInit(t, tn, std::string("make_intrusive<TypeType>(") +
		               inner + ")");
		}

	else if ( tag == TYPE_VECTOR )
		{
		auto yield = GenTypeName(t->AsVectorType()->Yield());
		AddInit(t, tn, std::string("make_intrusive<VectorType>(") +
		               yield + ")");
		}

	// Other types need no construction of their own.

	const auto& name = t->GetName();
	if ( ! name.empty() )
		AddInit(t, tn + "->SetName(\"" + name + "\");");

	AddInit(t);
	}
// Adds initializations that populate the run-time type list named by "tn"
// with the element types of the list type "t", in order.
void CPPCompile::ExpandListTypeVar(const TypePtr& t, std::string& tn)
	{
	// Iterate over the types in place: there's no need to copy the
	// vector, and a range-based loop avoids comparing a signed index
	// against an unsigned size.
	const auto& tl = t->AsTypeList()->GetTypes();
	const auto t_name = tn + "->AsTypeList()";

	for ( const auto& elem_type : tl )
		AddInit(t, t_name + "->Append(" +
		           GenTypeName(elem_type) + ");");
	}
// Adds initializations that populate the fields of the run-time record
// type named by "tn".  The generated code only adds the fields if the
// record doesn't have any yet.
void CPPCompile::ExpandRecordTypeVar(const TypePtr& t, std::string& tn)
	{
	const auto* fields = t->AsRecordType()->Types();
	const auto rec_accessor = tn + "->AsRecordType()";

	AddInit(t, "if ( " + rec_accessor + "->NumFields() == 0 )");
	AddInit(t, "{");
	AddInit(t, "type_decl_list tl;");

	// One "tl.append(...)" per field.
	for ( const auto& field : *fields )
		AddInit(t, GenTypeDecl(field));

	AddInit(t, rec_accessor + "->AddFieldsDirectly(tl);");
	AddInit(t, "}");
	}
// Adds initializations that populate the run-time enum type named by "tn"
// with the names/values of the enum type "t".  The generated code only
// adds them if the enum doesn't have any names yet.
void CPPCompile::ExpandEnumTypeVar(const TypePtr& t, std::string& tn)
	{
	auto e_name = tn + "->AsEnumType()";
	auto et = t->AsEnumType();

	// Fetch the names once and iterate over that, rather than fetching
	// them a second time for the loop.
	const auto& names = et->Names();

	AddInit(t, "{ auto et = " + e_name + ";");
	AddInit(t, "if ( et->Names().size() == 0 ) {");

	for ( const auto& name_pair : names )
		AddInit(t, std::string("\tet->AddNameInternal(\"") +
		           name_pair.first + "\", " +
		           Fmt(int(name_pair.second)) + ");");

	AddInit(t, "}}");
	}
// Adds the initialization that constructs the run-time table (or set)
// type named by "tn" from its index types and, for tables, its yield type.
void CPPCompile::ExpandTableTypeVar(const TypePtr& t, std::string& tn)
	{
	auto tbl = t->AsTableType();
	const auto indices_name = GenTypeName(tbl->GetIndices());

	std::string constructor;

	if ( tbl->IsSet() )
		// Sets have no yield type.
		constructor =
			"make_intrusive<SetType>(cast_intrusive<TypeList>(" +
			indices_name + " ), nullptr)";
	else
		constructor =
			"make_intrusive<TableType>(cast_intrusive<TypeList>(" +
			indices_name + "), " +
			GenTypeName(tbl->Yield()) + ")";

	AddInit(t, tn, constructor);
	}
// Adds the initialization that constructs the run-time function type
// named by "tn" from its parameters, yield type (nullptr if none), and
// flavor.
void CPPCompile::ExpandFuncTypeVar(const TypePtr& t, std::string& tn)
	{
	auto f = t->AsFuncType();

	auto params_name = GenTypeName(f->Params());

	auto yt = f->Yield();
	std::string yield_name = yt ? GenTypeName(yt) : std::string("nullptr");

	std::string flavor_name;

	switch ( f->Flavor() ) {
	case FUNC_FLAVOR_FUNCTION:
		flavor_name = "FUNC_FLAVOR_FUNCTION";
		break;

	case FUNC_FLAVOR_EVENT:
		flavor_name = "FUNC_FLAVOR_EVENT";
		break;

	case FUNC_FLAVOR_HOOK:
		flavor_name = "FUNC_FLAVOR_HOOK";
		break;

	default:
		// Leave the name empty for anything unrecognized.
		break;
	}

	std::string constructor =
		"make_intrusive<FuncType>(cast_intrusive<RecordType>(";
	constructor += params_name;
	constructor += "), ";
	constructor += yield_name;
	constructor += ", ";
	constructor += flavor_name;
	constructor += ")";

	AddInit(t, tn, constructor);
	}
// Returns a statement that appends a new TypeDecl, corresponding to "td",
// to the type_decl_list "tl".  The attributes argument is only included
// if the declaration has attributes.
std::string CPPCompile::GenTypeDecl(const TypeDecl* td)
	{
	const auto type_name = GenTypeName(td->type);
	const auto id_copy = std::string("util::copy_string(\"") + td->id + "\")";

	std::string decl = std::string("tl.append(new TypeDecl(") +
	                   id_copy + ", " + type_name;

	if ( td->attrs )
		{
		decl += ", ";
		decl += AttrsName(td->attrs);
		}

	decl += "));";

	return decl;
	}
// Returns the name of the C++ variable holding type "t", which is the
// name the type tracker associates with t's representative.
std::string CPPCompile::GenTypeName(const Type* t)
	{
	auto rep = TypeRep(t);
	return types.KeyName(rep);
	}
// Returns the C++ spelling of the given type tag, i.e., the name of the
// corresponding TypeTag enum constant.
const char* CPPCompile::TypeTagName(TypeTag tag) const
	{
	switch ( tag ) {
	// Atomic types.
	case TYPE_BOOL:		return "TYPE_BOOL";
	case TYPE_COUNT:	return "TYPE_COUNT";
	case TYPE_DOUBLE:	return "TYPE_DOUBLE";
	case TYPE_ENUM:		return "TYPE_ENUM";
	case TYPE_INT:		return "TYPE_INT";
	case TYPE_INTERVAL:	return "TYPE_INTERVAL";
	case TYPE_PORT:		return "TYPE_PORT";
	case TYPE_TIME:		return "TYPE_TIME";
	case TYPE_ADDR:		return "TYPE_ADDR";
	case TYPE_SUBNET:	return "TYPE_SUBNET";
	case TYPE_STRING:	return "TYPE_STRING";
	case TYPE_PATTERN:	return "TYPE_PATTERN";

	// Compound types.
	case TYPE_RECORD:	return "TYPE_RECORD";
	case TYPE_TABLE:	return "TYPE_TABLE";
	case TYPE_VECTOR:	return "TYPE_VECTOR";
	case TYPE_FUNC:		return "TYPE_FUNC";

	// Everything else.
	case TYPE_ANY:		return "TYPE_ANY";
	case TYPE_ERROR:	return "TYPE_ERROR";
	case TYPE_FILE:		return "TYPE_FILE";
	case TYPE_OPAQUE:	return "TYPE_OPAQUE";
	case TYPE_TIMER:	return "TYPE_TIMER";
	case TYPE_TYPE:		return "TYPE_TYPE";
	case TYPE_VOID:		return "TYPE_VOID";

	default:
		reporter->InternalError("bad type in CPPCompile::TypeTagName");
	}
	}
// Returns the name of the C++ type associated with script type "t": the
// native C++ type for types that have one, otherwise the name of the
// corresponding Val class (without any "Ptr" suffix).
const char* CPPCompile::TypeName(const TypePtr& t)
	{
	switch ( t->Tag() ) {
	// Val classes.
	case TYPE_ADDR:		return "AddrVal";
	case TYPE_ANY:		return "Val";
	case TYPE_FILE:		return "FileVal";
	case TYPE_FUNC:		return "FuncVal";
	case TYPE_OPAQUE:	return "OpaqueVal";
	case TYPE_PATTERN:	return "PatternVal";
	case TYPE_RECORD:	return "RecordVal";
	case TYPE_STRING:	return "StringVal";
	case TYPE_SUBNET:	return "SubNetVal";
	case TYPE_TABLE:	return "TableVal";
	case TYPE_TYPE:		return "TypeVal";
	case TYPE_VECTOR:	return "VectorVal";

	// Native C++ types.
	case TYPE_BOOL:		return "bool";
	case TYPE_ENUM:		return "int";
	case TYPE_INT:		return "bro_int_t";
	case TYPE_VOID:		return "void";

	case TYPE_COUNT:
	case TYPE_PORT:
		return "bro_uint_t";

	case TYPE_DOUBLE:
	case TYPE_INTERVAL:
	case TYPE_TIME:
		return "double";

	default:
		reporter->InternalError("bad type in CPPCompile::TypeName");
	}
	}
// Returns the name of the C++ type used to hold values of script type
// "t": the native type for types that have one (per TypeName()),
// otherwise the "...ValPtr" type.  A nil type yields "void".
const char* CPPCompile::FullTypeName(const TypePtr& t)
	{
	if ( ! t )
		return "void";

	switch ( t->Tag() ) {
	// Pointer types.
	case TYPE_ADDR:		return "AddrValPtr";
	case TYPE_ANY:		return "ValPtr";
	case TYPE_FILE:		return "FileValPtr";
	case TYPE_FUNC:		return "FuncValPtr";
	case TYPE_OPAQUE:	return "OpaqueValPtr";
	case TYPE_PATTERN:	return "PatternValPtr";
	case TYPE_RECORD:	return "RecordValPtr";
	case TYPE_STRING:	return "StringValPtr";
	case TYPE_SUBNET:	return "SubNetValPtr";
	case TYPE_TABLE:	return "TableValPtr";
	case TYPE_TYPE:		return "TypeValPtr";
	case TYPE_VECTOR:	return "VectorValPtr";

	// For these, the full name is the same as the basic name.
	case TYPE_BOOL:
	case TYPE_COUNT:
	case TYPE_DOUBLE:
	case TYPE_ENUM:
	case TYPE_INT:
	case TYPE_INTERVAL:
	case TYPE_PORT:
	case TYPE_TIME:
	case TYPE_VOID:
		return TypeName(t);

	default:
		reporter->InternalError("bad type in CPPCompile::FullTypeName");
	}
	}
// Returns the name of the Type subclass corresponding to "t", which must
// be a record, table or vector type.
const char* CPPCompile::TypeType(const TypePtr& t)
	{
	switch ( t->Tag() ) {
	case TYPE_VECTOR:	return "VectorType";
	case TYPE_TABLE:	return "TableType";
	case TYPE_RECORD:	return "RecordType";

	default:
		reporter->InternalError("bad type in CPPCompile::TypeType");
	}
	}
// Registers the given type (via its representative) along with,
// recursively, any types it's built from, noting the initialization
// dependencies between them.  Does nothing if the type's representative
// has already been processed.
void CPPCompile::RegisterType(const TypePtr& tp)
{
auto t = TypeRep(tp);
if ( processed_types.count(t) > 0 )
return;
// Add the type before going further, to avoid loops due to types
// that reference each other.
processed_types.insert(t);
switch ( t->Tag() ) {
case TYPE_ADDR:
case TYPE_ANY:
case TYPE_BOOL:
case TYPE_COUNT:
case TYPE_DOUBLE:
case TYPE_ENUM:
case TYPE_ERROR:
case TYPE_INT:
case TYPE_INTERVAL:
case TYPE_PATTERN:
case TYPE_PORT:
case TYPE_STRING:
case TYPE_TIME:
case TYPE_TIMER:
case TYPE_VOID:
case TYPE_OPAQUE:
case TYPE_SUBNET:
case TYPE_FILE:
// Nothing to do.
break;
case TYPE_TYPE:
{
// Depends on the type it refers to.
const auto& tt = t->AsTypeType()->GetType();
NoteNonRecordInitDependency(t, tt);
RegisterType(tt);
}
break;
case TYPE_VECTOR:
{
// Depends on its element type.
const auto& yield = t->AsVectorType()->Yield();
NoteNonRecordInitDependency(t, yield);
RegisterType(yield);
}
break;
// The remaining compound types have their own helpers, which are
// passed the original type pointer rather than the representative.
case TYPE_LIST:
RegisterListType(tp);
break;
case TYPE_TABLE:
RegisterTableType(tp);
break;
case TYPE_RECORD:
RegisterRecordType(tp);
break;
case TYPE_FUNC:
RegisterFuncType(tp);
break;
default:
reporter->InternalError("bad type in CPPCompile::RegisterType");
}
AddInit(t);
if ( ! types.IsInherited(t) )
{
// Only the representative gets pre-initialized; any other instance
// instead depends on the representative.
auto t_rep = types.GetRep(t);
if ( t_rep == t )
GenPreInit(t);
else
NoteInitDependency(t, t_rep);
}
}
// Registers each of the element types of the list type "t", noting that
// the list's initialization depends on them.
void CPPCompile::RegisterListType(const TypePtr& t)
	{
	// Iterate over the types in place: there's no need to copy the
	// vector, and a range-based loop avoids comparing a signed index
	// against an unsigned size.
	const auto& tl = t->AsTypeList()->GetTypes();

	for ( const auto& elem_type : tl )
		{
		NoteNonRecordInitDependency(t, elem_type);
		RegisterType(elem_type);
		}
	}
// Registers the index types of the table type "t" and, if present, its
// yield type, noting that the table's initialization depends on them.
void CPPCompile::RegisterTableType(const TypePtr& t)
	{
	auto table_type = t->AsTableType();

	const auto& index_types = table_type->GetIndices();
	const auto& yield_type = table_type->Yield();

	NoteNonRecordInitDependency(t, index_types);
	RegisterType(index_types);

	if ( ! yield_type )
		// Nothing further to register.
		return;

	NoteNonRecordInitDependency(t, yield_type);
	RegisterType(yield_type);
	}
// Registers the types and attributes of each field of the record type
// "t", noting that the record's initialization depends on them.
void CPPCompile::RegisterRecordType(const TypePtr& t)
	{
	const auto* fields = t->AsRecordType()->Types();

	for ( const auto& field : *fields )
		{
		NoteNonRecordInitDependency(t, field->type);
		RegisterType(field->type);

		if ( ! field->attrs )
			continue;

		NoteInitDependency(t, field->attrs);
		RegisterAttributes(field->attrs);
		}
	}
// Registers the parameters of the function type "t" and, if present, its
// yield type, noting that the function type's initialization depends on
// them.
void CPPCompile::RegisterFuncType(const TypePtr& t)
	{
	auto func_type = t->AsFuncType();
	const auto& params = func_type->Params();
	const auto& yield = func_type->Yield();

	NoteInitDependency(t, TypeRep(params));
	RegisterType(params);

	if ( ! yield )
		return;

	NoteNonRecordInitDependency(t, yield);
	RegisterType(yield);
	}
// Returns the text to append to an expression yielding a pointer to a Val
// of type "t" in order to get at the underlying value in its usual
// generated-code representation.
const char* CPPCompile::NativeAccessor(const TypePtr& t)
	{
	switch ( t->Tag() ) {
	// Accessors for more specific Val classes.
	case TYPE_ADDR:		return "->AsAddrVal()";
	case TYPE_FILE:		return "->AsFileVal()";
	case TYPE_FUNC:		return "->AsFuncVal()";
	case TYPE_OPAQUE:	return "->AsOpaqueVal()";
	case TYPE_PATTERN:	return "->AsPatternVal()";
	case TYPE_RECORD:	return "->AsRecordVal()";
	case TYPE_STRING:	return "->AsStringVal()";
	case TYPE_SUBNET:	return "->AsSubNetVal()";
	case TYPE_TABLE:	return "->AsTableVal()";
	case TYPE_TYPE:		return "->AsTypeVal()";
	case TYPE_VECTOR:	return "->AsVectorVal()";

	// Accessors yielding native values.
	case TYPE_BOOL:		return "->AsBool()";
	case TYPE_ENUM:		return "->AsEnum()";
	case TYPE_INT:		return "->AsInt()";

	case TYPE_COUNT:
	case TYPE_PORT:
		return "->AsCount()";

	case TYPE_DOUBLE:
	case TYPE_INTERVAL:
	case TYPE_TIME:
		return "->AsDouble()";

	// Special cases.
	case TYPE_ANY:		return ".get()";
	case TYPE_VOID:		return "";

	default:
		reporter->InternalError("bad type in CPPCompile::NativeAccessor");
	}
	}
// Returns the name of the Val class used for values of type "t", suitable
// for use as the template argument of make_intrusive<> / cast_intrusive<>.
const char* CPPCompile::IntrusiveVal(const TypePtr& t)
	{
	switch ( t->Tag() ) {
	// Types that are otherwise represented natively.
	case TYPE_BOOL:		return "BoolVal";
	case TYPE_COUNT:	return "CountVal";
	case TYPE_DOUBLE:	return "DoubleVal";
	case TYPE_ENUM:		return "EnumVal";
	case TYPE_INT:		return "IntVal";
	case TYPE_INTERVAL:	return "IntervalVal";
	case TYPE_PORT:		return "PortVal";
	case TYPE_TIME:		return "TimeVal";

	// Types that are always represented using a Val.
	case TYPE_ANY:		return "Val";
	case TYPE_ADDR:		return "AddrVal";
	case TYPE_SUBNET:	return "SubNetVal";
	case TYPE_STRING:	return "StringVal";
	case TYPE_PATTERN:	return "PatternVal";
	case TYPE_FILE:		return "FileVal";
	case TYPE_FUNC:		return "FuncVal";
	case TYPE_OPAQUE:	return "OpaqueVal";
	case TYPE_RECORD:	return "RecordVal";
	case TYPE_TABLE:	return "TableVal";
	case TYPE_VECTOR:	return "VectorVal";
	case TYPE_TYPE:		return "TypeVal";

	default:
		reporter->InternalError("bad type in CPPCompile::IntrusiveVal");
	}
	}
} // zeek::detail

View file

@ -0,0 +1,67 @@
// See the file "COPYING" in the main distribution directory for copyright.
#include <errno.h>
#include <unistd.h>
#include <sys/file.h>
#include <cmath>
#include <cstdio>
#include "zeek/script_opt/CPP/Util.h"
namespace zeek::detail {
// Returns a textual representation of "d" with enough precision (17
// significant digits) to recover the original value when read back in.
std::string Fmt(double d)
	{
	// Special hack to preserve the signed-ness of the magic -0.0.
	// We use the std:: version of signbit since the unqualified name
	// isn't guaranteed to be available as a function.
	if ( d == 0.0 && std::signbit(d) )
		return "-0.0";

	// Unfortunately, to_string(double) is hardwired to use %f with
	// default of 6 digits precision.
	//
	// %.17g produces at most a sign, 17 digits, a decimal point and
	// a signed exponent, so a modest buffer is plenty.
	char buf[64];
	snprintf(buf, sizeof buf, "%.17g", d);
	return buf;
	}
// Returns the namespace qualifier ("zeek::detail::CPP_<scope>::") used for
// items generated in the given scope.
std::string scope_prefix(const std::string& scope)
	{
	std::string prefix = "zeek::detail::CPP_";
	prefix += scope;
	prefix += "::";
	return prefix;
	}
// Same, but for a scope identified by a number.
std::string scope_prefix(int scope)
	{
	const std::string scope_name = std::to_string(scope);
	return scope_prefix(scope_name);
	}
// True if the function described by the given profile can be compiled to
// C++, which is the case as long as it has neither "when" statements nor
// type switches.
bool is_CPP_compilable(const ProfileFunc* pf)
	{
	const bool has_when = pf->NumWhenStmts() > 0;
	const bool has_type_switch = pf->TypeSwitches().size() > 0;

	return ! has_when && ! has_type_switch;
	}
void lock_file(const std::string& fname, FILE* f)
{
if ( flock(fileno(f), LOCK_EX) < 0 )
{
char buf[256];
util::zeek_strerror_r(errno, buf, sizeof(buf));
reporter->Error("flock failed on %s: %s", fname.c_str(), buf);
exit(1);
}
}
void unlock_file(const std::string& fname, FILE* f)
{
if ( flock(fileno(f), LOCK_UN) < 0 )
{
char buf[256];
util::zeek_strerror_r(errno, buf, sizeof(buf));
reporter->Error("un-flock failed on %s: %s", fname.c_str(), buf);
exit(1);
}
}
} // zeek::detail

30
src/script_opt/CPP/Util.h Normal file
View file

@ -0,0 +1,30 @@
// See the file "COPYING" in the main distribution directory for copyright.
// Utility functions for compile-to-C++ compiler.
#pragma once
#include "zeek/script_opt/ProfileFunc.h"
namespace zeek::detail {
// Conversions to strings.  Hash values are rendered with a "ULL" suffix,
// so that the result reads as an unsigned long long literal.
inline std::string Fmt(int i) { return std::to_string(i); }
inline std::string Fmt(p_hash_type u) { return std::to_string(u) + "ULL"; }
extern std::string Fmt(double d);
// Returns the prefix for the scoping used by the compiler, i.e., the
// qualifier to put in front of a name generated in the given scope.
extern std::string scope_prefix(const std::string& scope);
// Same, but for scopes identified with numbers.
extern std::string scope_prefix(int scope);
// True if the given function is compilable to C++.
extern bool is_CPP_compilable(const ProfileFunc* pf);
// Helper utilities for file locking, to ensure that hash files
// don't receive conflicting writes due to concurrent compilations.
// "fname" is only used for error messages; a failure to lock or unlock
// is reported and then terminates the process.
extern void lock_file(const std::string& fname, FILE* f);
extern void unlock_file(const std::string& fname, FILE* f);
} // zeek::detail

249
src/script_opt/CPP/Vars.cc Normal file
View file

@ -0,0 +1,249 @@
// See the file "COPYING" in the main distribution directory for copyright.
#include <errno.h>
#include <unistd.h>
#include <sys/stat.h>
#include "zeek/script_opt/ProfileFunc.h"
#include "zeek/script_opt/CPP/Compile.h"
namespace zeek::detail {
bool CPPCompile::CheckForCollisions()
{
for ( auto& g : pfs.AllGlobals() )
{
auto gn = std::string(g->Name());
if ( hm.HasGlobal(gn) )
{
// Make sure the previous compilation used the
// same type and initialization value for the global.
auto ht_orig = hm.GlobalTypeHash(gn);
auto hv_orig = hm.GlobalValHash(gn);
auto ht = pfs.HashType(g->GetType());
p_hash_type hv = 0;
if ( g->GetVal() )
hv = p_hash(g->GetVal());
if ( ht != ht_orig || hv != hv_orig )
{
fprintf(stderr, "%s: hash clash for global %s (%llu/%llu vs. %llu/%llu)\n",
working_dir.c_str(), gn.c_str(),
ht, hv, ht_orig, hv_orig);
fprintf(stderr, "val: %s\n", g->GetVal() ? obj_desc(g->GetVal().get()).c_str() : "<none>");
return true;
}
}
}
for ( auto& t : pfs.RepTypes() )
{
auto tag = t->Tag();
if ( tag != TYPE_ENUM && tag != TYPE_RECORD )
// Other types, if inconsistent, will just not reuse
// the previously compiled version of the type.
continue;
// We identify enum's and record's by name. Make sure that
// the name either (1) wasn't previously used, or (2) if it
// was, it was likewise for an enum or a record.
const auto& tn = t->GetName();
if ( tn.size() == 0 || ! hm.HasGlobal(tn) )
// No concern of collision since the type name
// wasn't previously compiled.
continue;
if ( tag == TYPE_ENUM && hm.HasEnumTypeGlobal(tn) )
// No inconsistency.
continue;
if ( tag == TYPE_RECORD && hm.HasRecordTypeGlobal(tn) )
// No inconsistency.
continue;
fprintf(stderr, "%s: type \"%s\" collides with compiled global\n",
working_dir.c_str(), tn.c_str());
return true;
}
return false;
}
// Generates whatever's needed in order to access the global "g": a
// direct reference for compiled functions and BiFs that are only ever
// called, otherwise a variable holding the global's identifier (plus an
// event handler, if the global is also an event).
void CPPCompile::CreateGlobal(const ID* g)
	{
	auto gn = std::string(g->Name());
	const bool is_bif = pfs.BiFGlobals().count(g) > 0;
	const bool only_called = pfs.Globals().count(g) == 0;

	if ( only_called )
		{
		// Only used in the context of calls.  If it's compilable,
		// then we'll call it directly.
		if ( compilable_funcs.count(gn) > 0 )
			{
			AddGlobal(gn, "zf", true);
			return;
			}

		if ( is_bif )
			{
			AddBiF(g, false);
			return;
			}
		}

	const bool creating = AddGlobal(gn, "gl", true);

	if ( creating )
		{
		Emit("IDPtr %s;", globals[gn]);

		// An event that's also used as a variable needs a handler
		// in addition.
		if ( pfs.Events().count(gn) > 0 )
			Emit("EventHandlerPtr %s_ev;", globals[gn]);

		const auto& g_type = g->GetType();
		NoteInitDependency(g, TypeRep(g_type));

		auto lookup = std::string("lookup_global__CPP(\"") + gn + "\", " +
		              GenTypeName(g_type) + ")";
		AddInit(g, globals[gn], lookup);
		}

	// A BiF that's referred to in a non-call context didn't get added
	// above, so take care of that now.
	if ( is_bif )
		AddBiF(g, true);

	global_vars.emplace(g);
	}
void CPPCompile::UpdateGlobalHashes()
{
for ( auto& g : pfs.AllGlobals() )
{
auto gn = g->Name();
if ( hm.HasGlobal(gn) )
// Not new to this compilation run.
continue;
auto ht = pfs.HashType(g->GetType());
p_hash_type hv = 0;
if ( g->GetVal() )
hv = p_hash(g->GetVal());
fprintf(hm.HashFile(), "global\n%s\n", gn);
fprintf(hm.HashFile(), "%llu %llu\n", ht, hv);
// Record location information in the hash file for
// diagnostic purposes.
auto loc = g->GetLocationInfo();
fprintf(hm.HashFile(), "%s %d\n", loc->filename, loc->first_line);
// Flag any named record/enum types.
if ( g->IsType() )
{
const auto& t = g->GetType();
if ( t->Tag() == TYPE_RECORD )
fprintf(hm.HashFile(), "record\n%s\n", gn);
else if ( t->Tag() == TYPE_ENUM )
fprintf(hm.HashFile(), "enum\n%s\n", gn);
}
}
}
// Registers the BiF "b" as a global with the "bif" suffix, emitting a
// declaration for it if AddGlobal() reports it as new, and adds an
// initialization that looks the BiF up by name.  If "is_var" is true then
// the global is tracked under the BiF's name with a trailing '_'.
void CPPCompile::AddBiF(const ID* b, bool is_var)
	{
	auto bif_name = b->Name();

	std::string var_name(bif_name);
	if ( is_var )
		var_name += "_"; // make the name distinct

	const bool is_new = AddGlobal(var_name, "bif", true);
	if ( is_new )
		Emit("Func* %s;", globals[var_name]);

	std::string init("lookup_bif__CPP(\"");
	init += bif_name;
	init += "\")";

	AddInit(b, globals[var_name], init);
	}
// Makes sure that "g" has an entry in "globals" mapping it to a C++ name
// built from "g" and "suffix".  Returns true if the resulting variable is
// new, i.e., "g" wasn't already present and the hash manager doesn't
// already have a variable of that name; otherwise returns false.  For new
// variables, if both "track" and "update" are set then the variable is
// also recorded in the hash file.
bool CPPCompile::AddGlobal(const std::string& g, const char* suffix, bool track)
	{
	if ( globals.count(g) > 0 )
		// Already taken care of.
		return false;

	auto cpp_name = GlobalName(g, suffix);
	const bool is_new = ! hm.HasGlobalVar(cpp_name);

	if ( is_new )
		{
		if ( track && update )
			fprintf(hm.HashFile(), "global-var\n%s\n%d\n",
				cpp_name.c_str(), addl_tag);
		}
	else
		// Previously known to the hash manager, so refer to it
		// using the scope recorded for it.
		cpp_name = scope_prefix(hm.GlobalVarScope(cpp_name)) + cpp_name;

	globals.emplace(g, cpp_name);

	return is_new;
	}
// Appends "ev_name" to the list of events associated with the body
// currently named by "body_name".
void CPPCompile::RegisterEvent(std::string ev_name)
	{
	auto& events_for_body = body_events[body_name];
	events_for_body.emplace_back(std::move(ev_name));
	}
const std::string& CPPCompile::IDNameStr(const ID* id) const
{
if ( id->IsGlobal() )
{
auto g = std::string(id->Name());
ASSERT(globals.count(g) > 0);
return ((CPPCompile*)(this))->globals[g];
}
ASSERT(locals.count(id) > 0);
return ((CPPCompile*)(this))->locals[id];
}
// Returns the canonicalized form of the name of "l".  If the name
// contains "::", only the part following its first occurrence is used.
std::string CPPCompile::LocalName(const ID* l) const
	{
	auto full_name = l->Name();
	auto module_sep = strstr(full_name, "::");

	// Skip over the "::" itself when it's present.
	return Canonicalize(module_sep ? module_sep + 2 : full_name);
	}
// Returns a copy of "name" transformed as follows: '<' and '>' characters
// are dropped, ':' and '-' characters are replaced with '_', and a
// trailing '_' is appended.
std::string CPPCompile::Canonicalize(const char* name) const
	{
	std::string cname;

	for ( int i = 0; name[i]; ++i )
		{
		auto c = name[i];

		// Strip <>'s - these get introduced for lambdas.
		if ( c == '<' || c == '>' )
			continue;

		if ( c == ':' || c == '-' )
			c = '_';

		// Append in place rather than via "cname = cname + c",
		// which copies the entire string for every character.
		cname += c;
		}

	// Add a trailing '_' to avoid conflicts with C++ keywords.
	cname += '_';

	return cname;
	}
} // zeek::detail

View file

@ -0,0 +1,13 @@
#! /bin/sh

# Runs Zeek in bare mode (-b) with "-O gen-C++", installs the resulting
# CPP-gen-addl.h as CPP-gen.cc, and rebuilds with ninja.
#
# Assumes it is run from a directory three levels below the top of the
# source tree (see "base" below).

# Resolve the top of the tree to an absolute path.  The paths derived from
# it are used both before and after the "cd" below, so they must not
# depend on the current directory.
base=$(cd ../../.. && pwd) || exit 1
so=$base/src/script_opt/CPP
build=$base/build

# Start out with an empty CPP-gen-addl.h in the source directory.
echo > "$so/CPP-gen-addl.h"

# Keep these out of Zeek's environment.  ("export -n" is a bash extension
# that other /bin/sh implementations, such as dash, reject.)
unset ZEEK_USE_CPP ZEEK_ADD_CPP
export ZEEK_HASH_DIR="$build"

# Don't continue in the wrong directory if the build tree is missing.
cd "$build" || exit 1

echo | src/zeek -b -O gen-C++
mv ./CPP-gen-addl.h "$so/CPP-gen.cc"
ninja || echo Bare embedded build failed

View file

@ -0,0 +1,12 @@
#! /bin/sh

# Runs the "core" btests using the "cpp" alternative, writing diagnostics
# to cpp-test.diag.
#
# Assumes it is run from a directory three levels below the top of the
# source tree (see "base" below).

base=../../..
test=$base/testing/btest
so=$base/src/script_opt/CPP
build=$base/build
gen=CPP-gen-addl.h

# Keep this out of the environment of the tests.  ("export -n" is a bash
# extension that other /bin/sh implementations, such as dash, reject.)
unset ZEEK_ADD_CPP

# The "rm -rf" below must only ever run inside the btest directory.
cd $test || exit 1

rm -rf .tmp
../../auxil/btest/btest -j -a cpp -f cpp-test.diag core

View file

@ -0,0 +1,13 @@
#! /bin/sh

# Runs Zeek (without -b) with "-O gen-C++", installs the resulting
# CPP-gen-addl.h as CPP-gen.cc, and rebuilds with ninja.
#
# Assumes it is run from a directory three levels below the top of the
# source tree (see "base" below).

# Resolve the top of the tree to an absolute path.  The paths derived from
# it are used both before and after the "cd" below, so they must not
# depend on the current directory.
base=$(cd ../../.. && pwd) || exit 1
so=$base/src/script_opt/CPP
build=$base/build

# Start out with an empty CPP-gen-addl.h in the source directory.
echo > "$so/CPP-gen-addl.h"

# Keep these out of Zeek's environment.  ("export -n" is a bash extension
# that other /bin/sh implementations, such as dash, reject.)
unset ZEEK_USE_CPP ZEEK_ADD_CPP
export ZEEK_HASH_DIR="$build"

# Don't continue in the wrong directory if the build tree is missing.
cd "$build" || exit 1

echo | src/zeek -O gen-C++
mv ./CPP-gen-addl.h "$so/CPP-gen.cc"
ninja || echo Full embedded build failed

View file

@ -0,0 +1,7 @@
#! /bin/sh

# Replaces CPP-gen.cc with an (essentially) empty file and rebuilds with
# ninja.
#
# Assumes it is run from a directory three levels below the top of the
# source tree (see "base" below).

base=../../..
so=$base/src/script_opt/CPP

echo > $so/CPP-gen.cc

# Don't run ninja in the wrong directory if the build tree is missing.
cd $base/build || exit 1

ninja || echo Non-embedded build failed

View file

@ -0,0 +1,27 @@
#! /bin/sh

# For the btest named by $1: does a non-embedded build, runs the test with
# ZEEK_GEN_CPP set, installs the resulting CPP-gen-addl.h as CPP-gen.cc,
# rebuilds, and then re-runs the test using the "cpp" alternative.
#
# Assumes it is run from a directory three levels below the top of the
# source tree (see "base" below), with non-embedded-build alongside it.

echo $1

# Resolve the top of the tree to an absolute path.  The paths derived from
# it are used after changing directories several times, so they must not
# depend on the current directory.
base=$(cd ../../.. && pwd) || exit 1
test=$base/testing/btest
so=$base/src/script_opt/CPP
build=$base/build
gen=CPP-gen-addl.h

echo >$gen
./non-embedded-build >"$build/errs" 2>&1 || echo non-embedded build failed

# "export -n" is a bash extension that other /bin/sh implementations, such
# as dash, reject; "unset" likewise keeps the variable out of the
# environment of the commands run below.
unset ZEEK_USE_CPP
export ZEEK_HASH_DIR="$test" ZEEK_GEN_CPP=

cd "$test" || exit 1
../../auxil/btest/btest $1 >jbuild-$1.out 2>&1
grep -c '^namespace' $gen
mv $gen "$so/CPP-gen.cc"

cd "$build" || exit 1
# ">&" for redirecting to a file is not available in POSIX sh.
ninja > errs 2>&1 || echo build for $1 failed

unset ZEEK_GEN_CPP

# The "rm -rf" below must only ever run inside the btest directory.
cd "$test" || exit 1
rm -rf .tmp
../../auxil/btest/btest -a cpp -f cpp-test.$1.diag $1

View file

@ -0,0 +1,25 @@
#! /bin/sh

# For the btest named by $1: runs the test with ZEEK_ADD_CPP set, installs
# the resulting CPP-gen-addl.h into the source directory, rebuilds, and
# then re-runs the test using the "cpp" alternative.
#
# Assumes it is run from a directory three levels below the top of the
# source tree (see "base" below).

echo $1

# Resolve the top of the tree to an absolute path.  The paths derived from
# it are used after changing directories several times, so they must not
# depend on the current directory.
base=$(cd ../../.. && pwd) || exit 1
test=$base/testing/btest
so=$base/src/script_opt/CPP
build=$base/build
gen=CPP-gen-addl.h

# "export -n" is a bash extension that other /bin/sh implementations, such
# as dash, reject; "unset" likewise keeps the variable out of the
# environment of the commands run below.
unset ZEEK_USE_CPP
export ZEEK_HASH_DIR="$test" ZEEK_ADD_CPP=

cd "$test" || exit 1
cp "$build/CPP-hashes.dat" .
echo >$gen
../../auxil/btest/btest $1 >cpp-build-$1.out 2>&1
grep -c '^namespace' $gen
mv $gen "$so"

cd "$build" || exit 1
# ">&" for redirecting to a file is not available in POSIX sh.
ninja > errs 2>&1 || echo build for $1 failed

unset ZEEK_ADD_CPP

# The "rm -rf" below must only ever run inside the btest directory.
cd "$test" || exit 1
rm -rf .tmp
../../auxil/btest/btest -j -a cpp -f cpp-jtest.$1.diag $1

View file

@ -0,0 +1,18 @@
#! /bin/sh

# Runs btest (passing along $1, if given) with ZEEK_ADD_CPP set, installs
# the resulting CPP-gen-addl.h into the source directory, and rebuilds
# with ninja.
#
# Assumes it is run from a directory three levels below the top of the
# source tree (see "base" below).

# Resolve the top of the tree to an absolute path.  The paths derived from
# it are used after changing directories, so they must not depend on the
# current directory.
base=$(cd ../../.. && pwd) || exit 1
test=$base/testing/btest
so=$base/src/script_opt/CPP
build=$base/build
gen=CPP-gen-addl.h

# "export -n" is a bash extension that other /bin/sh implementations, such
# as dash, reject; "unset" likewise keeps the variable out of the
# environment of the commands run below.
unset ZEEK_USE_CPP
export ZEEK_HASH_DIR="$test" ZEEK_ADD_CPP=

cd "$test" || exit 1
cp "$build/CPP-hashes.dat" .
echo >$gen
../../auxil/btest/btest $1 >test-suite-build.out 2>&1
grep -c '^namespace' $gen
mv $gen "$so"

cd "$build" || exit 1
# ">&" for redirecting to a file is not available in POSIX sh.
ninja > errs 2>&1 || echo test suite build failed

View file

@ -0,0 +1,23 @@
#! /bin/sh

# For the btest named by $1: runs the test with ZEEK_ADD_CPP set, installs
# the resulting CPP-gen-addl.h into the source directory, rebuilds, and
# then re-runs the test using the "cpp" alternative with btest's -U flag.
#
# Assumes it is run from a directory three levels below the top of the
# source tree (see "base" below).

# Resolve the top of the tree to an absolute path.  The paths derived from
# it are used after changing directories several times, so they must not
# depend on the current directory.
base=$(cd ../../.. && pwd) || exit 1
test=$base/testing/btest
so=$base/src/script_opt/CPP
build=$base/build
gen=CPP-gen-addl.h

# "export -n" is a bash extension that other /bin/sh implementations, such
# as dash, reject; "unset" likewise keeps the variable out of the
# environment of the commands run below.
unset ZEEK_USE_CPP
export ZEEK_HASH_DIR="$test" ZEEK_ADD_CPP=

cd "$test" || exit 1
cp "$build/CPP-hashes.dat" .
echo >$gen
../../auxil/btest/btest $1 >jbuild-$1.out 2>&1
grep -c '^namespace' $gen
mv $gen "$so"

cd "$build" || exit 1
# ">&" for redirecting to a file is not available in POSIX sh.
ninja > errs 2>&1 || echo build for $1 failed

unset ZEEK_ADD_CPP

# The "rm -rf" below must only ever run inside the btest directory.
cd "$test" || exit 1
rm -rf .tmp
../../auxil/btest/btest -U -a cpp -f cpp-test.$1.diag.update $1

View file

@ -19,6 +19,8 @@ AnalyOpt analysis_options;
std::unordered_set<const Func*> non_recursive_funcs; std::unordered_set<const Func*> non_recursive_funcs;
// Hook for C++-compiled scripts to signal their presence.  Starts out
// null.
void (*CPP_init_hook)() = nullptr;
// Tracks all of the loaded functions (including event handlers and hooks). // Tracks all of the loaded functions (including event handlers and hooks).
static std::vector<FuncInfo> funcs; static std::vector<FuncInfo> funcs;

View file

@ -118,4 +118,9 @@ extern const FuncInfo* analyze_global_stmts(Stmt* stmts);
extern void analyze_scripts(); extern void analyze_scripts();
// Used for C++-compiled scripts to signal their presence, by setting this
// function pointer to a non-null value.
extern void (*CPP_init_hook)();
} // namespace zeek::detail } // namespace zeek::detail