script_opt/CPP: errors, recursive type fixes, fix embedded comments

better (than nothing) run-time errors for compiled scripts
fixes for dealing with recursive types in compiled scripts
fix for values in compiled scripts containing embedded comment markers
This commit is contained in:
Vern Paxson 2023-03-08 10:12:01 +01:00 committed by Arne Welzel
parent b7f7d32bf7
commit c0dd2b4e81
27 changed files with 181 additions and 65 deletions

View file

@ -178,7 +178,6 @@ public:
// The same, for a single attribute.
std::shared_ptr<CPP_InitInfo> RegisterAttr(const AttrPtr& attr);
int AttrOffset(const AttrPtr& attr) { return GI_Offset(RegisterAttr(attr)); }
// Returns a mapping from Attr objects to their associated
// initialization information. The Attr must have previously
@ -595,6 +594,10 @@ private:
// Maps function names to priorities, for hooks & event handlers.
std::unordered_map<std::string, int> body_priorities;
// Maps function names to script locations, for better-than-nothing
// error reporting.
std::unordered_map<std::string, const Location*> body_locs;
// Maps function names to events relevant to them.
std::unordered_map<std::string, std::vector<std::string>> body_events;

View file

@ -103,6 +103,7 @@ void CPPCompile::CreateFunction(const FuncTypePtr& ft, const ProfileFunc* pf, co
body_hashes[fname] = pf->HashVal();
body_priorities[fname] = priority;
body_locs[fname] = body->GetLocationInfo();
body_names.emplace(body.get(), fname);
}
@ -136,13 +137,19 @@ void CPPCompile::DeclareSubclass(const FuncTypePtr& ft, const ProfileFunc* pf, c
}
}
Emit("%s_cl(const char* name%s) : CPPStmt(name)%s { }", fname, addl_args, inits);
const Obj* stmts = pf->ProfiledBody();
if ( ! stmts )
stmts = pf->ProfiledExpr();
auto loc = stmts->GetLocationInfo();
auto loc_info = string("\"") + loc->filename + "\", " + Fmt(loc->first_line);
Emit("%s_cl(const char* name%s) : CPPStmt(name, %s)%s { }", fname, addl_args, loc_info, inits);
// An additional constructor just used to generate place-holder
// instances, due to the mis-design that lambdas are identified
// by their Func objects rather than their FuncVal objects.
if ( lambda_ids && lambda_ids->length() > 0 )
Emit("%s_cl(const char* name) : CPPStmt(name) { }", fname);
Emit("%s_cl(const char* name) : CPPStmt(name, %s) { }", fname, loc_info);
Emit("ValPtr Exec(Frame* f, StmtFlowType& flow) override final");
StartBlock();
@ -178,7 +185,8 @@ void CPPCompile::DeclareDynCPPStmt()
Emit("class CPPDynStmt : public CPPStmt");
Emit("\t{");
Emit("public:");
Emit("\tCPPDynStmt(const char* _name, void* _func, int _type_signature) : CPPStmt(_name), "
Emit("\tCPPDynStmt(const char* _name, void* _func, int _type_signature, const char* filename, "
"int line_num) : CPPStmt(_name, filename, line_num), "
"func(_func), type_signature(_type_signature) { }");
Emit("\tValPtr Exec(Frame* f, StmtFlowType& flow) override final;");
Emit("private:");

View file

@ -274,9 +274,6 @@ shared_ptr<CPP_InitsInfo> CPPCompile::RegisterInitInfo(const char* tag, const ch
void CPPCompile::RegisterCompiledBody(const string& f)
{
auto h = body_hashes[f];
auto p = body_priorities[f];
// Build up an initializer of the events relevant to the function.
string events;
auto be = body_events.find(f);
@ -293,8 +290,15 @@ void CPPCompile::RegisterCompiledBody(const string& f)
auto fi = func_index.find(f);
ASSERT(fi != func_index.end());
auto type_signature = casting_index[fi->second];
Emit("\tCPP_RegisterBody(\"%s\", (void*) %s, %s, %s, %s, std::vector<std::string>(%s)),", f, f,
Fmt(type_signature), Fmt(p), Fmt(h), events);
auto h = body_hashes[f];
auto p = body_priorities[f];
auto loc = body_locs[f];
auto body_info = Fmt(p) + ", " + Fmt(h) + ", \"" + loc->filename + " (C++)\", " +
Fmt(loc->first_line);
Emit("\tCPP_RegisterBody(\"%s\", (void*) %s, %s, %s, std::vector<std::string>(%s)),", f, f,
Fmt(type_signature), body_info, events);
}
void CPPCompile::GenEpilog()
@ -368,6 +372,7 @@ void CPPCompile::GenCPPDynStmt()
StartBlock();
Emit("flow = FLOW_RETURN;");
Emit("f->SetOnlyCall(ce.get());");
Emit("switch ( type_signature )");
StartBlock();
@ -481,7 +486,8 @@ void CPPCompile::GenRegisterBodies()
Emit("for ( auto& b : CPP__bodies_to_register )");
StartBlock();
Emit("auto f = make_intrusive<CPPDynStmt>(b.func_name.c_str(), b.func, b.type_signature);");
Emit("auto f = make_intrusive<CPPDynStmt>(b.func_name.c_str(), b.func, b.type_signature, "
"b.filename, b.line_num);");
auto reg = standalone ? "register_standalone_body" : "register_body";
Emit("%s__CPP(f, b.priority, b.h, b.events, finish_init__CPP);", reg);

View file

@ -23,6 +23,28 @@ void CPPFunc::Describe(ODesc* d) const
d->Add(name);
}
// Constructs a compiled-statement node called `_name`.  In addition to
// recording the name, this sets up `ce`, a stand-in call expression whose
// location information is built from `filename` and `line_num`.
CPPStmt::CPPStmt(const char* _name, const char* filename, int line_num)
	: Stmt(STMT_CPP), name(_name)
	{
	// Error reporting needs a CallExpr node to refer to, so we fabricate
	// one.  Its signature is not meant to mirror that of the actual
	// function/event/hook; the only requirement is that the node itself
	// type-checks.  Hence: a function type with an empty argument record
	// and a void yield.
	auto stub_params = make_intrusive<RecordType>(nullptr);
	auto stub_yield = base_type(TYPE_VOID);
	auto stub_type = make_intrusive<FuncType>(stub_params, stub_yield, FUNC_FLAVOR_FUNCTION);

	// A script function of that type with no bodies (and so no
	// priorities), wrapped up as a function value.
	vector<StmtPtr> stub_bodies;
	vector<int> stub_priorities;
	auto stub_func = make_intrusive<ScriptFunc>(name, stub_type, stub_bodies, stub_priorities);
	auto stub_val = make_intrusive<FuncVal>(stub_func);

	// The pseudo-call itself: the constant function value applied to an
	// empty argument list.
	auto stub_args = make_intrusive<ListExpr>();
	ce = make_intrusive<CallExpr>(make_intrusive<ConstExpr>(stub_val), stub_args);

	// Stamp the pseudo-call with the script location we were given.
	// NOTE(review): `where` is a local, so this presumably relies on
	// SetLocationInfo() copying what it is handed - confirm.
	Location where(filename, line_num, line_num, 1, 1);
	ce->SetLocationInfo(&where);
	}
CPPLambdaFunc::CPPLambdaFunc(string _name, FuncTypePtr ft, CPPStmtPtr _l_body)
: ScriptFunc(move(_name), move(ft), {_l_body}, {0})
{

View file

@ -43,7 +43,7 @@ protected:
class CPPStmt : public Stmt
{
public:
CPPStmt(const char* _name) : Stmt(STMT_CPP), name(_name) { }
CPPStmt(const char* _name, const char* filename, int line_num);
const std::string& Name() { return name; }
@ -71,6 +71,9 @@ protected:
std::string name;
p_hash_type hash = 0ULL;
// A pseudo AST "call" node, used to support error localization.
CallExprPtr ce;
};
using CPPStmtPtr = IntrusivePtr<CPPStmt>;

View file

@ -1,5 +1,7 @@
// See the file "COPYING" in the main distribution directory for copyright.
#include <regex>
#include "zeek/Desc.h"
#include "zeek/RE.h"
#include "zeek/ZeekString.h"
@ -95,7 +97,16 @@ void CPP_InitsInfo::BuildCohort(CPPCompile* c, std::vector<std::shared_ptr<CPP_I
vector<string> ivs;
auto o = co->InitObj();
if ( o )
c->Emit("/* #%s: Initializing %s: */", Fmt(co->Offset()), obj_desc(o));
{
auto od = obj_desc(o);
// Escape any embedded comment characters.
od = regex_replace(od, std::regex("/\\*"), "<<SLASH-STAR>>");
od = regex_replace(od, std::regex("\\*/"), "<<STAR-SLASH>>");
c->Emit("/* #%s: Initializing %s: */", Fmt(co->Offset()), od);
}
co->InitializerVals(ivs);
BuildCohortElement(c, co->InitializerType(), ivs);
++n;
@ -286,7 +297,8 @@ AttrInfo::AttrInfo(CPPCompile* _c, const AttrPtr& attr) : CompoundItemInfo(_c)
if ( a_e )
{
auto gi = c->RegisterType(a_e->GetType());
init_cohort = max(init_cohort, gi->InitCohort() + 1);
if ( gi )
init_cohort = max(init_cohort, gi->InitCohort() + 1);
if ( ! CPPCompile::IsSimpleInitExpr(a_e) )
{
@ -307,7 +319,7 @@ AttrInfo::AttrInfo(CPPCompile* _c, const AttrPtr& attr) : CompoundItemInfo(_c)
else if ( a_e->Tag() == EXPR_NAME )
{
auto g = a_e->AsNameExpr()->Id();
auto gi = c->RegisterGlobal(g);
gi = c->RegisterGlobal(g);
init_cohort = max(init_cohort, gi->InitCohort() + 1);
vals.emplace_back(Fmt(static_cast<int>(AE_NAME)));
@ -383,7 +395,8 @@ CallExprInitInfo::CallExprInitInfo(CPPCompile* c, ExprPtr _e, string _e_name, st
: CPP_InitInfo(_e), e(move(_e)), e_name(move(_e_name)), wrapper_class(move(_wrapper_class))
{
auto gi = c->RegisterType(e->GetType());
init_cohort = max(init_cohort, gi->InitCohort() + 1);
if ( gi )
init_cohort = max(init_cohort, gi->InitCohort() + 1);
}
LambdaRegistrationInfo::LambdaRegistrationInfo(CPPCompile* c, string _name, FuncTypePtr ft,
@ -477,12 +490,15 @@ void ListTypeInfo::AddInitializerVals(std::vector<std::string>& ivs) const
TableTypeInfo::TableTypeInfo(CPPCompile* _c, TypePtr _t) : AbstractTypeInfo(_c, move(_t))
{
// Note, we leave init_cohort at 0 because the skeleton of this type
// is built in the first cohort.
auto tbl = t->AsTableType();
auto gi = c->RegisterType(tbl->GetIndices());
ASSERT(gi);
indices = gi->Offset();
init_cohort = gi->InitCohort();
final_init_cohort = gi->InitCohort();
yield = tbl->Yield();
@ -490,7 +506,7 @@ TableTypeInfo::TableTypeInfo(CPPCompile* _c, TypePtr _t) : AbstractTypeInfo(_c,
{
gi = c->RegisterType(yield);
if ( gi )
init_cohort = max(init_cohort, gi->InitCohort());
final_init_cohort = max(final_init_cohort, gi->InitCohort());
}
}

View file

@ -271,6 +271,9 @@ void CPP_TypeInits::PreInit(InitsManager* im, int offset, ValElemVec& init_vals)
inits_vec[offset] = get_record_type__CPP(nullptr);
}
else if ( tag == TYPE_TABLE )
inits_vec[offset] = make_intrusive<TableType>(nullptr, nullptr);
// else no pre-initialization needed
}
@ -320,7 +323,7 @@ void CPP_TypeInits::Generate(InitsManager* im, vector<TypePtr>& ivec, int offset
break;
case TYPE_TABLE:
t = BuildTableType(im, init_vals);
t = BuildTableType(im, init_vals, offset);
break;
case TYPE_FUNC:
@ -394,13 +397,18 @@ TypePtr CPP_TypeInits::BuildTypeList(InitsManager* im, ValElemVec& init_vals, in
return tl;
}
TypePtr CPP_TypeInits::BuildTableType(InitsManager* im, ValElemVec& init_vals) const
TypePtr CPP_TypeInits::BuildTableType(InitsManager* im, ValElemVec& init_vals, int offset) const
{
auto t = cast_intrusive<TableType>(inits_vec[offset]);
ASSERT(t);
auto index = cast_intrusive<TypeList>(im->Types(init_vals[1]));
auto yield_i = init_vals[2];
auto yield = yield_i >= 0 ? im->Types(yield_i) : nullptr;
return make_intrusive<TableType>(index, yield);
t->SetIndexAndYield(index, yield);
return t;
}
TypePtr CPP_TypeInits::BuildFuncType(InitsManager* im, ValElemVec& init_vals) const

View file

@ -307,7 +307,7 @@ protected:
TypePtr BuildTypeType(InitsManager* im, ValElemVec& init_vals) const;
TypePtr BuildVectorType(InitsManager* im, ValElemVec& init_vals) const;
TypePtr BuildTypeList(InitsManager* im, ValElemVec& init_vals, int offset) const;
TypePtr BuildTableType(InitsManager* im, ValElemVec& init_vals) const;
TypePtr BuildTableType(InitsManager* im, ValElemVec& init_vals, int offset) const;
TypePtr BuildFuncType(InitsManager* im, ValElemVec& init_vals) const;
TypePtr BuildRecordType(InitsManager* im, ValElemVec& init_vals, int offset) const;
};
@ -556,9 +556,11 @@ protected:
struct CPP_RegisterBody
{
CPP_RegisterBody(std::string _func_name, void* _func, int _type_signature, int _priority,
p_hash_type _h, std::vector<std::string> _events)
p_hash_type _h, const char* _filename, int _line_num,
std::vector<std::string> _events)
: func_name(std::move(_func_name)), func(_func), type_signature(_type_signature),
priority(_priority), h(_h), events(std::move(_events))
priority(_priority), h(_h), filename(_filename), line_num(_line_num),
events(std::move(_events))
{
}
@ -567,6 +569,8 @@ struct CPP_RegisterBody
int type_signature;
int priority;
p_hash_type h;
const char* filename;
int line_num;
std::vector<std::string> events;
};

View file

@ -47,8 +47,6 @@ extern ValPtr when_index_slice__CPP(VectorVal* vec, const ListVal* lv);
// but (2) needing to have the address of that vector.
inline ValPtr invoke__CPP(Func* f, std::vector<ValPtr> args, Frame* frame)
{
if ( frame )
frame->SetOnlyCall(nullptr);
return f->Invoke(&args, frame);
}

View file

@ -551,7 +551,7 @@ void CPPCompile::GenForOverString(const ExprPtr& str, const IDPList* loop_vars)
{
Emit("auto sval__CPP = %s;", GenExpr(str, GEN_DONT_CARE));
Emit("for ( auto i__CPP = 0u; i__CPP < sval__CPP->Len(); ++i__CPP )");
Emit("for ( auto i__CPP = 0; i__CPP < sval__CPP->Len(); ++i__CPP )");
StartBlock();
Emit("auto sv__CPP = make_intrusive<StringVal>(1, (const char*) sval__CPP->Bytes() + i__CPP);");

View file

@ -15,7 +15,7 @@ The maintenance workflow:
to check in updates to the list of how the compiler currently fares
on various btests (see end of this doc):
Tue Feb 14 15:15:27 PST 2023
Sun Mar 5 12:02:44 PST 2023
2. Make sure the compiler can compile and execute the base scripts:
@ -42,7 +42,7 @@ The maintenance workflow:
"-O gen-C++" can successfully run on the input. Presently, it should
be able to do so for all of them, other than some exceptions noted below.
This step is parallelizable, say using xargs -P 10.
This step is parallelizable, say using xargs -P 10 -n 1.
7. Copy ./src/zeek to ./zeek.HOLD. This copy speeds up the recompilation done
in the next step. However, it's also a headache to do development to