Merge remote-tracking branch 'origin/topic/vern/cpp-maint-Sep22'

* origin/topic/vern/cpp-maint-Sep22:
  oof more manual fixups
  undo inadvertently committed tweak to test
  update for btest only run in some environments
  btest tweaks for recent changes
  updates to notes for compile-to-C++ maintenance
  newly-created btest files
  baseline updates for -a cpp alternative (compile-to-C++)
  tweak btest so it's recognized as a candidate for C++ compilation testing
  split basic "int" btests into main part versus now-separate overflow part
  fix deprecated "local" scoping in test scripts
  annotated scripts to skip when testing compilation-to-C++
  C++ script generation fix for lambdas that have identical bodies
  fix for C++ scripts that refer to "opaque" types
  C++ compilation support for 2-valued vector "for" loops
  C++ compilation support for RE /s operator
  run-time checking of vector operations for overflows and division-by-zero
  error propagation fix to avoid a crash
This commit is contained in:
Tim Wojtulewicz 2022-09-19 13:51:11 -07:00
commit f04f070d9f
64 changed files with 319 additions and 161 deletions

View file

@ -727,7 +727,7 @@ private:
void GenWhenStmt(const WhenStmt* w);
void GenForStmt(const ForStmt* f);
void GenForOverTable(const ExprPtr& tbl, const IDPtr& value_var, const IDPList* loop_vars);
void GenForOverVector(const ExprPtr& tbl, const IDPList* loop_vars);
void GenForOverVector(const ExprPtr& tbl, const IDPtr& value_var, const IDPList* loop_vars);
void GenForOverString(const ExprPtr& str, const IDPList* loop_vars);
// Nested level of loops/switches for which "break"'s should be

View file

@ -155,20 +155,24 @@ void CPPCompile::Compile(bool report_uncompilable)
if ( ! func.ShouldSkip() )
DeclareFunc(func);
// We track lambdas by their internal names, because two different
// LambdaExpr's can wind up referring to the same underlying lambda
// if the bodies happen to be identical. In that case, we don't
// want to generate the lambda twice.
unordered_set<string> lambda_names;
// We track lambdas by their internal names, and associate those
// with their AST bodies. Two different LambdaExpr's can wind up
// referring to the same underlying lambda if the bodies happen to
// be identical. In that case, we don't want to generate the lambda
// twice, but we do want to map the second one to the same body name.
unordered_map<string, const Stmt*> lambda_ASTs;
for ( const auto& l : pfs.Lambdas() )
{
const auto& n = l->Name();
if ( lambda_names.count(n) > 0 )
// Skip it.
continue;
DeclareLambda(l, pfs.ExprProf(l).get());
lambda_names.insert(n);
const auto body = l->Ingredients().body.get();
if ( lambda_ASTs.count(n) > 0 )
// Reuse previous body.
body_names[body] = body_names[lambda_ASTs[n]];
else
{
DeclareLambda(l, pfs.ExprProf(l).get());
lambda_ASTs[n] = body;
}
}
NL();
@ -178,15 +182,15 @@ void CPPCompile::Compile(bool report_uncompilable)
if ( ! func.ShouldSkip() )
CompileFunc(func);
lambda_names.clear();
lambda_ASTs.clear();
for ( const auto& l : pfs.Lambdas() )
{
const auto& n = l->Name();
if ( lambda_names.count(n) > 0 )
if ( lambda_ASTs.count(n) > 0 )
continue;
CompileLambda(l, pfs.ExprProf(l).get());
lambda_names.insert(n);
lambda_ASTs[n] = l->Ingredients().body.get();
}
NL();

View file

@ -1,3 +1,6 @@
globals initialized by expressions should be done with code, not values
- this would enable globals whose starting values include opaque's
conditional code:
- top-level conditionals okay due to hash protection
- but lower-level, directly called, won't translate
@ -16,12 +19,6 @@ standalone code won't execute global statements
standalone code needs to deal with field_mapping initializations the
same as enum_mapping
type switches:
- easy to support by some sort of hash on the type
when's:
- need to understand "return when" semantics
slow compilation:
- analyze whether there's a bunch of unneeded stuff (e.g. orphan types)
@ -30,4 +27,4 @@ efficiency:
- directly calling BiFs
- best done by supplanting bifcl
- event handlers directly called, using vector<ZVal> arguments
- import custom BiFs (e.g. network_time()) from ZAM
- import custom BiFs (e.g. network_time(), cat()) from ZAM

View file

@ -168,6 +168,7 @@ PatternConstInfo::PatternConstInfo(CPPCompile* c, ValPtr v) : CPP_InitInfo()
auto re = v->AsPatternVal()->Get();
pattern = c->TrackString(CPPEscape(re->OrigText()));
is_case_insensitive = re->IsCaseInsensitive();
is_single_line = re->IsSingleLine();
}
CompoundItemInfo::CompoundItemInfo(CPPCompile* _c, ValPtr v) : CPP_InitInfo(), c(_c)
@ -400,7 +401,7 @@ void EnumTypeInfo::AddInitializerVals(std::vector<std::string>& ivs) const
void OpaqueTypeInfo::AddInitializerVals(std::vector<std::string>& ivs) const
{
ivs.emplace_back(Fmt(c->TrackString(t->GetName())));
ivs.emplace_back(Fmt(c->TrackString(t->AsOpaqueType()->Name())));
}
TypeTypeInfo::TypeTypeInfo(CPPCompile* _c, TypePtr _t) : AbstractTypeInfo(_c, move(_t))

View file

@ -374,11 +374,13 @@ public:
{
ivs.emplace_back(std::to_string(pattern));
ivs.emplace_back(std::to_string(is_case_insensitive));
ivs.emplace_back(std::to_string(is_single_line));
}
private:
int pattern; // index into string representation of pattern
int is_case_insensitive; // case-insensitivity flag, 0 or 1
int is_single_line; // single-line flag, 0 or 1
};
class PortConstInfo : public CPP_InitInfo

View file

@ -47,6 +47,8 @@ void CPP_IndexedInits<T>::Generate(InitsManager* im, std::vector<PatternValPtr>&
auto re = new RE_Matcher(im->Strings(init_vals[0]));
if ( init_vals[1] )
re->MakeCaseInsensitive();
if ( init_vals[2] )
re->MakeSingleLine();
re->Compile();

View file

@ -2,6 +2,7 @@
#include "zeek/script_opt/CPP/RuntimeVec.h"
#include "zeek/Overflow.h"
#include "zeek/ZeekString.h"
namespace zeek::detail
@ -103,20 +104,25 @@ VEC_OP1(comp, ~, )
// A kernel for applying a binary operation element-by-element to two
// vectors of a given low-level type.
#define VEC_OP2_KERNEL(accessor, type, op) \
#define VEC_OP2_KERNEL(accessor, type, op, zero_check) \
for ( unsigned int i = 0; i < v1->Size(); ++i ) \
{ \
auto v1_i = v1->ValAt(i); \
auto v2_i = v2->ValAt(i); \
if ( v1_i && v2_i ) \
v_result->Assign(i, make_intrusive<type>(v1_i->accessor() op v2_i->accessor())); \
{ \
if ( zero_check && v2_i->IsZero() ) \
reporter->CPPRuntimeError("division/modulo by zero"); \
else \
v_result->Assign(i, make_intrusive<type>(v1_i->accessor() op v2_i->accessor())); \
} \
}
// Analogous to VEC_OP1, instantiates a function for a given binary operation,
// which might-or-might-not be supported for low-level "double" types.
// This version is for operations whose result type is the same as the
// operand type.
#define VEC_OP2(name, op, double_kernel) \
#define VEC_OP2(name, op, double_kernel, zero_check) \
VectorValPtr vec_op_##name##__CPP(const VectorValPtr& v1, const VectorValPtr& v2) \
{ \
if ( ! check_vec_sizes__CPP(v1, v2) ) \
@ -130,15 +136,15 @@ VEC_OP1(comp, ~, )
case TYPE_INTERNAL_INT: \
{ \
if ( vt->Yield()->Tag() == TYPE_BOOL ) \
VEC_OP2_KERNEL(AsBool, BoolVal, op) \
VEC_OP2_KERNEL(AsBool, BoolVal, op, zero_check) \
else \
VEC_OP2_KERNEL(AsInt, IntVal, op) \
VEC_OP2_KERNEL(AsInt, IntVal, op, zero_check) \
break; \
} \
\
case TYPE_INTERNAL_UNSIGNED: \
{ \
VEC_OP2_KERNEL(AsCount, CountVal, op) \
VEC_OP2_KERNEL(AsCount, CountVal, op, zero_check) \
break; \
} \
\
@ -151,27 +157,28 @@ VEC_OP1(comp, ~, )
}
// Instantiates a double_kernel for a binary operation.
#define VEC_OP2_WITH_DOUBLE(name, op) \
#define VEC_OP2_WITH_DOUBLE(name, op, zero_check) \
VEC_OP2( \
name, op, case TYPE_INTERNAL_DOUBLE \
: { \
VEC_OP2_KERNEL(AsDouble, DoubleVal, op) \
VEC_OP2_KERNEL(AsDouble, DoubleVal, op, zero_check) \
break; \
})
}, \
zero_check)
// The binary operations supported for vectors.
VEC_OP2_WITH_DOUBLE(add, +)
VEC_OP2_WITH_DOUBLE(sub, -)
VEC_OP2_WITH_DOUBLE(mul, *)
VEC_OP2_WITH_DOUBLE(div, /)
VEC_OP2(mod, %, )
VEC_OP2(and, &, )
VEC_OP2(or, |, )
VEC_OP2(xor, ^, )
VEC_OP2(andand, &&, )
VEC_OP2(oror, ||, )
VEC_OP2(lshift, <<, )
VEC_OP2(rshift, >>, )
VEC_OP2_WITH_DOUBLE(add, +, 0)
VEC_OP2_WITH_DOUBLE(sub, -, 0)
VEC_OP2_WITH_DOUBLE(mul, *, 0)
VEC_OP2_WITH_DOUBLE(div, /, 1)
VEC_OP2(mod, %, , 1)
VEC_OP2(and, &, , 0)
VEC_OP2(or, |, , 0)
VEC_OP2(xor, ^, , 0)
VEC_OP2(andand, &&, , 0)
VEC_OP2(oror, ||, , 0)
VEC_OP2(lshift, <<, , 0)
VEC_OP2(rshift, >>, , 0)
// A version of VEC_OP2 that instead supports relational operations, so
// the result type is always vector-of-bool.
@ -189,19 +196,19 @@ VEC_OP2(rshift, >>, )
{ \
case TYPE_INTERNAL_INT: \
{ \
VEC_OP2_KERNEL(AsInt, BoolVal, op) \
VEC_OP2_KERNEL(AsInt, BoolVal, op, 0) \
break; \
} \
\
case TYPE_INTERNAL_UNSIGNED: \
{ \
VEC_OP2_KERNEL(AsCount, BoolVal, op) \
VEC_OP2_KERNEL(AsCount, BoolVal, op, 0) \
break; \
} \
\
case TYPE_INTERNAL_DOUBLE: \
{ \
VEC_OP2_KERNEL(AsDouble, BoolVal, op) \
VEC_OP2_KERNEL(AsDouble, BoolVal, op, 0) \
break; \
} \
\
@ -394,6 +401,11 @@ VectorValPtr vector_coerce_to__CPP(const VectorValPtr& v, const TypePtr& targ)
if ( ! v_i )
continue;
// We compute these for each element to cover the case where
// the coerced vector is of type "any".
auto& t_i = v_i->GetType();
auto it = t_i->InternalType();
ValPtr r_i;
switch ( ytag )
{
@ -402,11 +414,21 @@ VectorValPtr vector_coerce_to__CPP(const VectorValPtr& v, const TypePtr& targ)
break;
case TYPE_INT:
r_i = val_mgr->Int(v_i->CoerceToInt());
if ( (it == TYPE_INTERNAL_UNSIGNED || it == TYPE_INTERNAL_DOUBLE) &&
would_overflow(t_i.get(), yt.get(), v_i.get()) )
reporter->CPPRuntimeError(
"overflow promoting from unsigned/double to signed arithmetic value");
else
r_i = val_mgr->Int(v_i->CoerceToInt());
break;
case TYPE_COUNT:
r_i = val_mgr->Count(v_i->CoerceToUnsigned());
if ( (it == TYPE_INTERNAL_INT || it == TYPE_INTERNAL_DOUBLE) &&
would_overflow(t_i.get(), yt.get(), v_i.get()) )
reporter->CPPRuntimeError(
"overflow promoting from signed/double to signed arithmetic value");
else
r_i = val_mgr->Count(v_i->CoerceToUnsigned());
break;
case TYPE_ENUM:

View file

@ -458,12 +458,13 @@ void CPPCompile::GenForStmt(const ForStmt* f)
auto v = f->StmtExprPtr();
auto t = v->GetType()->Tag();
auto loop_vars = f->LoopVars();
auto value_var = f->ValueVar();
if ( t == TYPE_TABLE )
GenForOverTable(v, f->ValueVar(), loop_vars);
GenForOverTable(v, value_var, loop_vars);
else if ( t == TYPE_VECTOR )
GenForOverVector(v, loop_vars);
GenForOverVector(v, value_var, loop_vars);
else if ( t == TYPE_STRING )
GenForOverString(v, loop_vars);
@ -515,7 +516,8 @@ void CPPCompile::GenForOverTable(const ExprPtr& tbl, const IDPtr& value_var,
}
}
void CPPCompile::GenForOverVector(const ExprPtr& vec, const IDPList* loop_vars)
void CPPCompile::GenForOverVector(const ExprPtr& vec, const IDPtr& value_var,
const IDPList* loop_vars)
{
Emit("auto vv__CPP = %s;", GenExpr(vec, GEN_DONT_CARE));
@ -523,7 +525,16 @@ void CPPCompile::GenForOverVector(const ExprPtr& vec, const IDPList* loop_vars)
StartBlock();
Emit("if ( ! vv__CPP->Has(i__CPP) ) continue;");
Emit("%s = i__CPP;", IDName((*loop_vars)[0]));
if ( value_var )
{
auto vv = IDName(value_var);
auto access = "vv__CPP->ValAt(i__CPP)";
auto native = GenericValPtrToGT(access, value_var->GetType(), GEN_NATIVE);
Emit("%s = %s;", IDName(value_var), native);
}
}
void CPPCompile::GenForOverString(const ExprPtr& str, const IDPList* loop_vars)

View file

@ -15,7 +15,7 @@ The maintenance workflow:
to check in updates to the list of how the compiler currently fares
on various btests (see end of this doc):
Mon Aug 1 16:39:05 PDT 2022
Fri Sep 16 16:13:49 PDT 2022
2. Run "find-test-files.sh" to generate a list (to stdout) of all of the
possible Zeek source files found in the test suite.
@ -30,7 +30,7 @@ The maintenance workflow:
5. Run "check-CPP-gen.sh" for each Zeek file that passed "check-zeek.sh".
This will generate a corresponding file in CPP-test/out* indicating whether
"-O gen-C++" can successfully run on the input. Presently, it should
be able to do so for all of them.
be able to do so for all of them, other than some exceptions noted below.
6. Copy ./src/zeek to ./zeek.HOLD. This is used to speed up recompilation used
in the next step. However, it's also a headache to do development to
@ -44,53 +44,48 @@ The maintenance workflow:
This will generate C++ for the BTest, compile it, and run the result
to see if it succeeds. It populates CPP-test/diag* with the Btest
diagnostic output (empty means success). For non-empty output,
either fix the problem or update the database if it's not fixable.
either fix the problem, add
# @TEST-REQUIRES: test "${ZEEK_USE_CPP}" != "1"
if the test isn't appropriate, or update the database if it's not
readily fixable, along with the reason why.
Note that do-CPP-btest.sh takes the same -d and -U arguments as
does btest, for displaying differences or updating the baseline
(which is Baseline.cpp).
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
Database Of Known Issues (be sure to keep sorted)
These BTests won't successfully run due to the indicated issue:
@if - has conditional code
bad-constructor - uses a complex old-style constructor that
should be updated
bad-when - deliberately has old-style "when" without captures
command-line-error - a deliberate command-line error
complex-to-debug - hard-to-figure-out failure
deprecated - uses features deprecated for -O C++
error-handling - behavior in face of an error differs
needs-plugin - requires knowing how to build an associated plugin
no-script - there's no actual script to compile
ZAM - meant specifically for -O ZAM
deprecated - uses deprecated features not supported for -O gen-C++
test-glitch - fails because of how we do testing: the first -O gen-C++
pass leaves httpd running, which causes the second -O use-C++
pass to fail when it tries to start up a new httpd
opaque - needs a global whose value is (or contains) an opaque value,
not currently supported
skipped - test can be skipped due to environmental reasons (e.g.,
whether we have a certain Kerberos setup)
Consider migrating these to have @TEST-REQUIRES clauses so we don't have
to maintain this list.
Database Of Known Issues (keep sorted)
../testing/btest/core/negative-time.test no-script
../testing/btest/core/pcap/dumper.zeek no-script
../testing/btest/core/pcap/input-error.zeek command-line-error
../testing/btest/core/proc-status-file.zeek no-script
../testing/btest/core/scalar-vector.zeek deprecated
../testing/btest/language/at-if-event.zeek @if
../testing/btest/language/at-if.zeek @if
../testing/btest/language/at-ifdef.zeek @if
../testing/btest/language/at-ifndef.zeek @if
../testing/btest/language/incr-vec-expr.test deprecated
../testing/btest/language/uninitialized-local2.zeek error-handling
../testing/btest/language/vector-deprecated.zeek deprecated
../testing/btest/language/vector-in-operator.zeek
../testing/btest/language/vector-in-operator.zeek deprecated
../testing/btest/bifs/table_values.zeek bad-constructor
../testing/btest/core/global_opaque_val.zeek opaque
../testing/btest/language/alternate-event-hook-prototypes.zeek deprecated
../testing/btest/language/global-init-calls-bif.zeek opaque
../testing/btest/language/redef-same-prefixtable-idx.zeek deprecated
../testing/btest/language/table-redef.zeek deprecated
../testing/btest/language/when-aggregates.zeek bad-when
../testing/btest/opt/opt-files.zeek ZAM
../testing/btest/opt/opt-files2.zeek ZAM
../testing/btest/opt/opt-files3.zeek ZAM
../testing/btest/opt/opt-func.zeek ZAM
../testing/btest/opt/opt-func2.zeek ZAM
../testing/btest/opt/opt-func3.zeek ZAM
../testing/btest/plugins/packet-protocol.zeek needs-plugin
../testing/btest/scripts/base/protocols/dhcp/dhcp-ack-msg-types.zeek no-script
../testing/btest/scripts/base/protocols/dhcp/dhcp-all-msg-types.zeek no-script
../testing/btest/scripts/base/protocols/dhcp/dhcp-discover-msg-types.zeek no-script
../testing/btest/scripts/base/protocols/dhcp/inform.test no-script
../testing/btest/scripts/base/utils/active-http.test complex-to-debug
../testing/btest/scripts/policy/protocols/ssl/validate-certs.zeek no-script
../testing/btest/supervisor/config-bare-mode.zeek @if
../testing/btest/scripts/base/protocols/krb/smb2_krb.test skipped
../testing/btest/scripts/base/protocols/krb/smb2_krb_nokeytab.test skipped
../testing/btest/scripts/base/utils/active-http.test test-glitch
../testing/btest/scripts/policy/frameworks/telemetry/log-prefixes.zeek opaque
../testing/btest/scripts/policy/frameworks/telemetry/log.zeek opaque
../testing/btest/scripts/policy/misc/dump-events.zeek skipped
../testing/btest/telemetry/counter.zeek opaque
../testing/btest/telemetry/gauge.zeek opaque
../testing/btest/telemetry/histogram.zeek opaque

View file

@ -3,4 +3,8 @@
find ../testing/btest -type f |
egrep -v 'Baseline|\.tmp' |
egrep '\.(zeek|test)$' |
sort
sort |
xargs egrep -l '^[ ]*(event|print)' |
xargs egrep -lc 'REQUIRES.*CPP.*((!=.*1)|(==.*0))' |
grep ':0$' |
sed 's,:0,,'