diff --git a/CHANGES b/CHANGES index 6451ab284f..f076b314b6 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,15 @@ +5.0.0-dev.39 | 2022-01-11 15:02:37 -0700 + + * support for compiling type-based switches to C++ (Vern Paxson, Corelight) + + * make encountering uncompilable "standalone" functions a hard error (Vern Paxson, Corelight) + + * fix: ZAM could misinterpret a "type" switch that starts with a "default" (Vern Paxson, Corelight) + + * fix: don't treat pseudo-identifiers in type cases as local variables (Vern Paxson, Corelight) + + * removed development helper scripts now obsolete with --optimize-files= (Vern Paxson, Corelight) + 5.0.0-dev.33 | 2022-01-11 14:57:58 -0700 * Bump CI sanitizer task to Ubuntu 20.04 (Tim Wojtulewicz, Corelight) diff --git a/VERSION b/VERSION index 0f2c232205..cb5162155f 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -5.0.0-dev.33 +5.0.0-dev.39 diff --git a/src/Stmt.h b/src/Stmt.h index b6161f2fd3..03a0a54726 100644 --- a/src/Stmt.h +++ b/src/Stmt.h @@ -191,6 +191,7 @@ public: protected: friend class ZAMCompiler; + friend class CPPCompile; int DefaultCaseIndex() const { return default_case_idx; } const auto& ValueMap() const { return case_label_value_map; } diff --git a/src/script_opt/CPP/Compile.h b/src/script_opt/CPP/Compile.h index 11d8808a75..0489be6e83 100644 --- a/src/script_opt/CPP/Compile.h +++ b/src/script_opt/CPP/Compile.h @@ -718,7 +718,11 @@ private: void GenAddStmt(const ExprStmt* es); void GenDeleteStmt(const ExprStmt* es); void GenEventStmt(const EventStmt* ev); + void GenSwitchStmt(const SwitchStmt* sw); + void GenTypeSwitchStmt(const Expr* e, const case_list* cases); + void GenTypeSwitchCase(const ID* id, int case_offset, bool is_multi); + void GenValueSwitchStmt(const Expr* e, const case_list* cases); void GenForStmt(const ForStmt* f); void GenForOverTable(const ExprPtr& tbl, const IDPtr& value_var, const IDPList* loop_vars); diff --git a/src/script_opt/CPP/Driver.cc b/src/script_opt/CPP/Driver.cc index 
f9e97da5b4..8f27fbbf59 100644 --- a/src/script_opt/CPP/Driver.cc +++ b/src/script_opt/CPP/Driver.cc @@ -113,8 +113,13 @@ void CPPCompile::Compile(bool report_uncompilable) } else { - if ( reason && report_uncompilable ) + if ( reason && standalone ) + reporter->Error("%s cannot be compiled to standalone C++ due to %s", f->Name(), + reason); + + else if ( reason && report_uncompilable ) fprintf(stderr, "%s cannot be compiled to C++ due to %s\n", f->Name(), reason); + not_fully_compilable.insert(f->Name()); } } diff --git a/src/script_opt/CPP/README.md b/src/script_opt/CPP/README.md index 0187be4397..f34dbd079e 100644 --- a/src/script_opt/CPP/README.md +++ b/src/script_opt/CPP/README.md @@ -176,9 +176,6 @@ an extensible record (i.e., fields added using `redef`). * The compiler will not compile bodies that include "when" statements This is fairly involved to fix. -* The compiler will not compile bodies that include "type" switches. -This is not hard to fix. - * If a lambda generates an event that is not otherwise referred to, that event will not be registered upon instantiating the lambda. This is not particularly difficult to fix. diff --git a/src/script_opt/CPP/Stmts.cc b/src/script_opt/CPP/Stmts.cc index f62e72b4cb..1e3c89c460 100644 --- a/src/script_opt/CPP/Stmts.cc +++ b/src/script_opt/CPP/Stmts.cc @@ -233,6 +233,104 @@ void CPPCompile::GenSwitchStmt(const SwitchStmt* sw) auto e = sw->StmtExpr(); auto cases = sw->Cases(); + if ( sw->TypeMap()->empty() ) + GenValueSwitchStmt(e, cases); + else + GenTypeSwitchStmt(e, cases); + } + +void CPPCompile::GenTypeSwitchStmt(const Expr* e, const case_list* cases) + { + // Start a scoping block so we avoid naming conflicts if a function + // has multiple type switches. 
+ Emit("{"); + Emit("static std::vector CPP__switch_types ="); + StartBlock(); + + for ( const auto& c : *cases ) + { + auto tc = c->TypeCases(); + if ( tc ) + for ( auto id : *tc ) + Emit(Fmt(TypeOffset(id->GetType())) + ","); + } + EndBlock(true); + + NL(); + + Emit("ValPtr CPP__sw_val = %s;", GenExpr(e, GEN_VAL_PTR)); + Emit("auto& CPP__sw_val_t = CPP__sw_val->GetType();"); + Emit("int CPP__sw_type_ind = 0;"); + + Emit("for ( auto CPP__st : CPP__switch_types )"); + StartBlock(); + Emit("if ( can_cast_value_to_type(CPP__sw_val.get(), CPP__Type__[CPP__st].get()) )"); + Emit("\tbreak;"); + Emit("++CPP__sw_type_ind;"); + EndBlock(); + + Emit("switch ( CPP__sw_type_ind ) {"); + + ++break_level; + + int case_offset = 0; + + for ( const auto& c : *cases ) + { + auto tc = c->TypeCases(); + if ( tc ) + { + bool is_multi = tc->size() > 1; + for ( auto id : *tc ) + GenTypeSwitchCase(id, case_offset++, is_multi); + } + else + Emit("default:"); + + StartBlock(); + GenStmt(c->Body()); + EndBlock(); + } + + --break_level; + + Emit("}"); // end the switch + Emit("}"); // end the scoping block + } + +void CPPCompile::GenTypeSwitchCase(const ID* id, int case_offset, bool is_multi) + { + Emit("case %s:", Fmt(case_offset)); + + if ( ! id->Name() ) + // No assignment, we're done. + return; + + // It's an assignment case. If it's a collection of multiple cases, + // assign to the variable only for this particular case. 
+ IndentUp(); + + if ( is_multi ) + { + Emit("if ( CPP__sw_type_ind == %s )", Fmt(case_offset)); + IndentUp(); + } + + auto targ_val = "CPP__sw_val.get()"; + auto targ_type = string("CPP__Type__[CPP__switch_types[") + Fmt(case_offset) + "]].get()"; + + auto cast = string("cast_value_to_type(") + targ_val + ", " + targ_type + ")"; + + Emit("%s = %s;", LocalName(id), GenericValPtrToGT(cast, id->GetType(), GEN_NATIVE)); + + IndentDown(); + + if ( is_multi ) + IndentDown(); + } + +void CPPCompile::GenValueSwitchStmt(const Expr* e, const case_list* cases) + { auto e_it = e->GetType()->InternalType(); bool is_int = e_it == TYPE_INTERNAL_INT; bool is_uint = e_it == TYPE_INTERNAL_UNSIGNED; diff --git a/src/script_opt/CPP/Util.cc b/src/script_opt/CPP/Util.cc index 8c6e6d0a91..1afbb0c4a3 100644 --- a/src/script_opt/CPP/Util.cc +++ b/src/script_opt/CPP/Util.cc @@ -45,13 +45,6 @@ bool is_CPP_compilable(const ProfileFunc* pf, const char** reason) return false; } - if ( pf->TypeSwitches().size() > 0 ) - { - if ( reason ) - *reason = "use of type-based \"switch\""; - return false; - } - auto body = pf->ProfiledBody(); if ( body && ! body->GetOptInfo()->is_free_of_conditionals ) { diff --git a/src/script_opt/CPP/bare-embedded-build b/src/script_opt/CPP/bare-embedded-build deleted file mode 100755 index af0cf37a9a..0000000000 --- a/src/script_opt/CPP/bare-embedded-build +++ /dev/null @@ -1,9 +0,0 @@ -#! /bin/sh - -build=../../../build - -cd $build -export -n ZEEK_USE_CPP ZEEK_ADD_CPP -export ZEEK_HASH_DIR=. -echo | src/zeek -b -O gen-C++ -ninja || echo Bare embedded build failed diff --git a/src/script_opt/CPP/eval-test-suite b/src/script_opt/CPP/eval-test-suite deleted file mode 100755 index b1856f1156..0000000000 --- a/src/script_opt/CPP/eval-test-suite +++ /dev/null @@ -1,12 +0,0 @@ -#! /bin/sh - -base=../../.. 
-test=$base/testing/btest -so=$base/src/script_opt/CPP -build=$base/build -gen=CPP-gen-addl.h - -export -n ZEEK_ADD_CPP -cd $test -rm -rf .tmp -../../auxil/btest/btest -j -a cpp -f cpp-test.diag $1 diff --git a/src/script_opt/CPP/full-embedded-build b/src/script_opt/CPP/full-embedded-build deleted file mode 100755 index de4ac500cb..0000000000 --- a/src/script_opt/CPP/full-embedded-build +++ /dev/null @@ -1,9 +0,0 @@ -#! /bin/sh - -build=../../../build - -cd $build -export -n ZEEK_USE_CPP ZEEK_ADD_CPP -export ZEEK_HASH_DIR=. -echo | src/zeek -O gen-C++ -ninja || echo Full embedded build failed diff --git a/src/script_opt/CPP/non-embedded-build b/src/script_opt/CPP/non-embedded-build deleted file mode 100755 index ec925184e4..0000000000 --- a/src/script_opt/CPP/non-embedded-build +++ /dev/null @@ -1,5 +0,0 @@ -#! /bin/sh - -cd ../../../build -echo >CPP-gen.cc -ninja || echo Non-embedded build failed diff --git a/src/script_opt/CPP/single-full-test.sh b/src/script_opt/CPP/single-full-test.sh deleted file mode 100755 index 71fc927b27..0000000000 --- a/src/script_opt/CPP/single-full-test.sh +++ /dev/null @@ -1,24 +0,0 @@ -#! /bin/sh - -echo $1 - -base=../../.. -test=$base/testing/btest -build=$base/build -gen=CPP-gen.cc - -./non-embedded-build >$build/errs 2>&1 || echo non-embedded build failed - -export -n ZEEK_USE_CPP -export ZEEK_HASH_DIR=$test ZEEK_GEN_CPP= -cd $test -../../auxil/btest/btest $1 >jbuild-$1.out 2>&1 -grep -c '^namespace' $gen -mv $gen $build/ -cd $build -ninja >&errs || echo build for $1 failed - -export -n ZEEK_GEN_CPP -cd $test -rm -rf .tmp -../../auxil/btest/btest -a cpp -f cpp-test.$1.diag $1 diff --git a/src/script_opt/CPP/single-test.sh b/src/script_opt/CPP/single-test.sh deleted file mode 100755 index afc552ce03..0000000000 --- a/src/script_opt/CPP/single-test.sh +++ /dev/null @@ -1,23 +0,0 @@ -#! /bin/sh - -echo $1 - -base=../../.. 
-test=$base/testing/btest -build=$base/build -gen=CPP-gen.cc - -export -n ZEEK_USE_CPP -export ZEEK_HASH_DIR=$test ZEEK_ADD_CPP= -cd $test -cp $build/CPP-hashes.dat . -../../auxil/btest/btest $1 >cpp-build-$1.out 2>&1 -grep -c '^namespace' $gen -mv $gen $build -cd $build -ninja >&errs || echo build for $1 failed - -export -n ZEEK_ADD_CPP -cd $test -rm -rf .tmp -../../auxil/btest/btest -j -a cpp -f cpp-test.$1.diag $1 diff --git a/src/script_opt/CPP/test-suite-build b/src/script_opt/CPP/test-suite-build deleted file mode 100755 index 0e06a7ba71..0000000000 --- a/src/script_opt/CPP/test-suite-build +++ /dev/null @@ -1,18 +0,0 @@ -#! /bin/sh - -base=../../.. -test=$base/testing/btest -so=$base/src/script_opt/CPP -build=$base/build -gen=CPP-gen-addl.h - -export -n ZEEK_USE_CPP -export ZEEK_HASH_DIR=$test ZEEK_ADD_CPP= -cd $test -cp $build/CPP-hashes.dat . -echo >$gen -../../auxil/btest/btest $1 >test-suite-build.out 2>&1 -grep -c '^namespace' $gen -mv $gen $so -cd $build -ninja >&errs || echo test suite build failed diff --git a/src/script_opt/CPP/update-single-test.sh b/src/script_opt/CPP/update-single-test.sh deleted file mode 100755 index 35fb2a2d77..0000000000 --- a/src/script_opt/CPP/update-single-test.sh +++ /dev/null @@ -1,21 +0,0 @@ -#! /bin/sh - -base=../../.. -test=$base/testing/btest -build=$base/build -gen=CPP-gen.cc - -export -n ZEEK_USE_CPP -export ZEEK_HASH_DIR=$test ZEEK_ADD_CPP= -cd $test -cp $build/CPP-hashes.dat . 
-../../auxil/btest/btest $1 >jbuild-$1.out 2>&1 -grep -c '^namespace' $gen -mv $gen $build/ -cd $build -ninja >&errs || echo build for $1 failed - -export -n ZEEK_ADD_CPP -cd $test -rm -rf .tmp -../../auxil/btest/btest -U -a cpp -f cpp-test.$1.diag.update $1 diff --git a/src/script_opt/ProfileFunc.cc b/src/script_opt/ProfileFunc.cc index b29bf068d4..5d6e8d8566 100644 --- a/src/script_opt/ProfileFunc.cc +++ b/src/script_opt/ProfileFunc.cc @@ -174,7 +174,11 @@ TraversalCode ProfileFunc::PreStmt(const Stmt* s) if ( idl ) { for ( auto id : *idl ) - locals.insert(id); + // Make sure it's not a placeholder + // identifier, used when there's + // no explicit one. + if ( id->Name() ) + locals.insert(id); is_type_switch = true; } diff --git a/src/script_opt/ZAM/Stmt.cc b/src/script_opt/ZAM/Stmt.cc index f6af240c95..a9c5637310 100644 --- a/src/script_opt/ZAM/Stmt.cc +++ b/src/script_opt/ZAM/Stmt.cc @@ -410,12 +410,10 @@ const ZAMStmt ZAMCompiler::CompileSwitch(const SwitchStmt* sw) // Need to track a new set of contexts for "break" statements. PushBreaks(); - auto& cases = *sw->Cases(); - - if ( cases.length() > 0 && cases[0]->TypeCases() ) - return TypeSwitch(sw, n, c); - else + if ( sw->TypeMap()->empty() ) return ValueSwitch(sw, n, c); + else + return TypeSwitch(sw, n, c); } const ZAMStmt ZAMCompiler::ValueSwitch(const SwitchStmt* sw, const NameExpr* v, const ConstExpr* c)