Merge remote-tracking branch 'origin/topic/vern/script-opt-uncompilable-AST'

* origin/topic/vern/script-opt-uncompilable-AST:
  ScriptOpt: Fail compilation if known exprs/stmts is outdated
  skip optimization of functions with AST nodes unknown to script optimization
This commit is contained in:
Arne Welzel 2024-12-09 14:03:29 +01:00
commit 0ebcd9608f
9 changed files with 170 additions and 10 deletions

View file

@ -1,3 +1,11 @@
7.1.0-dev.700 | 2024-12-09 14:03:29 +0100
* ScriptOpt: Fail compilation if known exprs/stmts is outdated (Arne Welzel, Corelight)
* skip optimization of functions with AST nodes unknown to script optimization (Vern Paxson, Corelight)
* Disable CTU-SME test under TSAN (Arne Welzel, Corelight)
7.1.0-dev.695 | 2024-12-06 17:33:19 +0100
* btest/http: Demo StreamEvent analyzer with HTTP::upgrade_analyzers (Arne Welzel, Corelight)

View file

@ -1 +1 @@
7.1.0-dev.695
7.1.0-dev.700

View file

@ -39,6 +39,12 @@ string scope_prefix(const string& scope) { return "zeek::detail::CPP_" + scope;
// Integer overload: converts the numeric scope to its decimal string form
// and defers to the string-based scope_prefix().
string scope_prefix(int scope) {
    const string scope_name = to_string(scope);
    return scope_prefix(scope_name);
}
bool is_CPP_compilable(const ProfileFunc* pf, const char** reason) {
if ( has_AST_node_unknown_to_script_opt(pf, false) ) {
if ( reason )
*reason = "unknown AST node type";
return false;
}
if ( analysis_options.allow_cond )
return true;

View file

@ -10,6 +10,7 @@
#include "zeek/script_opt/ProfileFunc.h"
#include "zeek/script_opt/ScriptOpt.h"
#include "zeek/script_opt/StmtOptInfo.h"
#include "zeek/script_opt/ZAM/Support.h"
namespace zeek::detail {
@ -160,7 +161,7 @@ void Inliner::Analyze() {
if ( non_recursive_funcs.count(func) == 0 )
continue;
if ( body->Tag() == STMT_CPP )
if ( ! is_ZAM_compilable(f.Profile()) )
continue;
inline_ables[func] = f.Profile();

View file

@ -101,7 +101,7 @@ ProfileFunc::ProfileFunc(const Expr* e, bool _abs_rec_fields) {
}
TraversalCode ProfileFunc::PreStmt(const Stmt* s) {
stmts.push_back(s);
stmts.push_back({NewRef{}, const_cast<Stmt*>(s)});
switch ( s->Tag() ) {
case STMT_INIT:
@ -185,7 +185,7 @@ TraversalCode ProfileFunc::PreStmt(const Stmt* s) {
}
TraversalCode ProfileFunc::PreExpr(const Expr* e) {
exprs.push_back(e);
exprs.push_back({NewRef{}, const_cast<Expr*>(e)});
TrackType(e->GetType());
@ -867,11 +867,11 @@ void ProfileFuncs::ComputeProfileHash(std::shared_ptr<ProfileFunc> pf) {
h = merge_p_hashes(h, p_hash(ov[i]->Name()));
h = merge_p_hashes(h, p_hash("stmts"));
for ( auto i : pf->Stmts() )
for ( auto& i : pf->Stmts() )
h = merge_p_hashes(h, p_hash(i->Tag()));
h = merge_p_hashes(h, p_hash("exprs"));
for ( auto i : pf->Exprs() )
for ( auto& i : pf->Exprs() )
h = merge_p_hashes(h, p_hash(i->Tag()));
h = merge_p_hashes(h, p_hash("ids"));

View file

@ -66,6 +66,8 @@ inline p_hash_type merge_p_hashes(p_hash_type h1, p_hash_type h2) {
using AttrSet = std::unordered_set<const Attr*>;
using AttrVec = std::vector<const Attr*>;
class ProfileFuncs;
// Class for profiling the components of a single function (or expression).
class ProfileFunc : public TraversalCallback {
public:
@ -101,8 +103,8 @@ public:
const auto& TableRefs() const { return tbl_refs; }
const auto& AggrMods() const { return aggr_mods; }
const IDSet& Inits() const { return inits; }
const std::vector<const Stmt*>& Stmts() const { return stmts; }
const std::vector<const Expr*>& Exprs() const { return exprs; }
const std::vector<StmtPtr>& Stmts() const { return stmts; }
const std::vector<ExprPtr>& Exprs() const { return exprs; }
const std::vector<const LambdaExpr*>& Lambdas() const { return lambdas; }
const std::vector<const ConstExpr*>& Constants() const { return constants; }
const IDSet& UnorderedIdentifiers() const { return ids; }
@ -213,11 +215,11 @@ protected:
// Statements seen in the function. Does not include indirect
// statements, such as those in lambda bodies.
std::vector<const Stmt*> stmts;
std::vector<StmtPtr> stmts;
// Expressions seen in the function. Does not include indirect
// expressions (such as those appearing in attributes of types).
std::vector<const Expr*> exprs;
std::vector<ExprPtr> exprs;
// Lambdas seen in the function. We don't profile lambda bodies,
// but rather make them available for separate profiling if

View file

@ -659,4 +659,136 @@ void profile_script_execution() {
// Wraps up script execution; the only work done here is a call to
// profile_script_execution().
void finish_script_execution() {
    profile_script_execution();
}
// Returns true if the given profile contains a statement or expression
// whose tag is not in the sets of tags known to script optimization,
// and false if every profiled statement and expression is known.
//
// For now, we have equivalent concerns between ZAM and compile-to-C++,
// so the second argument is unused.
bool has_AST_node_unknown_to_script_opt(const ProfileFunc* prof, bool /* is_ZAM */) {
    // Note that the following sets are not comprehensive across the
    // standard tags, because some tags are only generated *by* script
    // optimization. Tags that are commented out are not members of the
    // set, so encountering one of them marks the function as unknown.

    // clang-format off
    static const std::set<StmtTag> known_stmts = {
        // STMT_ALARM
        STMT_PRINT,
        STMT_EVENT,
        STMT_EXPR,
        STMT_IF,
        STMT_WHEN,
        STMT_SWITCH,
        STMT_FOR,
        STMT_NEXT,
        STMT_BREAK,
        STMT_RETURN,
        STMT_LIST,
        // STMT_EVENT_BODY_LIST,
        STMT_INIT,
        STMT_FALLTHROUGH,
        STMT_WHILE,
        // STMT_CATCH_RETURN,
        // STMT_CHECK_ANY_LEN,
        // STMT_CPP,
        // STMT_ZAM,
        STMT_NULL,
        STMT_ASSERT,
        // STMT_EXTERN,
        // STMT_STD_FUNCTION,
    };
    // clang-format on

    // Number of statement tags that existed when known_stmts was last
    // reviewed. Kept as a scoped constant rather than a macro so that it
    // does not leak into the rest of the translation unit.
    constexpr int script_opt_num_stmts = 24;

    // Fail compilation if NUM_STMTS in StmtEnums.h changes.
    // Update known_stmts list above appropriately after adding
    // support and increase script_opt_num_stmts.
    static_assert(NUM_STMTS == script_opt_num_stmts,
                  "NUM_STMTS changed: review known_stmts and update script_opt_num_stmts");

    for ( const auto& s : prof->Stmts() )
        if ( known_stmts.count(s->Tag()) == 0 )
            return true;

    // clang-format off
    static const std::set<ExprTag> known_exprs = {
        // EXPR_ANY,
        EXPR_NAME,
        EXPR_CONST,
        EXPR_CLONE,
        EXPR_INCR,
        EXPR_DECR,
        EXPR_NOT,
        EXPR_COMPLEMENT,
        EXPR_POSITIVE,
        EXPR_NEGATE,
        EXPR_ADD, EXPR_SUB,
        EXPR_AGGR_ADD,
        EXPR_AGGR_DEL,
        EXPR_ADD_TO,
        EXPR_REMOVE_FROM,
        EXPR_TIMES,
        EXPR_DIVIDE,
        EXPR_MASK,
        EXPR_MOD,
        EXPR_AND,
        EXPR_OR,
        EXPR_XOR,
        EXPR_LSHIFT,
        EXPR_RSHIFT,
        EXPR_AND_AND,
        EXPR_OR_OR,
        EXPR_LT,
        EXPR_LE,
        EXPR_EQ,
        EXPR_NE,
        EXPR_GE,
        EXPR_GT,
        EXPR_COND,
        EXPR_REF,
        EXPR_ASSIGN,
        EXPR_INDEX,
        EXPR_FIELD,
        EXPR_HAS_FIELD,
        EXPR_RECORD_CONSTRUCTOR,
        EXPR_TABLE_CONSTRUCTOR,
        EXPR_SET_CONSTRUCTOR,
        EXPR_VECTOR_CONSTRUCTOR,
        EXPR_FIELD_ASSIGN,
        EXPR_IN,
        EXPR_LIST,
        EXPR_CALL,
        EXPR_LAMBDA,
        EXPR_EVENT,
        EXPR_SCHEDULE,
        EXPR_ARITH_COERCE,
        EXPR_RECORD_COERCE,
        EXPR_TABLE_COERCE,
        EXPR_VECTOR_COERCE,
        EXPR_TO_ANY_COERCE,
        EXPR_FROM_ANY_COERCE,
        EXPR_SIZE,
        EXPR_CAST,
        EXPR_IS,
        // EXPR_INDEX_SLICE_ASSIGN,
        EXPR_INLINE,
        // EXPR_APPEND_TO,
        // EXPR_INDEX_ASSIGN,
        // EXPR_FIELD_LHS_ASSIGN,
        // EXPR_REC_ASSIGN_FIELDS,
        // EXPR_REC_ADD_FIELDS,
        // EXPR_REC_CONSTRUCT_WITH_REC,
        // EXPR_FROM_ANY_VEC_COERCE,
        // EXPR_ANY_INDEX,
        // EXPR_SCRIPT_OPT_BUILTIN,
        // EXPR_NOP,
    };
    // clang-format on

    // Number of expression tags that existed when known_exprs was last
    // reviewed (scoped constant, see script_opt_num_stmts above).
    constexpr int script_opt_num_exprs = 70;

    // Fail compilation if NUM_EXPRS in Expr.h changes.
    // Update known_exprs list above appropriately after
    // adding support and increase script_opt_num_exprs.
    static_assert(NUM_EXPRS == script_opt_num_exprs,
                  "NUM_EXPRS changed: review known_exprs and update script_opt_num_exprs");

    for ( const auto& e : prof->Exprs() )
        if ( known_exprs.count(e->Tag()) == 0 )
            return true;

    return false;
}
} // namespace zeek::detail

View file

@ -269,6 +269,11 @@ extern void clear_script_analysis();
// Called when Zeek is terminating.
extern void finish_script_execution();
// Returns true if the given profile indicates the presence of an AST
// node not known to script optimization. The second argument specifies
// whether we're doing ZAM optimization; if not, compile-to-C++ is assumed.
extern bool has_AST_node_unknown_to_script_opt(const ProfileFunc* prof, bool /* is_ZAM */);
// Returns true if the given call has a specialized ZAM equivalent when
// used in a conditional.
extern bool IsZAM_BuiltInCond(const CallExpr* c);

View file

@ -117,6 +117,12 @@ bool file_mgr_set_reassembly_buffer(StringVal* file_id, uint64_t max) {
bool ZAM_error = false;
bool is_ZAM_compilable(const ProfileFunc* pf, const char** reason) {
if ( has_AST_node_unknown_to_script_opt(pf, true) ) {
if ( reason )
*reason = "unknown AST node type";
return false;
}
auto b = pf->ProfiledBody();
auto is_hook = pf->ProfiledFunc()->Flavor() == FUNC_FLAVOR_HOOK;
if ( b && ! script_is_valid(b, is_hook) ) {