Merge remote-tracking branch 'origin/topic/vern/CPP-cond'

* origin/topic/vern/CPP-cond:
  fix btest comment to more accurately describe the test
  clang-format issue
  btests for erroneous script conditionals
  avoid compiling-to-C++ for functions potentially influenced by conditionals
  track the use of conditionals in functions and files
  AST profiles track the associated function/body/expression
This commit is contained in:
Tim Wojtulewicz 2021-12-01 10:25:32 -07:00
commit 6a7bbd5268
14 changed files with 170 additions and 55 deletions

12
CHANGES
View file

@ -1,3 +1,15 @@
4.2.0-dev.403 | 2021-12-01 10:25:32 -0700
* fix btest comment to more accurately describe the test (Vern Paxson, Corelight)
* btests for erroneous script conditionals (Vern Paxson, Corelight)
* avoid compiling-to-C++ for functions potentially influenced by conditionals (Vern Paxson, Corelight)
* track the use of conditionals in functions and files (Vern Paxson, Corelight)
* AST profiles track the associated function/body/expression (Vern Paxson, Corelight)
4.2.0-dev.396 | 2021-12-01 09:44:03 -0700 4.2.0-dev.396 | 2021-12-01 09:44:03 -0700
* GH-1873: Deprecate the tag types differently to avoid type clashes (Tim Wojtulewicz, Corelight) * GH-1873: Deprecate the tag types differently to avoid type clashes (Tim Wojtulewicz, Corelight)

View file

@ -1 +1 @@
4.2.0-dev.396 4.2.0-dev.403

View file

@ -18,6 +18,7 @@
#include "zeek/Val.h" #include "zeek/Val.h"
#include "zeek/module_util.h" #include "zeek/module_util.h"
#include "zeek/script_opt/ScriptOpt.h" #include "zeek/script_opt/ScriptOpt.h"
#include "zeek/script_opt/StmtOptInfo.h"
namespace zeek::detail namespace zeek::detail
{ {
@ -717,7 +718,7 @@ TraversalCode OuterIDBindingFinder::PostExpr(const Expr* expr)
static bool duplicate_ASTs = getenv("ZEEK_DUPLICATE_ASTS"); static bool duplicate_ASTs = getenv("ZEEK_DUPLICATE_ASTS");
void end_func(StmtPtr body) void end_func(StmtPtr body, bool free_of_conditionals)
{ {
if ( duplicate_ASTs && reporter->Errors() == 0 ) if ( duplicate_ASTs && reporter->Errors() == 0 )
// Only try duplication in the absence of errors. If errors // Only try duplication in the absence of errors. If errors
@ -729,6 +730,8 @@ void end_func(StmtPtr body)
// by duplicating can itself be correctly duplicated. // by duplicating can itself be correctly duplicated.
body = body->Duplicate()->Duplicate(); body = body->Duplicate()->Duplicate();
body->GetOptInfo()->is_free_of_conditionals = free_of_conditionals;
auto ingredients = std::make_unique<function_ingredients>(pop_scope(), std::move(body)); auto ingredients = std::make_unique<function_ingredients>(pop_scope(), std::move(body));
if ( ingredients->id->HasVal() ) if ( ingredients->id->HasVal() )

View file

@ -45,7 +45,7 @@ extern void add_type(ID* id, TypePtr t, std::unique_ptr<std::vector<AttrPtr>> at
extern void begin_func(IDPtr id, const char* module_name, FunctionFlavor flavor, bool is_redef, extern void begin_func(IDPtr id, const char* module_name, FunctionFlavor flavor, bool is_redef,
FuncTypePtr t, std::unique_ptr<std::vector<AttrPtr>> attrs = nullptr); FuncTypePtr t, std::unique_ptr<std::vector<AttrPtr>> attrs = nullptr);
extern void end_func(StmtPtr body); extern void end_func(StmtPtr body, bool free_of_conditionals);
// Gather all IDs referenced inside a body that aren't part of a given scope. // Gather all IDs referenced inside a body that aren't part of a given scope.
extern IDPList gather_outer_ids(ScopePtr scope, StmtPtr body); extern IDPList gather_outer_ids(ScopePtr scope, StmtPtr body);

View file

@ -105,6 +105,11 @@ extern const char* last_filename; // Absolute path of last file parsed.
extern const char* last_tok_filename; extern const char* last_tok_filename;
extern const char* last_last_tok_filename; extern const char* last_last_tok_filename;
extern int conditional_epoch; // lets us track embedded conditionals
// Whether the file we're currently parsing includes @if conditionals.
extern bool current_file_has_conditionals;
YYLTYPE GetCurrentLocation(); YYLTYPE GetCurrentLocation();
extern int yyerror(const char[]); extern int yyerror(const char[]);
extern int brolex(); extern int brolex();
@ -138,6 +143,7 @@ bool defining_global_ID = false;
std::vector<int> saved_in_init; std::vector<int> saved_in_init;
static Location func_hdr_location; static Location func_hdr_location;
static int func_hdr_cond_epoch = 0;
EnumType* cur_enum_type = nullptr; EnumType* cur_enum_type = nullptr;
static ID* cur_decl_type_id = nullptr; static ID* cur_decl_type_id = nullptr;
@ -1214,16 +1220,19 @@ decl:
zeekygen_mgr->Identifier(std::move(id)); zeekygen_mgr->Identifier(std::move(id));
} }
| func_hdr { func_hdr_location = @1; } func_body | func_hdr
{
| func_hdr { func_hdr_location = @1; } conditional_list func_body func_hdr_location = @1;
func_hdr_cond_epoch = conditional_epoch;
}
conditional_list func_body
| conditional | conditional
; ;
conditional_list: conditional_list:
conditional | conditional_list conditional
| conditional conditional_list ;
conditional: conditional:
TOK_ATIF '(' expr ')' TOK_ATIF '(' expr ')'
@ -1296,7 +1305,13 @@ func_body:
'}' '}'
{ {
set_location(func_hdr_location, @5); set_location(func_hdr_location, @5);
end_func({AdoptRef{}, $3});
bool free_of_conditionals = true;
if ( current_file_has_conditionals ||
conditional_epoch > func_hdr_cond_epoch )
free_of_conditionals = false;
end_func({AdoptRef{}, $3}, free_of_conditionals);
} }
; ;

View file

@ -48,7 +48,22 @@ extern YYLTYPE yylloc; // holds start line and column of token
extern zeek::EnumType* cur_enum_type; extern zeek::EnumType* cur_enum_type;
// Track the @if... depth. // Track the @if... depth.
std::intptr_t current_depth = 0; static std::intptr_t conditional_depth = 0;
zeek::detail::int_list entry_cond_depth; // @if depth upon starting file
// Tracks how many conditionals there have been. This value only
// increases. Its value is to support logic such as figuring out
// whether a function body has a conditional within it by comparing
// the epoch at the beginning of parsing the body with that at the end.
int conditional_epoch = 0;
// Whether the current file has included conditionals (so far).
bool current_file_has_conditionals = false;
// The files that include conditionals. Not currently used, but will be
// in the future once we add --optimize-files=/pat/.
std::unordered_set<std::string> files_with_conditionals;
zeek::detail::int_list if_stack; zeek::detail::int_list if_stack;
@ -99,6 +114,18 @@ static std::string find_relative_script_file(const std::string& filename)
return zeek::util::find_script_file(filename, zeek::util::zeek_path()); return zeek::util::find_script_file(filename, zeek::util::zeek_path());
} }
// Notes the start of a new conditional: bumps both the nesting depth and
// the conditional epoch, records the current file in
// files_with_conditionals the first time one is seen in it, and flags the
// current file as containing conditionals.
static void start_conditional()
{
++conditional_depth;
++conditional_epoch;
if ( ! current_file_has_conditionals )
// First time we've observed that this file includes conditionals.
files_with_conditionals.insert(::filename);
// Set on every call; only the insert above is guarded by the "if".
current_file_has_conditionals = true;
}
class FileInfo { class FileInfo {
public: public:
FileInfo(std::string restore_module = ""); FileInfo(std::string restore_module = "");
@ -418,11 +445,11 @@ when return TOK_WHEN;
@ifdef return TOK_ATIFDEF; @ifdef return TOK_ATIFDEF;
@ifndef return TOK_ATIFNDEF; @ifndef return TOK_ATIFNDEF;
@else return TOK_ATELSE; @else return TOK_ATELSE;
@endif --current_depth; @endif do_atendif();
<IGNORE>@if ++current_depth; <IGNORE>@if start_conditional();
<IGNORE>@ifdef ++current_depth; <IGNORE>@ifdef start_conditional();
<IGNORE>@ifndef ++current_depth; <IGNORE>@ifndef start_conditional();
<IGNORE>@else return TOK_ATELSE; <IGNORE>@else return TOK_ATELSE;
<IGNORE>@endif return TOK_ATENDIF; <IGNORE>@endif return TOK_ATENDIF;
<IGNORE>[^@\r\n]+ /* eat */ <IGNORE>[^@\r\n]+ /* eat */
@ -639,17 +666,19 @@ static int load_files(const char* orig_file)
zeek::detail::zeekygen_mgr->Script(file_path); zeek::detail::zeekygen_mgr->Script(file_path);
// "orig_file", could be an alias for yytext, which is ephemeral // "orig_file" could be an alias for yytext, which is ephemeral
// and will be zapped after the yy_switch_to_buffer() below. // and will be zapped after the yy_switch_to_buffer() below.
YY_BUFFER_STATE buffer; YY_BUFFER_STATE buffer;
if ( rc.first == 1 ) { if ( rc.first == 1 )
{
// Parse code provided by plugin. // Parse code provided by plugin.
assert(rc.second); assert(rc.second);
DBG_LOG(zeek::DBG_SCRIPTS, "Loading %s from code supplied by plugin ", file_path.c_str()); DBG_LOG(zeek::DBG_SCRIPTS, "Loading %s from code supplied by plugin ", file_path.c_str());
buffer = yy_scan_bytes(rc.second->data(), rc.second->size()); // this copies the data buffer = yy_scan_bytes(rc.second->data(), rc.second->size()); // this copies the data
} }
else { else
{
// Parse from file. // Parse from file.
assert(f); assert(f);
DBG_LOG(zeek::DBG_SCRIPTS, "Loading %s", file_path.c_str()); DBG_LOG(zeek::DBG_SCRIPTS, "Loading %s", file_path.c_str());
@ -663,6 +692,8 @@ static int load_files(const char* orig_file)
// every Obj created when parsing it. // every Obj created when parsing it.
yylloc.filename = filename = zeek::util::copy_string(file_path.c_str()); yylloc.filename = filename = zeek::util::copy_string(file_path.c_str());
entry_cond_depth.push_back(conditional_depth);
return 1; return 1;
} }
@ -693,9 +724,21 @@ public:
std::vector<const zeek::detail::NameExpr*> local_names; std::vector<const zeek::detail::NameExpr*> local_names;
}; };
// Starts skipping input: remembers on if_stack the conditional depth at
// which skipping began, then switches the scanner to the IGNORE start
// state.
static void begin_ignoring()
{
if_stack.push_back(conditional_depth);
BEGIN(IGNORE);
}
// Stops skipping input: drops the most recent entry from if_stack and
// returns the scanner to the INITIAL start state.
// NOTE(review): there is no emptiness check here; presumably callers
// guarantee if_stack is non-empty - confirm.
static void resume_processing()
{
if_stack.pop_back();
BEGIN(INITIAL);
}
void do_atif(zeek::detail::Expr* expr) void do_atif(zeek::detail::Expr* expr)
{ {
++current_depth; start_conditional();
LocalNameFinder cb; LocalNameFinder cb;
expr->Traverse(&cb); expr->Traverse(&cb);
@ -716,70 +759,52 @@ void do_atif(zeek::detail::Expr* expr)
} }
if ( ! val->AsBool() ) if ( ! val->AsBool() )
{ begin_ignoring();
if_stack.push_back(current_depth);
BEGIN(IGNORE);
}
} }
void do_atifdef(const char* id) void do_atifdef(const char* id)
{ {
++current_depth; start_conditional();
const auto& i = zeek::detail::lookup_ID(id, zeek::detail::current_module.c_str()); const auto& i = zeek::detail::lookup_ID(id, zeek::detail::current_module.c_str());
if ( ! i ) if ( ! i )
{ begin_ignoring();
if_stack.push_back(current_depth);
BEGIN(IGNORE);
}
} }
void do_atifndef(const char *id) void do_atifndef(const char *id)
{ {
++current_depth; start_conditional();
const auto& i = zeek::detail::lookup_ID(id, zeek::detail::current_module.c_str()); const auto& i = zeek::detail::lookup_ID(id, zeek::detail::current_module.c_str());
if ( i ) if ( i )
{ begin_ignoring();
if_stack.push_back(current_depth);
BEGIN(IGNORE);
}
} }
void do_atelse() void do_atelse()
{ {
if ( current_depth == 0 ) if ( conditional_depth == 0 )
zeek::reporter->Error("@else without @if..."); zeek::reporter->Error("@else without @if...");
if ( ! if_stack.empty() && current_depth > if_stack.back() ) if ( ! if_stack.empty() && conditional_depth > if_stack.back() )
return; return;
if ( YY_START == INITIAL ) if ( YY_START == INITIAL )
{ begin_ignoring();
if_stack.push_back(current_depth);
BEGIN(IGNORE);
}
else else
{ resume_processing();
if_stack.pop_back();
BEGIN(INITIAL);
}
} }
void do_atendif() void do_atendif()
{ {
if ( current_depth == 0 ) if ( conditional_depth <= entry_cond_depth.back() )
zeek::reporter->Error("unbalanced @if... @endif"); zeek::reporter->Error("unbalanced @if... @endif");
if ( current_depth == if_stack.back() ) if ( ! if_stack.empty() && conditional_depth == if_stack.back() )
{ resume_processing();
BEGIN(INITIAL);
if_stack.pop_back();
}
--current_depth; --conditional_depth;
} }
// Be careful to never delete things from this list, as the strings // Be careful to never delete things from this list, as the strings
@ -840,7 +865,15 @@ void add_to_name_list(char* s, char delim, zeek::name_list& nl)
int yywrap() int yywrap()
{ {
if ( entry_cond_depth.size() > 0 )
{
if ( conditional_depth > entry_cond_depth.back() )
zeek::reporter->FatalError("unbalanced @if... @endif");
entry_cond_depth.pop_back();
}
last_filename = ::filename; last_filename = ::filename;
current_file_has_conditionals = false;
if ( zeek::reporter->Errors() > 0 ) if ( zeek::reporter->Errors() > 0 )
return 1; return 1;

View file

@ -6,6 +6,8 @@
#include <sys/file.h> #include <sys/file.h>
#include <unistd.h> #include <unistd.h>
#include "zeek/script_opt/StmtOptInfo.h"
namespace zeek::detail namespace zeek::detail
{ {
@ -50,6 +52,14 @@ bool is_CPP_compilable(const ProfileFunc* pf, const char** reason)
return false; return false;
} }
auto body = pf->ProfiledBody();
if ( body && ! body->GetOptInfo()->is_free_of_conditionals )
{
if ( reason )
*reason = "body may be affected by @if conditional";
return false;
}
return true; return true;
} }

View file

@ -60,12 +60,23 @@ p_hash_type script_specific_hash(const StmtPtr& body, p_hash_type generic_hash)
ProfileFunc::ProfileFunc(const Func* func, const StmtPtr& body, bool _abs_rec_fields) ProfileFunc::ProfileFunc(const Func* func, const StmtPtr& body, bool _abs_rec_fields)
{ {
profiled_func = func;
profiled_body = body.get();
abs_rec_fields = _abs_rec_fields; abs_rec_fields = _abs_rec_fields;
Profile(func->GetType().get(), body); Profile(func->GetType().get(), body);
} }
// Profiles a bare statement. The statement is recorded as the profiled
// body (no function or expression is recorded by this constructor) and is
// then traversed with this object as the traversal callback.
ProfileFunc::ProfileFunc(const Stmt* s, bool _abs_rec_fields)
{
profiled_body = s;
abs_rec_fields = _abs_rec_fields;
s->Traverse(this);
}
ProfileFunc::ProfileFunc(const Expr* e, bool _abs_rec_fields) ProfileFunc::ProfileFunc(const Expr* e, bool _abs_rec_fields)
{ {
profiled_expr = e;
abs_rec_fields = _abs_rec_fields; abs_rec_fields = _abs_rec_fields;
if ( e->Tag() == EXPR_LAMBDA ) if ( e->Tag() == EXPR_LAMBDA )
@ -84,12 +95,6 @@ ProfileFunc::ProfileFunc(const Expr* e, bool _abs_rec_fields)
e->Traverse(this); e->Traverse(this);
} }
ProfileFunc::ProfileFunc(const Stmt* s, bool _abs_rec_fields)
{
abs_rec_fields = _abs_rec_fields;
s->Traverse(this);
}
void ProfileFunc::Profile(const FuncType* ft, const StmtPtr& body) void ProfileFunc::Profile(const FuncType* ft, const StmtPtr& body)
{ {
num_params = ft->Params()->NumFields(); num_params = ft->Params()->NumFields();

View file

@ -100,6 +100,12 @@ public:
ProfileFunc(const Stmt* body, bool abs_rec_fields = false); ProfileFunc(const Stmt* body, bool abs_rec_fields = false);
ProfileFunc(const Expr* func, bool abs_rec_fields = false); ProfileFunc(const Expr* func, bool abs_rec_fields = false);
// Returns the function, body, or expression profiled. Each can be
// null depending on the constructor used.
const Func* ProfiledFunc() const { return profiled_func; }
const Stmt* ProfiledBody() const { return profiled_body; }
const Expr* ProfiledExpr() const { return profiled_expr; }
// See the comments for the associated member variables for each // See the comments for the associated member variables for each
// of these accessors. // of these accessors.
const std::unordered_set<const ID*>& Globals() const { return globals; } const std::unordered_set<const ID*>& Globals() const { return globals; }
@ -157,6 +163,12 @@ protected:
// Take note of an assignment to an identifier. // Take note of an assignment to an identifier.
void TrackAssignment(const ID* id); void TrackAssignment(const ID* id);
// The function, body, or expression profiled. Can be null
// depending on which constructor was used.
const Func* profiled_func = nullptr;
const Stmt* profiled_body = nullptr;
const Expr* profiled_expr = nullptr;
// Globals seen in the function. // Globals seen in the function.
// //
// Does *not* include globals solely seen as the function being // Does *not* include globals solely seen as the function being

View file

@ -22,6 +22,10 @@ public:
// True if we observe that there is a branch out of the statement // True if we observe that there is a branch out of the statement
// to just beyond its extent, such as due to a "break". // to just beyond its extent, such as due to a "break".
bool contains_branch_beyond = false; bool contains_branch_beyond = false;
// Whether this statement is free of the possible influence
// of conditional code.
bool is_free_of_conditionals = true;
}; };
} // namespace zeek::detail } // namespace zeek::detail

View file

@ -0,0 +1,2 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
fatal error in <...>/dangling-at.zeek, line 9: unbalanced @if... @endif

View file

@ -0,0 +1,2 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
error in <...>/orphan-endif.zeek, line 8: unbalanced @if... @endif

View file

@ -0,0 +1,8 @@
# @TEST-EXEC-FAIL: zeek -b %INPUT
# @TEST-EXEC: TEST_DIFF_CANONIFIER=$SCRIPTS/diff-remove-abspath btest-diff .stderr
# Check that dangling conditionals are detected.
@if ( 1==1 )
print "it's true!";
@else
lalala

View file

@ -0,0 +1,9 @@
# @TEST-EXEC-FAIL: zeek -b %INPUT
# @TEST-EXEC: TEST_DIFF_CANONIFIER=$SCRIPTS/diff-remove-abspath btest-diff .stderr
# Check that orphan endif's are detected.
@if ( T )
print "so far, so good";
@endif
@endif
print "whoops!";