Merge remote-tracking branch 'origin/topic/vern/CPP-cond'

* origin/topic/vern/CPP-cond:
  fix btest comment to more accurately describe the test
  clang-format issue
  btests for erroneous script conditionals
  avoid compiling-to-C++ for functions potentially influenced by conditionals
  track the use of conditionals in functions and files
  AST profiles track the associated function/body/expression
This commit is contained in:
Tim Wojtulewicz 2021-12-01 10:25:32 -07:00
commit 6a7bbd5268
14 changed files with 170 additions and 55 deletions

12
CHANGES
View file

@ -1,3 +1,15 @@
4.2.0-dev.403 | 2021-12-01 10:25:32 -0700
* fix btest comment to more accurately describe the test (Vern Paxson, Corelight)
* btests for erroneous script conditionals (Vern Paxson, Corelight)
* avoid compiling-to-C++ for functions potentially influenced by conditionals (Vern Paxson, Corelight)
* track the use of conditionals in functions and files (Vern Paxson, Corelight)
* AST profiles track the associated function/body/expression (Vern Paxson, Corelight)
4.2.0-dev.396 | 2021-12-01 09:44:03 -0700
* GH-1873: Deprecate the tag types differently to avoid type clashes (Tim Wojtulewicz, Corelight)

View file

@ -1 +1 @@
4.2.0-dev.396
4.2.0-dev.403

View file

@ -18,6 +18,7 @@
#include "zeek/Val.h"
#include "zeek/module_util.h"
#include "zeek/script_opt/ScriptOpt.h"
#include "zeek/script_opt/StmtOptInfo.h"
namespace zeek::detail
{
@ -717,7 +718,7 @@ TraversalCode OuterIDBindingFinder::PostExpr(const Expr* expr)
static bool duplicate_ASTs = getenv("ZEEK_DUPLICATE_ASTS");
void end_func(StmtPtr body)
void end_func(StmtPtr body, bool free_of_conditionals)
{
if ( duplicate_ASTs && reporter->Errors() == 0 )
// Only try duplication in the absence of errors. If errors
@ -729,6 +730,8 @@ void end_func(StmtPtr body)
// by duplicating can itself be correctly duplicated.
body = body->Duplicate()->Duplicate();
body->GetOptInfo()->is_free_of_conditionals = free_of_conditionals;
auto ingredients = std::make_unique<function_ingredients>(pop_scope(), std::move(body));
if ( ingredients->id->HasVal() )

View file

@ -45,7 +45,7 @@ extern void add_type(ID* id, TypePtr t, std::unique_ptr<std::vector<AttrPtr>> at
extern void begin_func(IDPtr id, const char* module_name, FunctionFlavor flavor, bool is_redef,
FuncTypePtr t, std::unique_ptr<std::vector<AttrPtr>> attrs = nullptr);
extern void end_func(StmtPtr body);
extern void end_func(StmtPtr body, bool free_of_conditionals);
// Gather all IDs referenced inside a body that aren't part of a given scope.
extern IDPList gather_outer_ids(ScopePtr scope, StmtPtr body);

View file

@ -105,6 +105,11 @@ extern const char* last_filename; // Absolute path of last file parsed.
extern const char* last_tok_filename;
extern const char* last_last_tok_filename;
extern int conditional_epoch; // let's us track embedded conditionals
// Whether the file we're currently parsing includes @if conditionals.
extern bool current_file_has_conditionals;
YYLTYPE GetCurrentLocation();
extern int yyerror(const char[]);
extern int brolex();
@ -138,6 +143,7 @@ bool defining_global_ID = false;
std::vector<int> saved_in_init;
static Location func_hdr_location;
static int func_hdr_cond_epoch = 0;
EnumType* cur_enum_type = nullptr;
static ID* cur_decl_type_id = nullptr;
@ -1214,16 +1220,19 @@ decl:
zeekygen_mgr->Identifier(std::move(id));
}
| func_hdr { func_hdr_location = @1; } func_body
| func_hdr { func_hdr_location = @1; } conditional_list func_body
| func_hdr
{
func_hdr_location = @1;
func_hdr_cond_epoch = conditional_epoch;
}
conditional_list func_body
| conditional
;
conditional_list:
conditional
| conditional conditional_list
| conditional_list conditional
;
conditional:
TOK_ATIF '(' expr ')'
@ -1296,7 +1305,13 @@ func_body:
'}'
{
set_location(func_hdr_location, @5);
end_func({AdoptRef{}, $3});
bool free_of_conditionals = true;
if ( current_file_has_conditionals ||
conditional_epoch > func_hdr_cond_epoch )
free_of_conditionals = false;
end_func({AdoptRef{}, $3}, free_of_conditionals);
}
;

View file

@ -48,7 +48,22 @@ extern YYLTYPE yylloc; // holds start line and column of token
extern zeek::EnumType* cur_enum_type;
// Track the @if... depth.
std::intptr_t current_depth = 0;
static std::intptr_t conditional_depth = 0;
zeek::detail::int_list entry_cond_depth; // @if depth upon starting file
// Tracks how many conditionals there have been. This value only
// increases. Its value is to support logic such as figuring out
// whether a function body has a conditional within it by comparing
// the epoch at the beginning of parsing the body with that at the end.
int conditional_epoch = 0;
// Whether the current file has included conditionals (so far).
bool current_file_has_conditionals = false;
// The files that include conditionals. Not currently used, but will be
// in the future once we add --optimize-files=/pat/.
std::unordered_set<std::string> files_with_conditionals;
zeek::detail::int_list if_stack;
@ -99,6 +114,18 @@ static std::string find_relative_script_file(const std::string& filename)
return zeek::util::find_script_file(filename, zeek::util::zeek_path());
}
static void start_conditional()
{
++conditional_depth;
++conditional_epoch;
if ( ! current_file_has_conditionals )
// First time we've observed that this file includes conditionals.
files_with_conditionals.insert(::filename);
current_file_has_conditionals = true;
}
class FileInfo {
public:
FileInfo(std::string restore_module = "");
@ -418,11 +445,11 @@ when return TOK_WHEN;
@ifdef return TOK_ATIFDEF;
@ifndef return TOK_ATIFNDEF;
@else return TOK_ATELSE;
@endif --current_depth;
@endif do_atendif();
<IGNORE>@if ++current_depth;
<IGNORE>@ifdef ++current_depth;
<IGNORE>@ifndef ++current_depth;
<IGNORE>@if start_conditional();
<IGNORE>@ifdef start_conditional();
<IGNORE>@ifndef start_conditional();
<IGNORE>@else return TOK_ATELSE;
<IGNORE>@endif return TOK_ATENDIF;
<IGNORE>[^@\r\n]+ /* eat */
@ -639,17 +666,19 @@ static int load_files(const char* orig_file)
zeek::detail::zeekygen_mgr->Script(file_path);
// "orig_file", could be an alias for yytext, which is ephemeral
// "orig_file" could be an alias for yytext, which is ephemeral
// and will be zapped after the yy_switch_to_buffer() below.
YY_BUFFER_STATE buffer;
if ( rc.first == 1 ) {
if ( rc.first == 1 )
{
// Parse code provided by plugin.
assert(rc.second);
DBG_LOG(zeek::DBG_SCRIPTS, "Loading %s from code supplied by plugin ", file_path.c_str());
buffer = yy_scan_bytes(rc.second->data(), rc.second->size()); // this copies the data
}
else {
else
{
// Parse from file.
assert(f);
DBG_LOG(zeek::DBG_SCRIPTS, "Loading %s", file_path.c_str());
@ -663,6 +692,8 @@ static int load_files(const char* orig_file)
// every Obj created when parsing it.
yylloc.filename = filename = zeek::util::copy_string(file_path.c_str());
entry_cond_depth.push_back(conditional_depth);
return 1;
}
@ -693,9 +724,21 @@ public:
std::vector<const zeek::detail::NameExpr*> local_names;
};
static void begin_ignoring()
{
if_stack.push_back(conditional_depth);
BEGIN(IGNORE);
}
static void resume_processing()
{
if_stack.pop_back();
BEGIN(INITIAL);
}
void do_atif(zeek::detail::Expr* expr)
{
++current_depth;
start_conditional();
LocalNameFinder cb;
expr->Traverse(&cb);
@ -716,70 +759,52 @@ void do_atif(zeek::detail::Expr* expr)
}
if ( ! val->AsBool() )
{
if_stack.push_back(current_depth);
BEGIN(IGNORE);
}
begin_ignoring();
}
void do_atifdef(const char* id)
{
++current_depth;
start_conditional();
const auto& i = zeek::detail::lookup_ID(id, zeek::detail::current_module.c_str());
if ( ! i )
{
if_stack.push_back(current_depth);
BEGIN(IGNORE);
}
begin_ignoring();
}
void do_atifndef(const char *id)
{
++current_depth;
start_conditional();
const auto& i = zeek::detail::lookup_ID(id, zeek::detail::current_module.c_str());
if ( i )
{
if_stack.push_back(current_depth);
BEGIN(IGNORE);
}
begin_ignoring();
}
void do_atelse()
{
if ( current_depth == 0 )
if ( conditional_depth == 0 )
zeek::reporter->Error("@else without @if...");
if ( ! if_stack.empty() && current_depth > if_stack.back() )
if ( ! if_stack.empty() && conditional_depth > if_stack.back() )
return;
if ( YY_START == INITIAL )
{
if_stack.push_back(current_depth);
BEGIN(IGNORE);
}
begin_ignoring();
else
{
if_stack.pop_back();
BEGIN(INITIAL);
}
resume_processing();
}
void do_atendif()
{
if ( current_depth == 0 )
if ( conditional_depth <= entry_cond_depth.back() )
zeek::reporter->Error("unbalanced @if... @endif");
if ( current_depth == if_stack.back() )
{
BEGIN(INITIAL);
if_stack.pop_back();
}
if ( ! if_stack.empty() && conditional_depth == if_stack.back() )
resume_processing();
--current_depth;
--conditional_depth;
}
// Be careful to never delete things from this list, as the strings
@ -840,7 +865,15 @@ void add_to_name_list(char* s, char delim, zeek::name_list& nl)
int yywrap()
{
if ( entry_cond_depth.size() > 0 )
{
if ( conditional_depth > entry_cond_depth.back() )
zeek::reporter->FatalError("unbalanced @if... @endif");
entry_cond_depth.pop_back();
}
last_filename = ::filename;
current_file_has_conditionals = false;
if ( zeek::reporter->Errors() > 0 )
return 1;

View file

@ -6,6 +6,8 @@
#include <sys/file.h>
#include <unistd.h>
#include "zeek/script_opt/StmtOptInfo.h"
namespace zeek::detail
{
@ -50,6 +52,14 @@ bool is_CPP_compilable(const ProfileFunc* pf, const char** reason)
return false;
}
auto body = pf->ProfiledBody();
if ( body && ! body->GetOptInfo()->is_free_of_conditionals )
{
if ( reason )
*reason = "body may be affected by @if conditional";
return false;
}
return true;
}

View file

@ -60,12 +60,23 @@ p_hash_type script_specific_hash(const StmtPtr& body, p_hash_type generic_hash)
ProfileFunc::ProfileFunc(const Func* func, const StmtPtr& body, bool _abs_rec_fields)
{
profiled_func = func;
profiled_body = body.get();
abs_rec_fields = _abs_rec_fields;
Profile(func->GetType().get(), body);
}
ProfileFunc::ProfileFunc(const Stmt* s, bool _abs_rec_fields)
{
profiled_body = s;
abs_rec_fields = _abs_rec_fields;
s->Traverse(this);
}
ProfileFunc::ProfileFunc(const Expr* e, bool _abs_rec_fields)
{
profiled_expr = e;
abs_rec_fields = _abs_rec_fields;
if ( e->Tag() == EXPR_LAMBDA )
@ -84,12 +95,6 @@ ProfileFunc::ProfileFunc(const Expr* e, bool _abs_rec_fields)
e->Traverse(this);
}
ProfileFunc::ProfileFunc(const Stmt* s, bool _abs_rec_fields)
{
abs_rec_fields = _abs_rec_fields;
s->Traverse(this);
}
void ProfileFunc::Profile(const FuncType* ft, const StmtPtr& body)
{
num_params = ft->Params()->NumFields();

View file

@ -100,6 +100,12 @@ public:
ProfileFunc(const Stmt* body, bool abs_rec_fields = false);
ProfileFunc(const Expr* func, bool abs_rec_fields = false);
// Returns the function, body, or expression profiled. Each can be
// null depending on the constructor used.
const Func* ProfiledFunc() const { return profiled_func; }
const Stmt* ProfiledBody() const { return profiled_body; }
const Expr* ProfiledExpr() const { return profiled_expr; }
// See the comments for the associated member variables for each
// of these accessors.
const std::unordered_set<const ID*>& Globals() const { return globals; }
@ -157,6 +163,12 @@ protected:
// Take note of an assignment to an identifier.
void TrackAssignment(const ID* id);
// The function, body, or expression profiled. Can be null
// depending on which constructor was used.
const Func* profiled_func = nullptr;
const Stmt* profiled_body = nullptr;
const Expr* profiled_expr = nullptr;
// Globals seen in the function.
//
// Does *not* include globals solely seen as the function being

View file

@ -22,6 +22,10 @@ public:
// True if we observe that there is a branch out of the statement
// to just beyond its extent, such as due to a "break".
bool contains_branch_beyond = false;
// Whether this statement is free of the possible influence
// of conditional code.
bool is_free_of_conditionals = true;
};
} // namespace zeek::detail

View file

@ -0,0 +1,2 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
fatal error in <...>/dangling-at.zeek, line 9: unbalanced @if... @endif

View file

@ -0,0 +1,2 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
error in <...>/orphan-endif.zeek, line 8: unbalanced @if... @endif

View file

@ -0,0 +1,8 @@
# @TEST-EXEC-FAIL: zeek -b %INPUT
# @TEST-EXEC: TEST_DIFF_CANONIFIER=$SCRIPTS/diff-remove-abspath btest-diff .stderr
# Check that dangling conditionals are detected.
@if ( 1==1 )
print "it's true!";
@else
lalala

View file

@ -0,0 +1,9 @@
# @TEST-EXEC-FAIL: zeek -b %INPUT
# @TEST-EXEC: TEST_DIFF_CANONIFIER=$SCRIPTS/diff-remove-abspath btest-diff .stderr
# Check that orphan endif's are detected.
@if ( T )
print "so far, so good";
@endif
@endif
print "whoops!";