Introduce special treatment for the blank identifier _

Mostly: Do not instantiate variables within for loops and allow
reusing differently typed blanks which previously wasn't possible.

This may be missing some corner-cases, but the added tests seem
to work as expected and nothing else fell apart it seems.
This commit is contained in:
Arne Welzel 2022-09-30 17:38:00 +02:00
parent 7f7c77ab07
commit 46334f8b59
21 changed files with 318 additions and 20 deletions

31
NEWS
View file

@ -17,6 +17,9 @@ Breaking Changes
clusters running on FreeBSD, as that OS uses a different range for ephemeral clusters running on FreeBSD, as that OS uses a different range for ephemeral
ports. ports.
- The blank identifier ``_`` cannot be used in expressions and options anymore.
Outside of obfuscation exercises, this should have little real-world impact.
New Functionality New Functionality
----------------- -----------------
@ -39,6 +42,34 @@ New Functionality
been extended to work for packet and file analyzers. This now allows to been extended to work for packet and file analyzers. This now allows to
leverage ``Analyzer::disabled_analyzers`` for these kinds of analyzers. leverage ``Analyzer::disabled_analyzers`` for these kinds of analyzers.
- The blank identifier ``_`` can now be used to ignore loop variables of
different types without type clash errors. This allows to do the following
within the same scope:
local vec = vector("a", "b", "c");
for ( _, v in vec )
print v;
for ( i, _ in vec )
print v;
Iterating over only the values of a table can be done by ignoring the full
index with a single blank identifier. Due to the internal structure of Zeek
tables, this can result in a performance improvement.
local tab = table(["a", 1, T] = "a1T", ["b", 2, F] = "b2f");
for ( _, v in tab )
print v;
It's also possible ignore individual indices of different types with the
blank identifier ``_`` as follows:
for ( [_, i, _], v in tab )
print i, v;
As noted under breaking changes, the blank identifier ``_`` cannot be
referenced in expression anymore.
Changed Functionality Changed Functionality
--------------------- ---------------------

View file

@ -114,11 +114,15 @@ ID::ID(const char* arg_name, IDScope arg_scope, bool arg_is_export)
scope = arg_scope; scope = arg_scope;
is_export = arg_is_export; is_export = arg_is_export;
is_option = false; is_option = false;
is_blank = name && extract_var_name(name) == "_";
is_const = false; is_const = false;
is_enum_const = false; is_enum_const = false;
is_type = false; is_type = false;
offset = 0; offset = 0;
if ( is_blank )
SetType(base_type(TYPE_ANY));
opt_info = new IDOptInfo(this); opt_info = new IDOptInfo(this);
infer_return_type = false; infer_return_type = false;

View file

@ -105,6 +105,7 @@ public:
void SetOption(); void SetOption();
bool IsOption() const { return is_option; } bool IsOption() const { return is_option; }
bool IsBlank() const { return is_blank; };
void SetEnumConst() { is_enum_const = true; } void SetEnumConst() { is_enum_const = true; }
bool IsEnumConst() const { return is_enum_const; } bool IsEnumConst() const { return is_enum_const; }
@ -162,7 +163,7 @@ protected:
bool is_export; bool is_export;
bool infer_return_type; bool infer_return_type;
TypePtr type; TypePtr type;
bool is_const, is_enum_const, is_type, is_option; bool is_const, is_enum_const, is_type, is_option, is_blank;
int offset; int offset;
ValPtr val; ValPtr val;
AttributesPtr attrs; AttributesPtr attrs;

View file

@ -1221,7 +1221,17 @@ ForStmt::ForStmt(IDPList* arg_loop_vars, ExprPtr loop_expr)
{ {
const auto& indices = e->GetType()->AsTableType()->GetIndexTypes(); const auto& indices = e->GetType()->AsTableType()->GetIndexTypes();
if ( static_cast<int>(indices.size()) != loop_vars->length() ) if ( loop_vars->length() == 1 && (*loop_vars)[0]->IsBlank() )
{
// Special case support for looping with a single loop_var
// ignoring the full index of a table.
//
// for ( _, value )
// ...
//
return;
}
else if ( static_cast<int>(indices.size()) != loop_vars->length() )
{ {
e->Error("wrong index size"); e->Error("wrong index size");
return; return;
@ -1233,7 +1243,11 @@ ForStmt::ForStmt(IDPList* arg_loop_vars, ExprPtr loop_expr)
const auto& lv = (*loop_vars)[i]; const auto& lv = (*loop_vars)[i];
const auto& lvt = lv->GetType(); const auto& lvt = lv->GetType();
if ( lvt ) if ( lv->IsBlank() )
{
continue;
}
else if ( lvt )
{ {
if ( ! same_type(lvt, ind_type) ) if ( ! same_type(lvt, ind_type) )
lvt->Error("type clash in iteration", ind_type.get()); lvt->Error("type clash in iteration", ind_type.get());
@ -1254,11 +1268,16 @@ ForStmt::ForStmt(IDPList* arg_loop_vars, ExprPtr loop_expr)
return; return;
} }
const auto& t = (*loop_vars)[0]->GetType(); const auto& lv = (*loop_vars)[0];
const auto& t = lv->GetType();
if ( ! t ) if ( lv->IsBlank() )
add_local({NewRef{}, (*loop_vars)[0]}, base_type(TYPE_COUNT), INIT_NONE, nullptr, {
nullptr, VAR_REGULAR); // nop
}
else if ( ! t )
add_local({NewRef{}, lv}, base_type(TYPE_COUNT), INIT_NONE, nullptr, nullptr,
VAR_REGULAR);
else if ( ! IsIntegral(t->Tag()) ) else if ( ! IsIntegral(t->Tag()) )
{ {
@ -1275,9 +1294,14 @@ ForStmt::ForStmt(IDPList* arg_loop_vars, ExprPtr loop_expr)
return; return;
} }
const auto& t = (*loop_vars)[0]->GetType(); const auto& lv = (*loop_vars)[0];
const auto& t = lv->GetType();
if ( ! t ) if ( lv->IsBlank() )
{
// nop
}
else if ( ! t )
add_local({NewRef{}, (*loop_vars)[0]}, base_type(TYPE_STRING), INIT_NONE, nullptr, add_local({NewRef{}, (*loop_vars)[0]}, base_type(TYPE_STRING), INIT_NONE, nullptr,
nullptr, VAR_REGULAR); nullptr, VAR_REGULAR);
@ -1312,7 +1336,10 @@ ForStmt::ForStmt(IDPList* arg_loop_vars, ExprPtr loop_expr, IDPtr val_var)
} }
// Verify value_vars type if it's already been defined // Verify value_vars type if it's already been defined
if ( value_var->GetType() ) if ( value_var->IsBlank() )
value_var = ID::nil;
else if ( value_var->GetType() )
{ {
if ( ! same_type(value_var->GetType(), yield_type) ) if ( ! same_type(value_var->GetType(), yield_type) )
value_var->GetType()->Error("type clash in iteration", yield_type.get()); value_var->GetType()->Error("type clash in iteration", yield_type.get());
@ -1340,17 +1367,30 @@ ValPtr ForStmt::DoExec(Frame* f, Val* v, StmtFlowType& flow)
if ( ! loop_vals->Length() ) if ( ! loop_vals->Length() )
return nullptr; return nullptr;
// If there are only blank loop_vars (iterating over just the values),
// we can avoid the RecreateIndex() overhead.
bool all_loop_vars_blank = true;
for ( const auto* lv : *loop_vars )
all_loop_vars_blank &= lv->IsBlank();
for ( const auto& lve : *loop_vals ) for ( const auto& lve : *loop_vals )
{ {
auto k = lve.GetHashKey(); auto k = lve.GetHashKey();
auto* current_tev = lve.value; auto* current_tev = lve.value;
auto ind_lv = tv->RecreateIndex(*k);
if ( value_var ) if ( value_var )
f->SetElement(value_var, current_tev->GetVal()); f->SetElement(value_var, current_tev->GetVal());
if ( ! all_loop_vars_blank )
{
auto ind_lv = tv->RecreateIndex(*k);
for ( int i = 0; i < ind_lv->Length(); i++ ) for ( int i = 0; i < ind_lv->Length(); i++ )
f->SetElement((*loop_vars)[i], ind_lv->Idx(i)); {
const auto* lv = (*loop_vars)[i];
if ( ! lv->IsBlank() )
f->SetElement(lv, ind_lv->Idx(i));
}
}
flow = FLOW_NEXT; flow = FLOW_NEXT;
ret = body->Exec(f, flow); ret = body->Exec(f, flow);
@ -1375,7 +1415,10 @@ ValPtr ForStmt::DoExec(Frame* f, Val* v, StmtFlowType& flow)
if ( value_var ) if ( value_var )
f->SetElement(value_var, vv->ValAt(i)); f->SetElement(value_var, vv->ValAt(i));
f->SetElement((*loop_vars)[0], val_mgr->Count(i)); const auto* lv = (*loop_vars)[0];
if ( ! lv->IsBlank() )
f->SetElement(lv, val_mgr->Count(i));
flow = FLOW_NEXT; flow = FLOW_NEXT;
ret = body->Exec(f, flow); ret = body->Exec(f, flow);

View file

@ -209,7 +209,7 @@ static void make_var(const IDPtr& id, TypePtr t, InitClass c, ExprPtr init,
init = expand_op(cast_intrusive<ListExpr>(init), init_t); init = expand_op(cast_intrusive<ListExpr>(init), init_t);
} }
if ( id->GetType() ) if ( id->GetType() && ! id->IsBlank() )
{ {
if ( id->IsRedefinable() || (! init && attr && ! IsFunc(id->GetType()->Tag())) ) if ( id->IsRedefinable() || (! init && attr && ! IsFunc(id->GetType()->Tag())) )
{ {
@ -247,7 +247,7 @@ static void make_var(const IDPtr& id, TypePtr t, InitClass c, ExprPtr init,
t = id->GetType(); t = id->GetType();
} }
if ( id->GetType() && id->GetType()->Tag() != TYPE_ERROR ) if ( id->GetType() && id->GetType()->Tag() != TYPE_ERROR && ! id->IsBlank() )
{ {
if ( dt != VAR_REDEF && (! init || ! do_init || (! t && ! (t = init_type(init)))) ) if ( dt != VAR_REDEF && (! init || ! do_init || (! t && ! (t = init_type(init)))) )
{ {

View file

@ -915,7 +915,12 @@ expr:
if ( id->IsDeprecated() ) if ( id->IsDeprecated() )
reporter->Warning("%s", id->GetDeprecationWarning().c_str()); reporter->Warning("%s", id->GetDeprecationWarning().c_str());
if ( ! id->GetType() ) if ( id->IsBlank() )
{
$$ = new NameExpr(std::move(id));
$$->SetError("blank identifier used in expression");
}
else if ( ! id->GetType() )
{ {
id->Error("undeclared variable"); id->Error("undeclared variable");
id->SetType(error_type()); id->SetType(error_type());
@ -1330,6 +1335,9 @@ decl:
| TOK_OPTION def_global_id opt_type init_class opt_init opt_attr ';' | TOK_OPTION def_global_id opt_type init_class opt_init opt_attr ';'
{ {
if ( $2->IsBlank() )
$2->Error("blank identifier used as option");
else
build_global($2, $3, $4, $5, $6, VAR_OPTION); build_global($2, $3, $4, $5, $6, VAR_OPTION);
} }
@ -1873,6 +1881,7 @@ stmt:
| TOK_CONST local_id opt_type init_class opt_init opt_attr ';' opt_no_test | TOK_CONST local_id opt_type init_class opt_init opt_attr ';' opt_no_test
{ {
set_location(@1, @6); set_location(@1, @6);
$$ = build_local($2, $3, $4, $5, $6, VAR_CONST, ! $8).release(); $$ = build_local($2, $3, $4, $5, $6, VAR_CONST, ! $8).release();
} }
@ -2093,10 +2102,10 @@ local_id:
if ( $$ ) if ( $$ )
{ {
if ( $$->IsGlobal() ) if ( $$->IsGlobal() && ! $$->IsBlank() )
$$->Error("already a global identifier"); $$->Error("already a global identifier");
if ( $$->IsConst() ) if ( $$->IsConst() && ! $$->IsBlank() )
$$->Error("already a const identifier"); $$->Error("already a const identifier");
delete [] $1; delete [] $1;

View file

@ -0,0 +1,2 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
error in <...>/blank-expr-errors.zeek, line 4: blank identifier used in expression (_)

View file

@ -0,0 +1,2 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
error in <...>/blank-expr-errors.zeek, line 6: blank identifier used in expression (MyModule::_)

View file

@ -0,0 +1,2 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
error in <...>/blank-expr-errors.zeek, line 11: blank identifier used in expression (MyModule::_)

View file

@ -0,0 +1,2 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
error in <...>/blank-expr-errors.zeek, line 6: blank identifier used in expression (MyModule::_)

View file

@ -0,0 +1,2 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
error in <...>/blank-expr-errors.zeek, line 9: blank identifier used in expression (_)

View file

@ -0,0 +1,31 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
== vec 1
a
b
c
== vec 2
idxsum(vec), 3
== vec 3
veclen(vec), 3
== t1 1
c
b
a
== t1 2
keyc
keyb
keya
== t1 3
t1len, 3
== t2 1
1, a1a
3, c3c
2, b2b
== t2 2
a, T
c, T
b, F
== t2 3
t2concat, a1ac3cb2b
== s
strlen(s), 10

View file

@ -0,0 +1 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.

View file

@ -0,0 +1 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.

View file

@ -0,0 +1 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.

View file

@ -0,0 +1 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.

View file

@ -0,0 +1,2 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
error in <...>/blank-option-error.zeek, line 9: blank identifier used as option (MyModule::_)

View file

@ -0,0 +1,54 @@
# @TEST-DOC: Do not allow to reference the blank identifier.
# @TEST-EXEC-FAIL: zeek -b %INPUT
# @TEST-EXEC: TEST_DIFF_CANONIFIER=$SCRIPTS/diff-remove-abspath btest-diff .stderr
event zeek_init()
{
local vec = vector( "1", "2", "3" );
for ( _, v in vec )
print _;
}
@TEST-START-NEXT
event zeek_init()
{
local _ = vector( "1", "2", "3" );
print _;
}
@TEST-START-NEXT
# Ensure it does not work in a module, either.
module MyModule;
event zeek_init()
{
local _ = vector( "1", "2", "3" );
print _;
}
@TEST-START-NEXT
# Ensure _ can not referenced when it's a const in an export section.
# Adding the const _ isn't an error though.
module MyModule;
export {
const _: count = 1;
}
event zeek_init()
{
print MyModule::_;
}
@TEST-START-NEXT
# Ensure it does not work in a function.
module MyModule;
function helper()
{
local _ = vector( "1", "2", "3" );
print _;
}
event zeek_init()
{
helper();
}

View file

@ -0,0 +1,70 @@
# @TEST-DOC: Some blank identifier tests iterating over vectors, tables and strings.
# @TEST-EXEC: zeek -b %INPUT > output
# @TEST-EXEC: btest-diff output
event zeek_init()
{
local vec = vector("a", "b", "c");
local t1 = table(["keya"] = "a", ["keyb"] = "b", ["keyc"] = "c");
local t2 = table(["a",1,T] = "a1a", ["b",2,F] = "b2b", ["c",3,T] = "c3c");
local s = "the string";
# Ignore just the index.
print "== vec 1";
for ( _, v in vec )
print v;
# Ignore just the value.
print "== vec 2";
local idxsum = 0;
for ( idx, _ in vec )
idxsum += idx;
print "idxsum(vec)", idxsum;
# Ignore index and value
print "== vec 3";
local veclen = 0;
for ( _, _ in vec )
++veclen;
print "veclen(vec)", veclen;
# Ignore just the key
print "== t1 1";
for ( _, v in t1 )
print v;
# Ignore just the value
print "== t1 2";
for ( k, _ in t1 )
print k;
# Ignore index and value
local t1len = 0;
print "== t1 3";
for ( _, _ in t1 )
++t1len;
print "t1len", t1len;
# Ignore part of the index and the value.
print "== t2 1";
for ( [_,c,_], v in t2 )
print c, v;
# Ignore part of the index and the value.
print "== t2 2";
for ( [t2a,_,t2b], _ in t2 )
print t2a, t2b;
# Ignore the whole index with a single _
print "== t2 3";
local t2concat = "";
for ( _, v in t2 )
t2concat += v;
print "t2concat", t2concat;
# String iteration ignoring the value
print "== s";
local i = 0;
for ( _ in s )
++i;
print "strlen(s)", i;
}

View file

@ -0,0 +1,29 @@
# @TEST-DOC: Locals work with the blank identifier, but can not be referenced.
# @TEST-EXEC: zeek -b %INPUT
# @TEST-EXEC: TEST_DIFF_CANONIFIER=$SCRIPTS/diff-remove-abspath btest-diff .stderr
event zeek_init()
{
local _ = "1";
}
#@TEST-START-NEXT
event zeek_init()
{
local _: string = "1";
local _: count = 1;
}
#@TEST-START-NEXT
event zeek_init()
{
local _: string = "1";
const _: count = 1;
}
#@TEST-START-NEXT
event zeek_init()
{
const _: string = "1";
const _: count = 1;
}

View file

@ -0,0 +1,10 @@
# @TEST-DOC: Do not allow blank options.
# @TEST-EXEC-FAIL: zeek -b %INPUT
# @TEST-EXEC: TEST_DIFF_CANONIFIER=$SCRIPTS/diff-remove-abspath btest-diff .stderr
module MyModule;
export {
option _: count = 42;
}