mirror of
https://github.com/zeek/zeek.git
synced 2025-10-02 06:38:20 +00:00
405 lines
14 KiB
Text
405 lines
14 KiB
Text
# @TEST-DOC: Stress tests for the AST optimizer dealing with side effects.
|
|
# @TEST-REQUIRES: test "${ZEEK_ZAM}" == "1"
|
|
#
|
|
# See below for an explanation of this convoluted invocation line.
|
|
# @TEST-EXEC: zeek -b -O ZAM -O dump-xform --optimize-func='AST_opt_test_.*' %INPUT >output
|
|
# @TEST-EXEC: TEST_DIFF_CANONIFIER=$SCRIPTS/diff-remove-abspath btest-diff output
|
|
|
|
# This is a subtle & extensive test of the AST optimizer used by ZAM, in
|
|
# particular its CSE = Common Subexpression Elimination functionality, whereby
|
|
# it reuses previously computed values rather than recomputing them. This test
|
|
# in particular focuses on that functionality in the context of side-effects
|
|
# that arise due to either implicit or explicit function calls. The implicit
|
|
# ones (such as table &default functions) are the trickiest, because they
|
|
# can in principle access or modify state that leads to *other* implicit
|
|
# function calls.
|
|
#
|
|
# Unlike nearly all Zeek BTests, the heart of the output is intermediary
|
|
# information produced by "-O dump-xform", not the final output. The dump
|
|
# implicitly reveals where the AST optimizer reuses expressions previously
|
|
# computed (into temporaries) and where it refrains from doing so because
|
|
# in principle the re-use is unsafe due to possible modifications to the
|
|
# original expression. Many of the tests print the same set of values twice in
|
|
# a row, often to see whether the first "print" requires correctly re-computing
|
|
# an expression or inheriting its value from a previous computation. The
|
|
# second "print" generally ensures that given the lack of potential
|
|
# side-effects, the values from the first "print" are re-used, although for
|
|
# some tests they need to instead be re-computed.
|
|
#
|
|
# If the dumped intermediary information agrees but there's a difference in
|
|
# the final output as the Zeek script executes, that reflects a bug subsequent
|
|
# to the AST optimizer (typically, this will be in ZAM's low-level optimizer).
|
|
#
|
|
# Changes to the AST processing used by script optimization can introduce
|
|
# benign differences to the intermediary information, such as introducing
|
|
# or removing some temporary variables. These require hand inspection to
|
|
# determine whether they reflect a problem.
|
|
#
|
|
# We divide the tests up into a number of different event handlers. This
|
|
# keeps the AST for each group of tests from getting too complex (especially
|
|
# if later tests wind up reusing values from much earlier tests, which can
|
|
# be hard to manually verify for correctness). We enable ZAM optimization
|
|
# for *only* those handlers to avoid complications in the output unrelated
|
|
# to these tests. We use distinct event handlers, rather than multiple
|
|
# handlers for the same event, to avoid coalescence of the handlers. Doing
|
|
# this explicitly rather than using "-O no-inline" is preferrable because
|
|
# it means the testing still includes any issues that the inliner might
|
|
# introduce.
|
|
#
|
|
# Any time you make a change to the final output the script produces, confirm
|
|
# that running Zeek *without* script optimization produces that same output.
|
|
|
|
########################################################################
|
|
|
|
# A global that in some contexts is directly modified as a side-effect.
|
|
global g1 = 4;
|
|
|
|
# A global that is never directly modified as a side-effect. However, some
|
|
# side effects have Unknown consequences - for those, the optimizer should
|
|
# assume that any expression derived from a global value needs to re-computed.
|
|
global g2 = 44;
|
|
|
|
# A table with a safe &default. However, in some contexts the tests will
|
|
# introduce other tables with the same type signature whose &default's have
|
|
# side effects. Due to the complexity of potential aliasing among Zeek
|
|
# container objects of like types, those side effects should be presumed
|
|
# to potentially modify this global.
|
|
global tbl_c_of_c = table([2] = 3, [3] = 5) &default=456;
|
|
|
|
# A function with no side effects.
|
|
function benign(a: count): count
|
|
{
|
|
return a + 3;
|
|
}
|
|
|
|
# Calls a BiF that should be known to the optimizer as having no side effects.
|
|
function safe_bif(): string
|
|
{
|
|
return fmt("%s", g1 * 7);
|
|
}
|
|
|
|
# Calls a BiF that the optimizer doesn't know as having no side effects,
|
|
# and which should therefore be treated as having Unknown side effects.
|
|
function dangerous_bif(): count
|
|
{
|
|
local l = tbl_c_of_c[2];
|
|
clear_table(tbl_c_of_c);
|
|
return l;
|
|
}
|
|
|
|
event AST_opt_test_1()
|
|
{
|
|
# For the following, the optimizer should reuse "g1 * 2" and
|
|
# "tbl_c_of_c[2]" for the later print's ...
|
|
print g1 * 2, tbl_c_of_c[2];
|
|
print benign(4);
|
|
print g1 * 2, tbl_c_of_c[2];
|
|
print safe_bif();
|
|
print g1 * 2, tbl_c_of_c[2];
|
|
|
|
# ... but *not* after this call. In addition, even though the call
|
|
# doesn't modify "g1", it should still be reloaded because
|
|
# the optimizer only knows that the BiF is unsafe = it has Unknown
|
|
# effects.
|
|
print dangerous_bif();
|
|
print g1 * 2, tbl_c_of_c[2];
|
|
print g1 * 2, tbl_c_of_c[2];
|
|
}
|
|
|
|
########################################################################
|
|
|
|
# A function that modifies our global-of-interest.
|
|
function mod_g1(a: addr): count
|
|
{
|
|
return ++g1;
|
|
}
|
|
|
|
global tbl_addr_of_count = table([1.2.3.4] = 1001, [2.3.4.5] = 10002) &default=mod_g1;
|
|
|
|
event AST_opt_test_2()
|
|
{
|
|
# The optimizer should reload "g1" after each tbl_addr_of_count
|
|
# reference, but reuse "g2 * 3" and "tbl_c_of_c[2]", since those
|
|
# can't be affected by mod_g1().
|
|
print g1 * 2, g2 * 3, tbl_c_of_c[2];
|
|
print tbl_addr_of_count[1.2.3.4];
|
|
print g1 * 2, g2 * 3, tbl_c_of_c[2];
|
|
print tbl_addr_of_count[127.0.0.1];
|
|
print g1 * 2, g2 * 3, tbl_c_of_c[2];
|
|
}
|
|
|
|
########################################################################
|
|
|
|
# A global that controls whether the following functions change an entry
|
|
# in one of our global tables.
|
|
global mess_is_active = F;
|
|
|
|
# We use a separate function for actually changing the global to make sure
|
|
# that the AST analysis follows the effects of function calls.
|
|
function do_the_messing()
|
|
{
|
|
tbl_c_of_c[2] = 999;
|
|
}
|
|
|
|
# An &on_change handler that potentially alters the value of the table.
|
|
function mess_with_tbl_c_of_c(tbl: table[bool] of count, tc: TableChange, ind: bool, val: count)
|
|
{
|
|
if ( mess_is_active )
|
|
do_the_messing();
|
|
}
|
|
|
|
global tcc_mod = table([F] = 44, [T] = 55) &on_change=mess_with_tbl_c_of_c;
|
|
|
|
event AST_opt_test_3()
|
|
{
|
|
# The optimizer should re-use "tbl_c_of_c[2]" in each of the secondary
|
|
# print's, but it should recompute it after each modification to
|
|
# "tcc_mod", even though the first one won't lead to a potential change.
|
|
print tbl_c_of_c[2];
|
|
print tbl_c_of_c[2];
|
|
|
|
tcc_mod[F] = 33;
|
|
|
|
print tbl_c_of_c[2];
|
|
print tbl_c_of_c[2];
|
|
|
|
mess_is_active = T;
|
|
tcc_mod[T] = 66;
|
|
|
|
print tbl_c_of_c[2];
|
|
print tbl_c_of_c[2];
|
|
}
|
|
|
|
########################################################################
|
|
|
|
# Analogous to tbl_c_of_c but has a function call for its &default.
|
|
global tbl_c_of_c2 = table([1] = 4, [4] = 8) &default=benign;
|
|
|
|
event AST_opt_test_4()
|
|
{
|
|
# In the following, for the duplicated prints the optimizer should
|
|
# reuse the previous values. That includes for the accesses to
|
|
# local_tbl_c_of_c3 (which associates a more complex &default function
|
|
# to "table[count] of count" types).
|
|
print g1 * 2, tbl_c_of_c[2], tbl_c_of_c2[1], tbl_c_of_c2[10];
|
|
print g1 * 2, tbl_c_of_c[2], tbl_c_of_c2[1], tbl_c_of_c2[10];
|
|
|
|
local local_tbl_c_of_c3 = table([4] = 1, [12] = 0)
|
|
&default=function(c: count): count { return benign(c+7) - 2; };
|
|
|
|
# We print at this separately to make sure it occurs prior to
|
|
# potentially computing the other elements in the print.
|
|
print local_tbl_c_of_c3[12];
|
|
print g1 * 2, tbl_c_of_c[2], tbl_c_of_c2[1], local_tbl_c_of_c3[12];
|
|
|
|
# Same with separate printing here.
|
|
print local_tbl_c_of_c3[10];
|
|
print g1 * 2, tbl_c_of_c[2], tbl_c_of_c2[1], local_tbl_c_of_c3[10];
|
|
print g1 * 2, tbl_c_of_c[2], tbl_c_of_c2[1], local_tbl_c_of_c3[10];
|
|
|
|
# This BiF should lead to recomputing of all values, including
|
|
# the local local_tbl_c_of_c3.
|
|
print dangerous_bif();
|
|
print g1 * 2, tbl_c_of_c[2], tbl_c_of_c2[1], local_tbl_c_of_c3[12];
|
|
print g1 * 2, tbl_c_of_c[2], tbl_c_of_c2[1], local_tbl_c_of_c3[12], local_tbl_c_of_c3[10];
|
|
}
|
|
|
|
########################################################################
|
|
|
|
# Used to introduce another global as having side effects, this time in
|
|
# the context of a local table.
|
|
global my_exponential = 2.0;
|
|
|
|
event AST_opt_test_5()
|
|
{
|
|
# A similar test, but this time the second local has side effects,
|
|
# and aliases type-wise with the first local, so expressions with
|
|
# the first local should be recomputed for every access.
|
|
#
|
|
# The notion of aggregate aliases is important because in general
|
|
# with static analysis it can be extremely difficult to tell whether
|
|
# two instances of an aggregate, each with the same type, might in
|
|
# fact refer to the same underlying aggregate value. The optimizer
|
|
# thus needs to play it safe and refrain from only focusing on the
|
|
# instance that directly has the attribute.
|
|
local side_effect_free = table([0] = 0.5, [1] = 1.5);
|
|
|
|
print side_effect_free[1];
|
|
print side_effect_free[1];
|
|
|
|
local has_side_effects = table([0] = -0.5, [1] = -1.5)
|
|
&default_insert=function(c: count): double
|
|
{
|
|
my_exponential = my_exponential * my_exponential;
|
|
return my_exponential;
|
|
};
|
|
|
|
print side_effect_free[1], has_side_effects[2];
|
|
print side_effect_free[1], has_side_effects[2];
|
|
}
|
|
|
|
########################################################################
|
|
|
|
global tbl_c_of_b = table([100] = F, [101] = T);
|
|
global tbl_s_of_s = table(["1"] = "4", ["4"] = "8") &default="no-side-effects";
|
|
global tbl_s_of_s2 = table(["1"] = "4", ["4"] = "8") &default="ultimately-side-effects";
|
|
|
|
event AST_opt_test_6()
|
|
{
|
|
# Without the final statement of this handler, the second one of these
|
|
# prints would re-use all of the values. However, it instead can only
|
|
# reuse the first three, as explained below.
|
|
print g1 * 2, tbl_c_of_c[2], tbl_c_of_c2[1], tbl_c_of_b[100], tbl_s_of_s["4"];
|
|
print g1 * 2, tbl_c_of_c[2], tbl_c_of_c2[1], tbl_c_of_b[100], tbl_s_of_s["4"];
|
|
|
|
# This should force recomputation of tbl_c_of_b above (because its
|
|
# type aliases with one that has side effects on loads, so loads
|
|
# must not be optimized away) AND THEREFORE also tbl_s_of_s (because
|
|
# those loads affect an alias of its type).
|
|
print table([11] = F)
|
|
&default=function(c: count): bool
|
|
{ tbl_s_of_s2["2"] = "two"; return |tbl_s_of_s2| < 9; };
|
|
}
|
|
|
|
########################################################################
|
|
|
|
# Here we test &default side-effects of record constructors. We also use
|
|
# this as an opportunity to stress-test propagation of side effects
|
|
# when one side effect induces another, and whether AST analysis correctly
|
|
# accounts for potential type aliasing.
|
|
|
|
# Reads a local table - but that should be enough to raise the specter of
|
|
# any "table[count] of bool" access side-effects.
|
|
function local_table_read(): count
|
|
{
|
|
local l = table([99] = T);
|
|
return l[99] ? 3 : 2;
|
|
}
|
|
|
|
# The same, but modifies the table instead.
|
|
function local_table_mod(): count
|
|
{
|
|
local l = table([99] = T);
|
|
l[99] = F;
|
|
return |l|;
|
|
}
|
|
|
|
# Tickles "table[count] of bool" access side-effects.
|
|
type R1: record {
|
|
a: count &default = local_table_read();
|
|
b: string;
|
|
};
|
|
|
|
# Similar, but for modification side-effects. NOTE: type-compatible with R1.
|
|
type R2: record {
|
|
a: count &default = local_table_mod();
|
|
b: string;
|
|
};
|
|
|
|
# Just like R2 but NOT type-compatible with R1.
|
|
type R3: record {
|
|
a: count &default = local_table_mod();
|
|
b: string;
|
|
c: int &default = +5;
|
|
};
|
|
|
|
event AST_opt_test_7()
|
|
{
|
|
# In this test, it's not the mere presence of "R1" that affects
|
|
# earlier optimization (like in the previous test), but the execution
|
|
# of its constructor. So the following should proceed with the second
|
|
# completely re-using from the first.
|
|
print tbl_s_of_s["4"];
|
|
print tbl_s_of_s["4"];
|
|
|
|
# Here constructing R1 potentially invokes the &default for $a,
|
|
# which reads from a "table[count] of bool", which the previous
|
|
# test (AST_opt_test_6) has set up as potentially affecting values
|
|
# of type "table[string] of string" such as tbl_s_of_s.
|
|
local force_tbl_c_of_c_str_reload = R1($b="match-on-type-of-tbl_c_of_c");
|
|
print force_tbl_c_of_c_str_reload;
|
|
print tbl_s_of_s["4"];
|
|
print tbl_s_of_s["4"];
|
|
|
|
# Here's a similar sequence but using a record that *modifies*
|
|
# a "table[count] of bool" - which ordinarily should NOT thwart
|
|
# optimization since there's no potential &on_change attribute for
|
|
# such tables ... HOWEVER R2 is type-compatible with R1, so it
|
|
# inherits R1's side-effects, thus this WILL require a reload of
|
|
# tbl_s_of_s["4"].
|
|
local problem_r2 = R2($b="hello");
|
|
print tbl_s_of_s["4"], problem_r2$b;
|
|
|
|
# ... THIS however won't, because R3 is not type-compatible with R1,
|
|
# even though it has the same attributes as R2.
|
|
local problem_r3 = R3($b="hello again");
|
|
print tbl_s_of_s["4"], problem_r3$b;
|
|
}
|
|
|
|
########################################################################
|
|
|
|
event AST_opt_test_8()
|
|
{
|
|
# This last one is a hum-dinger. The two locals we define introduce
|
|
# mutually recursive side-effects, type-wise: the first sets up that
|
|
# a "table[count, count] of count" can access a "table[double, double]
|
|
# of double", and the latter establishes the converse. First, this
|
|
# setup requires that the profiler doesn't wind up in infinite
|
|
# recursion trying to follow dependencies. Second, it should both
|
|
# spot those *and* turn them into Unknown side-effects ... which
|
|
# means that reuse of expression involving the global g2 - which is
|
|
# never altered anywhere - that crosses an access to such a table
|
|
# should be recomputed, rather than reusing the previous
|
|
# result, because "all bets are off" for Unknown side-effects.
|
|
|
|
# The second of these should reuse the previous temporary ...
|
|
print g2 * 5;
|
|
print g2 * 5;
|
|
|
|
local l1 = table([1, 3] = 4, [2, 5] = 6)
|
|
&default = function(i1: count, i2: count): count
|
|
{
|
|
local my_tbl = table([1.0, 3.0] = 1e4);
|
|
return double_to_count(my_tbl[1.0, 3.0]);
|
|
};
|
|
|
|
local l2 = table([1.0, 3.0] = 4.0, [2.0, 5.0] = 6.0)
|
|
&default = function(d1: double, d2: double): double
|
|
{
|
|
local my_tbl = table([1, 3] = 1000);
|
|
return my_tbl[1, 3];
|
|
};
|
|
|
|
# ... as should both of these, since we haven't yet done an *access*
|
|
# to one of the tables.
|
|
print g2 * 5;
|
|
print g2 * 5;
|
|
|
|
# Here's an access.
|
|
print l1[3, 8];
|
|
|
|
# The first of these should recompute, the second should re-use.
|
|
print g2 * 5;
|
|
print g2 * 5;
|
|
|
|
# Here's an access to the other.
|
|
print l2[2.0, 5.0];
|
|
|
|
# Again, the first of these should recompute, the second should re-use.
|
|
print g2 * 5;
|
|
print g2 * 5;
|
|
}
|
|
|
|
########################################################################
|
|
|
|
event zeek_init()
|
|
{
|
|
event AST_opt_test_1();
|
|
event AST_opt_test_2();
|
|
event AST_opt_test_3();
|
|
event AST_opt_test_4();
|
|
event AST_opt_test_5();
|
|
event AST_opt_test_6();
|
|
event AST_opt_test_7();
|
|
event AST_opt_test_8();
|
|
}
|