Merge remote-tracking branch 'origin/topic/vern/CSE-opt'

* origin/topic/vern/CSE-opt:
  incorporate latest version of gen-zam to correctly generate indirect calls
  added sub-directory for tracking ZAM maintenance issues
  BTest to stress-test AST optimizer's assessment of side effects
  reworked AST optimizers analysis of side effects during aggregate operations & calls
  script optimization support for tracking information associated with BiFs/functions
  fix for AST analysis of inlined functions
  improved AST optimizer's analysis of variable usage in inlined functions
  new method for Stmt nodes to report whether they could execute a "return"
  bug fixes for indirect function calls when using ZAM
  minor fixes for script optimization, exporting of attr_name, script layout tweak
This commit is contained in:
Tim Wojtulewicz 2023-12-08 10:01:10 -07:00
commit 9e8a738a8a
33 changed files with 3436 additions and 255 deletions

View file

@ -0,0 +1,635 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
Original: {
print 2 * g1, tbl_c_of_c[2];
print benign(4);
print 2 * g1, tbl_c_of_c[2];
print safe_bif();
print 2 * g1, tbl_c_of_c[2];
print dangerous_bif();
print 2 * g1, tbl_c_of_c[2];
print 2 * g1, tbl_c_of_c[2];
} <...>/AST-side-effects.zeek, lines 90-106
Transformed: {
<internal>::#0 = 2 * g1;
<internal>::#1 = tbl_c_of_c[2];
print <internal>::#0, <internal>::#1;
<internal>::#2 = benign(4);
print <internal>::#2;
<internal>::#3 = 2 * g1;
<internal>::#4 = tbl_c_of_c[2];
print <internal>::#3, <internal>::#4;
<internal>::#5 = safe_bif();
print <internal>::#5;
<internal>::#6 = 2 * g1;
<internal>::#7 = tbl_c_of_c[2];
print <internal>::#6, <internal>::#7;
<internal>::#8 = dangerous_bif();
print <internal>::#8;
<internal>::#9 = 2 * g1;
<internal>::#10 = tbl_c_of_c[2];
print <internal>::#9, <internal>::#10;
<internal>::#11 = 2 * g1;
<internal>::#12 = tbl_c_of_c[2];
print <internal>::#11, <internal>::#12;
} <...>/AST-side-effects.zeek, lines 90-106
Optimized: {
<internal>::#0 = 2 * g1;
<internal>::#1 = tbl_c_of_c[2];
print <internal>::#0, <internal>::#1;
<internal>::#2 = benign(4);
print <internal>::#2;
print <internal>::#0, <internal>::#1;
<internal>::#5 = safe_bif();
print <internal>::#5;
print <internal>::#0, <internal>::#1;
<internal>::#8 = dangerous_bif();
print <internal>::#8;
<internal>::#9 = 2 * g1;
<internal>::#10 = tbl_c_of_c[2];
print <internal>::#9, <internal>::#10;
print <internal>::#9, <internal>::#10;
} <...>/AST-side-effects.zeek, lines 90-106
Post removal of unused: {
<internal>::#0 = 2 * g1;
<internal>::#1 = tbl_c_of_c[2];
print <internal>::#0, <internal>::#1;
<internal>::#2 = benign(4);
print <internal>::#2;
print <internal>::#0, <internal>::#1;
<internal>::#5 = safe_bif();
print <internal>::#5;
print <internal>::#0, <internal>::#1;
<internal>::#8 = dangerous_bif();
print <internal>::#8;
<internal>::#9 = 2 * g1;
<internal>::#10 = tbl_c_of_c[2];
print <internal>::#9, <internal>::#10;
print <internal>::#9, <internal>::#10;
} <...>/AST-side-effects.zeek, lines 90-106
Original: {
print 2 * g1, 3 * g2, tbl_c_of_c[2];
print tbl_addr_of_count[1.2.3.4];
print 2 * g1, 3 * g2, tbl_c_of_c[2];
print tbl_addr_of_count[127.0.0.1];
print 2 * g1, 3 * g2, tbl_c_of_c[2];
} <...>/AST-side-effects.zeek, lines 119-128
Transformed: {
<internal>::#0 = 2 * g1;
<internal>::#1 = 3 * g2;
<internal>::#2 = tbl_c_of_c[2];
print <internal>::#0, <internal>::#1, <internal>::#2;
<internal>::#3 = tbl_addr_of_count[1.2.3.4];
print <internal>::#3;
<internal>::#4 = 2 * g1;
<internal>::#5 = 3 * g2;
<internal>::#6 = tbl_c_of_c[2];
print <internal>::#4, <internal>::#5, <internal>::#6;
<internal>::#7 = tbl_addr_of_count[127.0.0.1];
print <internal>::#7;
<internal>::#8 = 2 * g1;
<internal>::#9 = 3 * g2;
<internal>::#10 = tbl_c_of_c[2];
print <internal>::#8, <internal>::#9, <internal>::#10;
} <...>/AST-side-effects.zeek, lines 119-128
Optimized: {
<internal>::#0 = 2 * g1;
<internal>::#1 = 3 * g2;
<internal>::#2 = tbl_c_of_c[2];
print <internal>::#0, <internal>::#1, <internal>::#2;
<internal>::#3 = tbl_addr_of_count[1.2.3.4];
print <internal>::#3;
<internal>::#4 = 2 * g1;
print <internal>::#4, <internal>::#1, <internal>::#2;
<internal>::#7 = tbl_addr_of_count[127.0.0.1];
print <internal>::#7;
<internal>::#8 = 2 * g1;
print <internal>::#8, <internal>::#1, <internal>::#2;
} <...>/AST-side-effects.zeek, lines 119-128
Post removal of unused: {
<internal>::#0 = 2 * g1;
<internal>::#1 = 3 * g2;
<internal>::#2 = tbl_c_of_c[2];
print <internal>::#0, <internal>::#1, <internal>::#2;
<internal>::#3 = tbl_addr_of_count[1.2.3.4];
print <internal>::#3;
<internal>::#4 = 2 * g1;
print <internal>::#4, <internal>::#1, <internal>::#2;
<internal>::#7 = tbl_addr_of_count[127.0.0.1];
print <internal>::#7;
<internal>::#8 = 2 * g1;
print <internal>::#8, <internal>::#1, <internal>::#2;
} <...>/AST-side-effects.zeek, lines 119-128
Original: {
print tbl_c_of_c[2];
print tbl_c_of_c[2];
tcc_mod[F] = 33;
print tbl_c_of_c[2];
print tbl_c_of_c[2];
mess_is_active = T;
tcc_mod[T] = 66;
print tbl_c_of_c[2];
print tbl_c_of_c[2];
} <...>/AST-side-effects.zeek, lines 153-170
Transformed: {
<internal>::#0 = tbl_c_of_c[2];
print <internal>::#0;
<internal>::#1 = tbl_c_of_c[2];
print <internal>::#1;
tcc_mod[F] []= 33;
<internal>::#2 = tbl_c_of_c[2];
print <internal>::#2;
<internal>::#3 = tbl_c_of_c[2];
print <internal>::#3;
mess_is_active = T;
tcc_mod[T] []= 66;
<internal>::#4 = tbl_c_of_c[2];
print <internal>::#4;
<internal>::#5 = tbl_c_of_c[2];
print <internal>::#5;
} <...>/AST-side-effects.zeek, lines 153-170
Optimized: {
<internal>::#0 = tbl_c_of_c[2];
print <internal>::#0;
print <internal>::#0;
tcc_mod[F] []= 33;
<internal>::#2 = tbl_c_of_c[2];
print <internal>::#2;
print <internal>::#2;
mess_is_active = T;
tcc_mod[T] []= 66;
<internal>::#4 = tbl_c_of_c[2];
print <internal>::#4;
print <internal>::#4;
} <...>/AST-side-effects.zeek, lines 153-170
Post removal of unused: {
<internal>::#0 = tbl_c_of_c[2];
print <internal>::#0;
print <internal>::#0;
tcc_mod[F] []= 33;
<internal>::#2 = tbl_c_of_c[2];
print <internal>::#2;
print <internal>::#2;
mess_is_active = T;
tcc_mod[T] []= 66;
<internal>::#4 = tbl_c_of_c[2];
print <internal>::#4;
print <internal>::#4;
} <...>/AST-side-effects.zeek, lines 153-170
Original: {
print 2 * g1, tbl_c_of_c[2], tbl_c_of_c2[1], tbl_c_of_c2[10];
print 2 * g1, tbl_c_of_c[2], tbl_c_of_c2[1], tbl_c_of_c2[10];
local_tbl_c_of_c3 = table(4 = 1, 12 = 0)&default=function(){
return (benign(c + 7) - 2);
}, &optional;
print local_tbl_c_of_c3[12];
print 2 * g1, tbl_c_of_c[2], tbl_c_of_c2[1], local_tbl_c_of_c3[12];
print local_tbl_c_of_c3[10];
print 2 * g1, tbl_c_of_c[2], tbl_c_of_c2[1], local_tbl_c_of_c3[10];
print 2 * g1, tbl_c_of_c[2], tbl_c_of_c2[1], local_tbl_c_of_c3[10];
print dangerous_bif();
print 2 * g1, tbl_c_of_c[2], tbl_c_of_c2[1], local_tbl_c_of_c3[12];
print 2 * g1, tbl_c_of_c[2], tbl_c_of_c2[1], local_tbl_c_of_c3[12], local_tbl_c_of_c3[10];
} <...>/AST-side-effects.zeek, lines 178-204
Transformed: {
<internal>::#0 = 2 * g1;
<internal>::#1 = tbl_c_of_c[2];
<internal>::#2 = tbl_c_of_c2[1];
<internal>::#3 = tbl_c_of_c2[10];
print <internal>::#0, <internal>::#1, <internal>::#2, <internal>::#3;
<internal>::#4 = 2 * g1;
<internal>::#5 = tbl_c_of_c[2];
<internal>::#6 = tbl_c_of_c2[1];
<internal>::#7 = tbl_c_of_c2[10];
print <internal>::#4, <internal>::#5, <internal>::#6, <internal>::#7;
local_tbl_c_of_c3 = table(4 = 1, 12 = 0)&default=function(){
return (benign(c + 7) - 2);
}, &optional;
<internal>::#8 = local_tbl_c_of_c3[12];
print <internal>::#8;
<internal>::#9 = 2 * g1;
<internal>::#10 = tbl_c_of_c[2];
<internal>::#11 = tbl_c_of_c2[1];
<internal>::#12 = local_tbl_c_of_c3[12];
print <internal>::#9, <internal>::#10, <internal>::#11, <internal>::#12;
<internal>::#13 = local_tbl_c_of_c3[10];
print <internal>::#13;
<internal>::#14 = 2 * g1;
<internal>::#15 = tbl_c_of_c[2];
<internal>::#16 = tbl_c_of_c2[1];
<internal>::#17 = local_tbl_c_of_c3[10];
print <internal>::#14, <internal>::#15, <internal>::#16, <internal>::#17;
<internal>::#18 = 2 * g1;
<internal>::#19 = tbl_c_of_c[2];
<internal>::#20 = tbl_c_of_c2[1];
<internal>::#21 = local_tbl_c_of_c3[10];
print <internal>::#18, <internal>::#19, <internal>::#20, <internal>::#21;
<internal>::#22 = dangerous_bif();
print <internal>::#22;
<internal>::#23 = 2 * g1;
<internal>::#24 = tbl_c_of_c[2];
<internal>::#25 = tbl_c_of_c2[1];
<internal>::#26 = local_tbl_c_of_c3[12];
print <internal>::#23, <internal>::#24, <internal>::#25, <internal>::#26;
<internal>::#27 = 2 * g1;
<internal>::#28 = tbl_c_of_c[2];
<internal>::#29 = tbl_c_of_c2[1];
<internal>::#30 = local_tbl_c_of_c3[12];
<internal>::#31 = local_tbl_c_of_c3[10];
print <internal>::#27, <internal>::#28, <internal>::#29, <internal>::#30, <internal>::#31;
} <...>/AST-side-effects.zeek, lines 178-204
Optimized: {
<internal>::#0 = 2 * g1;
<internal>::#1 = tbl_c_of_c[2];
<internal>::#2 = tbl_c_of_c2[1];
<internal>::#3 = tbl_c_of_c2[10];
print <internal>::#0, <internal>::#1, <internal>::#2, <internal>::#3;
print <internal>::#0, <internal>::#1, <internal>::#2, <internal>::#3;
local_tbl_c_of_c3 = table(4 = 1, 12 = 0)&default=function(){
return (benign(c + 7) - 2);
}, &optional;
<internal>::#8 = local_tbl_c_of_c3[12];
print <internal>::#8;
print <internal>::#0, <internal>::#1, <internal>::#2, <internal>::#8;
<internal>::#13 = local_tbl_c_of_c3[10];
print <internal>::#13;
print <internal>::#0, <internal>::#1, <internal>::#2, <internal>::#13;
print <internal>::#0, <internal>::#1, <internal>::#2, <internal>::#13;
<internal>::#22 = dangerous_bif();
print <internal>::#22;
<internal>::#23 = 2 * g1;
<internal>::#24 = tbl_c_of_c[2];
<internal>::#25 = tbl_c_of_c2[1];
<internal>::#26 = local_tbl_c_of_c3[12];
print <internal>::#23, <internal>::#24, <internal>::#25, <internal>::#26;
<internal>::#31 = local_tbl_c_of_c3[10];
print <internal>::#23, <internal>::#24, <internal>::#25, <internal>::#26, <internal>::#31;
} <...>/AST-side-effects.zeek, lines 178-204
Post removal of unused: {
<internal>::#0 = 2 * g1;
<internal>::#1 = tbl_c_of_c[2];
<internal>::#2 = tbl_c_of_c2[1];
<internal>::#3 = tbl_c_of_c2[10];
print <internal>::#0, <internal>::#1, <internal>::#2, <internal>::#3;
print <internal>::#0, <internal>::#1, <internal>::#2, <internal>::#3;
local_tbl_c_of_c3 = table(4 = 1, 12 = 0)&default=function(){
return (benign(c + 7) - 2);
}, &optional;
<internal>::#8 = local_tbl_c_of_c3[12];
print <internal>::#8;
print <internal>::#0, <internal>::#1, <internal>::#2, <internal>::#8;
<internal>::#13 = local_tbl_c_of_c3[10];
print <internal>::#13;
print <internal>::#0, <internal>::#1, <internal>::#2, <internal>::#13;
print <internal>::#0, <internal>::#1, <internal>::#2, <internal>::#13;
<internal>::#22 = dangerous_bif();
print <internal>::#22;
<internal>::#23 = 2 * g1;
<internal>::#24 = tbl_c_of_c[2];
<internal>::#25 = tbl_c_of_c2[1];
<internal>::#26 = local_tbl_c_of_c3[12];
print <internal>::#23, <internal>::#24, <internal>::#25, <internal>::#26;
<internal>::#31 = local_tbl_c_of_c3[10];
print <internal>::#23, <internal>::#24, <internal>::#25, <internal>::#26, <internal>::#31;
} <...>/AST-side-effects.zeek, lines 178-204
Original: {
side_effect_free = table(0 = 0.5, 1 = 1.5);
print side_effect_free[1];
print side_effect_free[1];
has_side_effects = table(0 = -0.5, 1 = -1.5)&default_insert=function(){
my_exponential = my_exponential * my_exponential;
return (my_exponential);
};
print side_effect_free[1], has_side_effects[2];
print side_effect_free[1], has_side_effects[2];
} <...>/AST-side-effects.zeek, lines 213-238
Transformed: {
side_effect_free = table(0 = 0.5, 1 = 1.5);
<internal>::#0 = side_effect_free[1];
print <internal>::#0;
<internal>::#1 = side_effect_free[1];
print <internal>::#1;
has_side_effects = table(0 = -0.5, 1 = -1.5)&default_insert=function(){
my_exponential = my_exponential * my_exponential;
return (my_exponential);
};
<internal>::#2 = side_effect_free[1];
<internal>::#3 = has_side_effects[2];
print <internal>::#2, <internal>::#3;
<internal>::#4 = side_effect_free[1];
<internal>::#5 = has_side_effects[2];
print <internal>::#4, <internal>::#5;
} <...>/AST-side-effects.zeek, lines 213-238
Optimized: {
side_effect_free = table(0 = 0.5, 1 = 1.5);
<internal>::#0 = side_effect_free[1];
print <internal>::#0;
<internal>::#1 = side_effect_free[1];
print <internal>::#1;
has_side_effects = table(0 = -0.5, 1 = -1.5)&default_insert=function(){
my_exponential = my_exponential * my_exponential;
return (my_exponential);
};
<internal>::#2 = side_effect_free[1];
<internal>::#3 = has_side_effects[2];
print <internal>::#2, <internal>::#3;
<internal>::#4 = side_effect_free[1];
<internal>::#5 = has_side_effects[2];
print <internal>::#4, <internal>::#5;
} <...>/AST-side-effects.zeek, lines 213-238
Post removal of unused: {
side_effect_free = table(0 = 0.5, 1 = 1.5);
<internal>::#0 = side_effect_free[1];
print <internal>::#0;
<internal>::#1 = side_effect_free[1];
print <internal>::#1;
has_side_effects = table(0 = -0.5, 1 = -1.5)&default_insert=function(){
my_exponential = my_exponential * my_exponential;
return (my_exponential);
};
<internal>::#2 = side_effect_free[1];
<internal>::#3 = has_side_effects[2];
print <internal>::#2, <internal>::#3;
<internal>::#4 = side_effect_free[1];
<internal>::#5 = has_side_effects[2];
print <internal>::#4, <internal>::#5;
} <...>/AST-side-effects.zeek, lines 213-238
Original: {
print 2 * g1, tbl_c_of_c[2], tbl_c_of_c2[1], tbl_c_of_b[100], tbl_s_of_s[4];
print 2 * g1, tbl_c_of_c[2], tbl_c_of_c2[1], tbl_c_of_b[100], tbl_s_of_s[4];
print table(11 = F)&default=function(){
tbl_s_of_s2[2] = two;
return (sizeoftbl_s_of_s2 < 9);
}, &optional;
} <...>/AST-side-effects.zeek, lines 247-261
Transformed: {
<internal>::#0 = 2 * g1;
<internal>::#1 = tbl_c_of_c[2];
<internal>::#2 = tbl_c_of_c2[1];
<internal>::#3 = tbl_c_of_b[100];
<internal>::#4 = tbl_s_of_s[4];
print <internal>::#0, <internal>::#1, <internal>::#2, <internal>::#3, <internal>::#4;
<internal>::#5 = 2 * g1;
<internal>::#6 = tbl_c_of_c[2];
<internal>::#7 = tbl_c_of_c2[1];
<internal>::#8 = tbl_c_of_b[100];
<internal>::#9 = tbl_s_of_s[4];
print <internal>::#5, <internal>::#6, <internal>::#7, <internal>::#8, <internal>::#9;
<internal>::#10 = table(11 = F)&default=function(){
tbl_s_of_s2[2] = two;
return (sizeoftbl_s_of_s2 < 9);
}, &optional;
print <internal>::#10;
} <...>/AST-side-effects.zeek, lines 247-261
Optimized: {
<internal>::#0 = 2 * g1;
<internal>::#1 = tbl_c_of_c[2];
<internal>::#2 = tbl_c_of_c2[1];
<internal>::#3 = tbl_c_of_b[100];
<internal>::#4 = tbl_s_of_s[4];
print <internal>::#0, <internal>::#1, <internal>::#2, <internal>::#3, <internal>::#4;
<internal>::#8 = tbl_c_of_b[100];
<internal>::#9 = tbl_s_of_s[4];
print <internal>::#0, <internal>::#1, <internal>::#2, <internal>::#8, <internal>::#9;
<internal>::#10 = table(11 = F)&default=function(){
tbl_s_of_s2[2] = two;
return (sizeoftbl_s_of_s2 < 9);
}, &optional;
print <internal>::#10;
} <...>/AST-side-effects.zeek, lines 247-261
Post removal of unused: {
<internal>::#0 = 2 * g1;
<internal>::#1 = tbl_c_of_c[2];
<internal>::#2 = tbl_c_of_c2[1];
<internal>::#3 = tbl_c_of_b[100];
<internal>::#4 = tbl_s_of_s[4];
print <internal>::#0, <internal>::#1, <internal>::#2, <internal>::#3, <internal>::#4;
<internal>::#8 = tbl_c_of_b[100];
<internal>::#9 = tbl_s_of_s[4];
print <internal>::#0, <internal>::#1, <internal>::#2, <internal>::#8, <internal>::#9;
<internal>::#10 = table(11 = F)&default=function(){
tbl_s_of_s2[2] = two;
return (sizeoftbl_s_of_s2 < 9);
}, &optional;
print <internal>::#10;
} <...>/AST-side-effects.zeek, lines 247-261
Original: {
print tbl_s_of_s[4];
print tbl_s_of_s[4];
force_tbl_c_of_c_str_reload = R1($b=match-on-type-of-tbl_c_of_c);
print force_tbl_c_of_c_str_reload;
print tbl_s_of_s[4];
print tbl_s_of_s[4];
problem_r2 = R2($b=hello);
print tbl_s_of_s[4], problem_r2$b;
problem_r3 = R3($b=hello again);
print tbl_s_of_s[4], problem_r3$b;
} <...>/AST-side-effects.zeek, lines 306-336
Transformed: {
<internal>::#0 = tbl_s_of_s[4];
print <internal>::#0;
<internal>::#1 = tbl_s_of_s[4];
print <internal>::#1;
force_tbl_c_of_c_str_reload = R1($b=match-on-type-of-tbl_c_of_c);
print force_tbl_c_of_c_str_reload;
<internal>::#2 = tbl_s_of_s[4];
print <internal>::#2;
<internal>::#3 = tbl_s_of_s[4];
print <internal>::#3;
problem_r2 = R2($b=hello);
<internal>::#4 = tbl_s_of_s[4];
<internal>::#5 = problem_r2$b;
print <internal>::#4, <internal>::#5;
problem_r3 = R3($b=hello again);
<internal>::#6 = tbl_s_of_s[4];
<internal>::#7 = problem_r3$b;
print <internal>::#6, <internal>::#7;
} <...>/AST-side-effects.zeek, lines 306-336
Optimized: {
<internal>::#0 = tbl_s_of_s[4];
print <internal>::#0;
print <internal>::#0;
force_tbl_c_of_c_str_reload = R1($b=match-on-type-of-tbl_c_of_c);
print force_tbl_c_of_c_str_reload;
<internal>::#2 = tbl_s_of_s[4];
print <internal>::#2;
print <internal>::#2;
problem_r2 = R2($b=hello);
<internal>::#4 = tbl_s_of_s[4];
<internal>::#5 = problem_r2$b;
print <internal>::#4, <internal>::#5;
problem_r3 = R3($b=hello again);
<internal>::#7 = problem_r3$b;
print <internal>::#4, <internal>::#7;
} <...>/AST-side-effects.zeek, lines 306-336
Post removal of unused: {
<internal>::#0 = tbl_s_of_s[4];
print <internal>::#0;
print <internal>::#0;
force_tbl_c_of_c_str_reload = R1($b=match-on-type-of-tbl_c_of_c);
print force_tbl_c_of_c_str_reload;
<internal>::#2 = tbl_s_of_s[4];
print <internal>::#2;
print <internal>::#2;
problem_r2 = R2($b=hello);
<internal>::#4 = tbl_s_of_s[4];
<internal>::#5 = problem_r2$b;
print <internal>::#4, <internal>::#5;
problem_r3 = R3($b=hello again);
<internal>::#7 = problem_r3$b;
print <internal>::#4, <internal>::#7;
} <...>/AST-side-effects.zeek, lines 306-336
Original: {
print 5 * g2;
print 5 * g2;
l1 = table(1, 3 = 4, 2, 5 = 6)&default=function(){
my_tbl = table(1.0, 3.0 = 10000.0);
return (double_to_count(my_tbl[1.0, 3.0]));
}, &optional;
l2 = table(1.0, 3.0 = 4.0, 2.0, 5.0 = 6.0)&default=function(){
my_tbl = table(1, 3 = 1000);
return ((coerce my_tbl[1, 3] to double));
}, &optional;
print 5 * g2;
print 5 * g2;
print l1[3, 8];
print 5 * g2;
print 5 * g2;
print l2[2.0, 5.0];
print 5 * g2;
print 5 * g2;
} <...>/AST-side-effects.zeek, lines 341-390
Transformed: {
<internal>::#0 = 5 * g2;
print <internal>::#0;
<internal>::#1 = 5 * g2;
print <internal>::#1;
l1 = table(1, 3 = 4, 2, 5 = 6)&default=function(){
my_tbl = table(1.0, 3.0 = 10000.0);
return (double_to_count(my_tbl[1.0, 3.0]));
}, &optional;
l2 = table(1.0, 3.0 = 4.0, 2.0, 5.0 = 6.0)&default=function(){
my_tbl = table(1, 3 = 1000);
return ((coerce my_tbl[1, 3] to double));
}, &optional;
<internal>::#2 = 5 * g2;
print <internal>::#2;
<internal>::#3 = 5 * g2;
print <internal>::#3;
<internal>::#4 = l1[3, 8];
print <internal>::#4;
<internal>::#5 = 5 * g2;
print <internal>::#5;
<internal>::#6 = 5 * g2;
print <internal>::#6;
<internal>::#7 = l2[2.0, 5.0];
print <internal>::#7;
<internal>::#8 = 5 * g2;
print <internal>::#8;
<internal>::#9 = 5 * g2;
print <internal>::#9;
} <...>/AST-side-effects.zeek, lines 341-390
Optimized: {
<internal>::#0 = 5 * g2;
print <internal>::#0;
print <internal>::#0;
l1 = table(1, 3 = 4, 2, 5 = 6)&default=function(){
my_tbl = table(1.0, 3.0 = 10000.0);
return (double_to_count(my_tbl[1.0, 3.0]));
}, &optional;
l2 = table(1.0, 3.0 = 4.0, 2.0, 5.0 = 6.0)&default=function(){
my_tbl = table(1, 3 = 1000);
return ((coerce my_tbl[1, 3] to double));
}, &optional;
print <internal>::#0;
print <internal>::#0;
<internal>::#4 = l1[3, 8];
print <internal>::#4;
<internal>::#5 = 5 * g2;
print <internal>::#5;
print <internal>::#5;
<internal>::#7 = l2[2.0, 5.0];
print <internal>::#7;
<internal>::#8 = 5 * g2;
print <internal>::#8;
print <internal>::#8;
} <...>/AST-side-effects.zeek, lines 341-390
Post removal of unused: {
<internal>::#0 = 5 * g2;
print <internal>::#0;
print <internal>::#0;
l1 = table(1, 3 = 4, 2, 5 = 6)&default=function(){
my_tbl = table(1.0, 3.0 = 10000.0);
return (double_to_count(my_tbl[1.0, 3.0]));
}, &optional;
l2 = table(1.0, 3.0 = 4.0, 2.0, 5.0 = 6.0)&default=function(){
my_tbl = table(1, 3 = 1000);
return ((coerce my_tbl[1, 3] to double));
}, &optional;
print <internal>::#0;
print <internal>::#0;
<internal>::#4 = l1[3, 8];
print <internal>::#4;
<internal>::#5 = 5 * g2;
print <internal>::#5;
print <internal>::#5;
<internal>::#7 = l2[2.0, 5.0];
print <internal>::#7;
<internal>::#8 = 5 * g2;
print <internal>::#8;
print <internal>::#8;
} <...>/AST-side-effects.zeek, lines 341-390
8, 3
7
8, 3
28
8, 3
3
8, 456
8, 456
8, 132, 456
1001
8, 132, 456
5
10, 132, 456
456
456
456
456
999
999
10, 999, 4, 13
10, 999, 4, 13
0
10, 999, 4, 0
18
10, 999, 4, 18
10, 999, 4, 18
999
10, 456, 4, 0
10, 456, 4, 0, 18
1.5
1.5
1.5, 4.0
1.5, 4.0
10, 456, 4, F, 8
10, 456, 4, F, 8
{
[11] = F
}
8
8
[a=3, b=match-on-type-of-tbl_c_of_c]
8
8
8, hello
8, hello again
220
220
220
220
10000
220
220
6.0
220
220

View file

@ -0,0 +1,405 @@
# @TEST-DOC: Stress tests for the AST optimizer dealing with side effects.
# @TEST-REQUIRES: test "${ZEEK_ZAM}" == "1"
#
# See below for an explanation of this convoluted invocation line.
# @TEST-EXEC: zeek -b -O ZAM -O dump-xform --optimize-func='AST_opt_test_.*' %INPUT >output
# @TEST-EXEC: TEST_DIFF_CANONIFIER=$SCRIPTS/diff-remove-abspath btest-diff output
# This is a subtle & extensive test of the AST optimizer used by ZAM, in
# particular its CSE = Common Subexpression Elimination functionality, whereby
# it reuses previously computed values rather than recomputing them. This test
# in particular focuses on that functionality in the context of side-effects
# that arise due to either implicit or explicit function calls. The implicit
# ones (such as table &default functions) are the trickiest, because they
# can in principle access or modify state that leads to *other* implicit
# function calls.
#
# Unlike nearly all Zeek BTests, the heart of the output is intermediary
# information produced by "-O dump-xform", not the final output. The dump
# implicitly reveals where the AST optimizer reuses expressions previously
# computed (into temporaries) and where it refrains from doing so because
# in principle the re-use is unsafe due to possible modifications to the
# original expression. Many of the tests print the same set of values twice in
# a row, often to see whether the first "print" requires correctly re-computing
# an expression or inheriting its value from a previous computation. The
# second "print" generally ensures that given the lack of potential
# side-effects, the values from the first "print" are re-used, although for
# some tests they need to instead be re-computed.
#
# If the dumped intermediary information agrees but there's a difference in
# the final output as the Zeek script executes, that reflects a bug subsequent
# to the AST optimizer (typically, this will be in ZAM's low-level optimizer).
#
# Changes to the AST processing used by script optimization can introduce
# benign differences to the intermediary information, such as introducing
# or removing some temporary variables. These require hand inspection to
# determine whether they reflect a problem.
#
# We divide the tests up into a number of different event handlers. This
# keeps the AST for each group of tests from getting too complex (especially
# if later tests wind up reusing values from much earlier tests, which can
# be hard to manually verify for correctness). We enable ZAM optimization
# for *only* those handlers to avoid complications in the output unrelated
# to these tests. We use distinct event handlers, rather than multiple
# handlers for the same event, to avoid coalescence of the handlers. Doing
# this explicitly rather than using "-O no-inline" is preferrable because
# it means the testing still includes any issues that the inliner might
# introduce.
#
# Any time you make a change to the final output the script produces, confirm
# that running Zeek *without* script optimization produces that same output.
########################################################################
# A global that in some contexts is directly modified as a side-effect.
global g1 = 4;
# A global that is never directly modified as a side-effect. However, some
# side effects have Unknown consequences - for those, the optimizer should
# assume that any expression derived from a global value needs to re-computed.
global g2 = 44;
# A table with a safe &default. However, in some contexts the tests will
# introduce other tables with the same type signature whose &default's have
# side effects. Due to the complexity of potential aliasing among Zeek
# container objects of like types, those side effects should be presumed
# to potentially modify this global.
global tbl_c_of_c = table([2] = 3, [3] = 5) &default=456;
# A function with no side effects.
function benign(a: count): count
{
return a + 3;
}
# Calls a BiF that should be known to the optimizer as having no side effects.
function safe_bif(): string
{
return fmt("%s", g1 * 7);
}
# Calls a BiF that the optimizer doesn't know as having no side effects,
# and which should therefore be treated as having Unknown side effects.
function dangerous_bif(): count
{
local l = tbl_c_of_c[2];
clear_table(tbl_c_of_c);
return l;
}
event AST_opt_test_1()
{
# For the following, the optimizer should reuse "g1 * 2" and
# "tbl_c_of_c[2]" for the later print's ...
print g1 * 2, tbl_c_of_c[2];
print benign(4);
print g1 * 2, tbl_c_of_c[2];
print safe_bif();
print g1 * 2, tbl_c_of_c[2];
# ... but *not* after this call. In addition, even though the call
# doesn't modify "g1", it should still be reloaded because
# the optimizer only knows that the BiF is unsafe = it has Unknown
# effects.
print dangerous_bif();
print g1 * 2, tbl_c_of_c[2];
print g1 * 2, tbl_c_of_c[2];
}
########################################################################
# A function that modifies our global-of-interest.
function mod_g1(a: addr): count
{
return ++g1;
}
global tbl_addr_of_count = table([1.2.3.4] = 1001, [2.3.4.5] = 10002) &default=mod_g1;
event AST_opt_test_2()
{
# The optimizer should reload "g1" after each tbl_addr_of_count
# reference, but reuse "g2 * 3" and "tbl_c_of_c[2]", since those
# can't be affected by mod_g1().
print g1 * 2, g2 * 3, tbl_c_of_c[2];
print tbl_addr_of_count[1.2.3.4];
print g1 * 2, g2 * 3, tbl_c_of_c[2];
print tbl_addr_of_count[127.0.0.1];
print g1 * 2, g2 * 3, tbl_c_of_c[2];
}
########################################################################
# A global that controls whether the following functions change an entry
# in one of our global tables.
global mess_is_active = F;
# We use a separate function for actually changing the global to make sure
# that the AST analysis follows the effects of function calls.
function do_the_messing()
{
tbl_c_of_c[2] = 999;
}
# An &on_change handler that potentially alters the value of the table.
function mess_with_tbl_c_of_c(tbl: table[bool] of count, tc: TableChange, ind: bool, val: count)
{
if ( mess_is_active )
do_the_messing();
}
global tcc_mod = table([F] = 44, [T] = 55) &on_change=mess_with_tbl_c_of_c;
event AST_opt_test_3()
{
# The optimizer should re-use "tbl_c_of_c[2]" in each of the secondary
# print's, but it should recompute it after each modification to
# "tcc_mod", even though the first one won't lead to a potential change.
print tbl_c_of_c[2];
print tbl_c_of_c[2];
tcc_mod[F] = 33;
print tbl_c_of_c[2];
print tbl_c_of_c[2];
mess_is_active = T;
tcc_mod[T] = 66;
print tbl_c_of_c[2];
print tbl_c_of_c[2];
}
########################################################################
# Analogous to tbl_c_of_c but has a function call for its &default.
global tbl_c_of_c2 = table([1] = 4, [4] = 8) &default=benign;
event AST_opt_test_4()
{
# In the following, for the duplicated prints the optimizer should
# reuse the previous values. That includes for the accesses to
# local_tbl_c_of_c3 (which associates a more complex &default function
# to "table[count] of count" types).
print g1 * 2, tbl_c_of_c[2], tbl_c_of_c2[1], tbl_c_of_c2[10];
print g1 * 2, tbl_c_of_c[2], tbl_c_of_c2[1], tbl_c_of_c2[10];
local local_tbl_c_of_c3 = table([4] = 1, [12] = 0)
&default=function(c: count): count { return benign(c+7) - 2; };
# We print at this separately to make sure it occurs prior to
# potentially computing the other elements in the print.
print local_tbl_c_of_c3[12];
print g1 * 2, tbl_c_of_c[2], tbl_c_of_c2[1], local_tbl_c_of_c3[12];
# Same with separate printing here.
print local_tbl_c_of_c3[10];
print g1 * 2, tbl_c_of_c[2], tbl_c_of_c2[1], local_tbl_c_of_c3[10];
print g1 * 2, tbl_c_of_c[2], tbl_c_of_c2[1], local_tbl_c_of_c3[10];
# This BiF should lead to recomputing of all values, including
# the local local_tbl_c_of_c3.
print dangerous_bif();
print g1 * 2, tbl_c_of_c[2], tbl_c_of_c2[1], local_tbl_c_of_c3[12];
print g1 * 2, tbl_c_of_c[2], tbl_c_of_c2[1], local_tbl_c_of_c3[12], local_tbl_c_of_c3[10];
}
########################################################################
# Used to introduce another global as having side effects, this time in
# the context of a local table.
global my_exponential = 2.0;
event AST_opt_test_5()
{
# A similar test, but this time the second local has side effects,
# and aliases type-wise with the first local, so expressions with
# the first local should be recomputed for every access.
#
# The notion of aggregate aliases is important because in general
# with static analysis it can be extremely difficult to tell whether
# two instances of an aggregate, each with the same type, might in
# fact refer to the same underlying aggregate value. The optimizer
# thus needs to play it safe and refrain from only focusing on the
# instance that directly has the attribute.
local side_effect_free = table([0] = 0.5, [1] = 1.5);
print side_effect_free[1];
print side_effect_free[1];
local has_side_effects = table([0] = -0.5, [1] = -1.5)
&default_insert=function(c: count): double
{
my_exponential = my_exponential * my_exponential;
return my_exponential;
};
print side_effect_free[1], has_side_effects[2];
print side_effect_free[1], has_side_effects[2];
}
########################################################################
global tbl_c_of_b = table([100] = F, [101] = T);
global tbl_s_of_s = table(["1"] = "4", ["4"] = "8") &default="no-side-effects";
global tbl_s_of_s2 = table(["1"] = "4", ["4"] = "8") &default="ultimately-side-effects";
event AST_opt_test_6()
{
# Without the final statement of this handler, the second one of these
# prints would re-use all of the values. However, it instead can only
# reuse the first three, as explained below.
print g1 * 2, tbl_c_of_c[2], tbl_c_of_c2[1], tbl_c_of_b[100], tbl_s_of_s["4"];
print g1 * 2, tbl_c_of_c[2], tbl_c_of_c2[1], tbl_c_of_b[100], tbl_s_of_s["4"];
# This should force recomputation of tbl_c_of_b above (because its
# type aliases with one that has side effects on loads, so loads
# must not be optimized away) AND THEREFORE also tbl_s_of_s (because
# those loads affect an alias of its type).
print table([11] = F)
&default=function(c: count): bool
{ tbl_s_of_s2["2"] = "two"; return |tbl_s_of_s2| < 9; };
}
########################################################################
# Here we test &default side-effects of record constructors. We also use
# this as an opportunity to stress-test propagation of side effects
# when one side effect induces another, and whether AST analysis correctly
# accounts for potential type aliasing.
# Reads a local table - but that should be enough to raise the specter of
# any "table[count] of bool" access side-effects.
function local_table_read(): count
{
local l = table([99] = T);
return l[99] ? 3 : 2;
}
# The same, but modifies the table instead.
function local_table_mod(): count
{
local l = table([99] = T);
l[99] = F;
return |l|;
}
# Tickles "table[count] of bool" access side-effects.
type R1: record {
a: count &default = local_table_read();
b: string;
};
# Similar, but for modification side-effects. NOTE: type-compatible with R1.
type R2: record {
a: count &default = local_table_mod();
b: string;
};
# Just like R2 but NOT type-compatible with R1.
type R3: record {
a: count &default = local_table_mod();
b: string;
c: int &default = +5;
};
event AST_opt_test_7()
{
# In this test, it's not the mere presence of "R1" that affects
# earlier optimization (like in the previous test), but the execution
# of its constructor. So the following should proceed with the second
# completely re-using from the first.
print tbl_s_of_s["4"];
print tbl_s_of_s["4"];
# Here constructing R1 potentially invokes the &default for $a,
# which reads from a "table[count] of bool", which the previous
# test (AST_opt_test_6) has set up as potentially affecting values
# of type "table[string] of string" such as tbl_s_of_s.
local force_tbl_c_of_c_str_reload = R1($b="match-on-type-of-tbl_c_of_c");
print force_tbl_c_of_c_str_reload;
print tbl_s_of_s["4"];
print tbl_s_of_s["4"];
# Here's a similar sequence but using a record that *modifies*
# a "table[count] of bool" - which ordinarily should NOT thwart
# optimization since there's no potential &on_change attribute for
# such tables ... HOWEVER R2 is type-compatible with R1, so it
# inherits R1's side-effects, thus this WILL require a reload of
# tbl_s_of_s["4"].
local problem_r2 = R2($b="hello");
print tbl_s_of_s["4"], problem_r2$b;
# ... THIS however won't, because R3 is not type-compatible with R1,
# even though it has the same attributes as R2.
local problem_r3 = R3($b="hello again");
print tbl_s_of_s["4"], problem_r3$b;
}
########################################################################
event AST_opt_test_8()
{
# This last one is a hum-dinger. The two locals we define introduce
# mutually recursive side-effects, type-wise: the first sets up that
# a "table[count, count] of count" can access a "table[double, double]
# of double", and the latter establishes the converse. First, this
# setup requires that the profiler doesn't wind up in infinite
# recursion trying to follow dependencies. Second, it should both
# spot those *and* turn them into Unknown side-effects ... which
# means that reuse of expression involving the global g2 - which is
# never altered anywhere - that crosses an access to such a table
# should be recomputed, rather than reusing the previous
# result, because "all bets are off" for Unknown side-effects.
# The second of these should reuse the previous temporary ...
print g2 * 5;
print g2 * 5;
local l1 = table([1, 3] = 4, [2, 5] = 6)
&default = function(i1: count, i2: count): count
{
local my_tbl = table([1.0, 3.0] = 1e4);
return double_to_count(my_tbl[1.0, 3.0]);
};
local l2 = table([1.0, 3.0] = 4.0, [2.0, 5.0] = 6.0)
&default = function(d1: double, d2: double): double
{
local my_tbl = table([1, 3] = 1000);
return my_tbl[1, 3];
};
# ... as should both of these, since we haven't yet done an *access*
# to one of the tables.
print g2 * 5;
print g2 * 5;
# Here's an access.
print l1[3, 8];
# The first of these should recompute, the second should re-use.
print g2 * 5;
print g2 * 5;
# Here's an access to the other.
print l2[2.0, 5.0];
# Again, the first of these should recompute, the second should re-use.
print g2 * 5;
print g2 * 5;
}
########################################################################
event zeek_init()
{
event AST_opt_test_1();
event AST_opt_test_2();
event AST_opt_test_3();
event AST_opt_test_4();
event AST_opt_test_5();
event AST_opt_test_6();
event AST_opt_test_7();
event AST_opt_test_8();
}