input/Raw: Avoid reading file twice in MODE_REREAD

Found while writing documentation and wondering why all lines and
the end_of_data() event arrive twice during startup.

The test is a bit fuzzy, but does fail reliably without
the changes to Raw.cc

Also fix the MODE_REREAD change check, which compared st_ino and st_mtime but not st_dev.

Closes #3053
This commit is contained in:
Arne Welzel 2023-05-17 20:55:06 +02:00
parent 3b28f59c3b
commit d8b5bdf758
3 changed files with 76 additions and 2 deletions

View file

@ -291,7 +291,7 @@ bool Raw::OpenInput()
return false; return false;
} }
if ( Info().mode == MODE_STREAM ) if ( Info().mode == MODE_STREAM || Info().mode == MODE_REREAD )
{ {
struct stat sb; struct stat sb;
if ( fstat(fileno(file.get()), &sb) == -1 ) if ( fstat(fileno(file.get()), &sb) == -1 )
@ -300,6 +300,8 @@ bool Raw::OpenInput()
Error(Fmt("Could not get fstat for %s", fname.c_str())); Error(Fmt("Could not get fstat for %s", fname.c_str()));
return false; return false;
} }
mtime = sb.st_mtime;
ino = sb.st_ino; ino = sb.st_ino;
dev = sb.st_dev; dev = sb.st_dev;
} }
@ -587,7 +589,7 @@ bool Raw::DoUpdate()
return false; return false;
} }
if ( sb.st_ino == ino && sb.st_mtime == mtime ) if ( sb.st_dev == dev && sb.st_ino == ino && sb.st_mtime == mtime )
// no change // no change
return true; return true;

View file

@ -0,0 +1,8 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
A::line, 1, First, 5
A::line, 2, Second, 6
A::line, 3, Third, 5
A::line, 4, , 0
A::line, 5, Fourth, 6
end_of_data, input, ./input.log
terminate

View file

@ -0,0 +1,64 @@
# @TEST-DOC: The raw reader would read a file in MODE_REREAD twice initially. Check this is fixed by running with reduced heartbeat_interval and waiting for 20 intervals after the first end_of_data event.
#
# @TEST-EXEC: zeek -b %INPUT > out
# @TEST-EXEC: btest-diff out
@TEST-START-FILE input.log
First
Second
Third

Fourth
@TEST-END-FILE
@load base/frameworks/input
# By default the heartbeat timer is 1sec. To avoid running this test for
# multiple seconds, tune it down 100x. 10msec is still pretty long.
# Input::end_of_data() below waits 20 of these intervals before terminating.
redef Threading::heartbeat_interval = 10msec;
# NOTE(review): presumably keeps Zeek running after zeek_init() until
# do_terminate() calls terminate() explicitly -- confirm against the Zeek docs.
redef exit_only_after_terminate = T;
# The declarations that follow are made inside module A; the per-line event
# is referred to as A::line.
module A;
# Record type handed to Input::add_event() as $fields in zeek_init().
# It declares a single string field, s, matching the `s: string` parameter
# of the A::line event handler.
type Val: record {
s: string;
};
# Prints "terminate" and calls terminate(), unless Zeek is already shutting
# down. This event is scheduled from both zeek_init() (10sec fallback) and
# Input::end_of_data(), so the check keeps a later firing from printing again.
event do_terminate()
	{
	if ( ! zeek_is_terminating() )
		{
		print "terminate";
		terminate();
		}
	}
# Handler for Input::end_of_data: prints the stream name and source, then
# schedules do_terminate() 20 heartbeat intervals later. Per the @TEST-DOC
# header, that wait gives an unwanted second read of the file time to show up
# in the output before the test ends.
event Input::end_of_data(name: string, source: string)
	{
	print "end_of_data", name, source;

	local grace_period = 20 * Threading::heartbeat_interval;
	schedule grace_period { do_terminate() };
	}
# Number of A::line events handled so far.
global lines: count = 0;

# Handler for each A::line event raised for the input stream. Bumps the
# running counter and prints the counter, the string s, and the length of s.
# The description and tpe arguments are not used.
event A::line(description: Input::EventDescription, tpe: Input::Event, s: string)
	{
	lines += 1;
	print "A::line", lines, s, |s|;
	}
# Startup: arm a 10sec fallback termination, then register ./input.log with
# the raw reader in REREAD mode so that each entry raises A::line.
event zeek_init()
	{
	# In case something goes wrong, do not let the test hang forever.
	schedule 10sec { do_terminate() };

	# $want_record=F: the handler takes the field (s) as a plain argument
	# rather than a Val record, as A::line's signature shows.
	local desc: Input::EventDescription = [
		$source="./input.log",
		$reader=Input::READER_RAW,
		$mode=Input::REREAD,
		$name="input",
		$fields=Val,
		$ev=A::line,
		$want_record=F
	];

	Input::add_event(desc);
	}