broker/store: Extend SQLiteOptions around data safety and performance

Add configurability of synchronous and journal_mode for SQLite backed
Broker data stores. Setting these to synchronous=normal and journal_mode=wal
can significantly improve throughput at the cost of some durability in
the presence of power loss or OS crash. In the context of Zeek, this is
likely more than acceptable.

Additionally, add integrity_check and failure_mode options to support deleting
and re-opening a corrupted SQLite database at store creation.

Closes #2698
This commit is contained in:
Arne Welzel 2023-01-20 16:15:23 +01:00
parent b720f71e5e
commit f35cf228dc
11 changed files with 271 additions and 3 deletions

9
NEWS
View file

@ -165,6 +165,15 @@ New Functionality
of new analyzers as well as for collecting operational data in production of new analyzers as well as for collecting operational data in production
environments. environments.
- Expose configurability of SQLite's synchronous and journal_mode PRAGMAs
for SQLite backed Broker data stores. Setting these to synchronous=normal
and journal_mode=wal can significantly improve throughput at the cost of
some durability in the presence of power loss or OS crash. In the context
of Zeek, this is likely more than acceptable.
Additionally, add integrity_check and failure_mode options to support
detecting and deleting a corrupted SQLite database at store initialization.
Changed Functionality Changed Functionality
--------------------- ---------------------

View file

@ -57,12 +57,62 @@ export {
SQLITE, SQLITE,
}; };
## Behavior when the SQLite database file is found to be corrupt
## or otherwise fails to open or initialize.
type SQLiteFailureMode: enum {
SQLITE_FAILURE_MODE_FAIL, ##< Fail during initialization.
SQLITE_FAILURE_MODE_DELETE, ##< Attempt to delete the database file and retry.
};
## Values supported for SQLite's PRAGMA synchronous statement.
type SQLiteSynchronous: enum {
SQLITE_SYNCHRONOUS_OFF,
SQLITE_SYNCHRONOUS_NORMAL,
SQLITE_SYNCHRONOUS_FULL,
SQLITE_SYNCHRONOUS_EXTRA,
};
## Values supported for SQLite's PRAGMA journal_mode statement.
type SQLiteJournalMode: enum {
SQLITE_JOURNAL_MODE_DELETE,
SQLITE_JOURNAL_MODE_WAL,
};
## Options to tune the SQLite storage backend. ## Options to tune the SQLite storage backend.
type SQLiteOptions: record { type SQLiteOptions: record {
## File system path of the database. ## File system path of the database.
## If left empty, will be derived from the name of the store, ## If left empty, will be derived from the name of the store,
## and use the '.sqlite' file suffix. ## and use the '.sqlite' file suffix.
path: string &default = ""; path: string &default = "";
## If set, runs the PRAGMA synchronous statement with the
## provided value after connecting to the SQLite database. See
## `SQLite's synchronous documentation <https://www.sqlite.org/pragma.html#pragma_synchronous>`_
## for more details around performance and data safety trade offs.
synchronous: SQLiteSynchronous &optional;
## If set, runs the PRAGMA journal_mode statement with the
## provided value after connecting to the SQLite database. See
## `SQLite's journal_mode documentation <https://www.sqlite.org/pragma.html#pragma_journal_mode>`_
## for more details around performance, data safety trade offs
## and interaction with the PRAGMA synchronous statement.
journal_mode: SQLiteJournalMode &optional;
## What to do when the database is found corrupt during
## initialization. When set to SQLITE_FAILURE_MODE_DELETE,
## the old file is deleted to allow creation of a new and empty
## database. By default, an error is reported, the corrupt
## database file is left in place and the data store is in a
## non-functional state.
failure_mode: SQLiteFailureMode &default=SQLITE_FAILURE_MODE_FAIL;
## When true, run the PRAGMA integrity_check statement after
## opening the database and fail according to ``failure_mode``.
## PRAGMA integrity_check may take a non-negligible amount of time,
## so you are advised to test with databases of the expected size
## to determine whether the added delay is acceptable.
## should be reliably detected when this setting is ``T``.
integrity_check: bool &default=F;
}; };
## Options to tune the particular storage backends. ## Options to tune the particular storage backends.

View file

@ -66,19 +66,42 @@ broker::backend to_backend_type(BifEnum::Broker::BackendType type)
broker::backend_options to_backend_options(broker::backend backend, RecordVal* options) broker::backend_options to_backend_options(broker::backend backend, RecordVal* options)
{ {
static auto failure_mode_type = id::find_type("Broker::SQLiteFailureMode")->AsEnumType();
static auto sqlite_synchronous_type = id::find_type("Broker::SQLiteSynchronous")->AsEnumType();
static auto sqlite_journal_mode_type = id::find_type("Broker::SQLiteJournalMode")->AsEnumType();
broker::backend_options result;
switch ( backend ) switch ( backend )
{ {
case broker::backend::sqlite: case broker::backend::sqlite:
{ {
auto path = options->GetFieldAs<RecordVal>(0)->GetFieldAs<StringVal>(0)->CheckString(); auto sqlite_opts = options->GetField<RecordVal>("sqlite");
return {{"path", path}}; result["path"] = sqlite_opts->GetField<StringVal>("path")->CheckString();
if ( auto synchronous = sqlite_opts->GetField<EnumVal>("synchronous") )
result["synchronous"] = broker::enum_value(
sqlite_synchronous_type->Lookup(synchronous->Get()));
if ( auto journal_mode = sqlite_opts->GetField<EnumVal>("journal_mode") )
result["journal_mode"] = broker::enum_value(
sqlite_journal_mode_type->Lookup(journal_mode->Get()));
auto failure_mode = sqlite_opts->GetField<EnumVal>("failure_mode");
result["failure_mode"] = broker::enum_value(
failure_mode_type->Lookup(failure_mode->Get()));
auto integrity_check = sqlite_opts->GetField<BoolVal>("integrity_check")->Get();
result["integrity_check"] = integrity_check;
break;
} }
default: default:
break; break;
} }
return broker::backend_options{}; return result;
} }
} // namespace zeek::Broker } // namespace zeek::Broker

View file

@ -0,0 +1,6 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
store is open
populated 100 rows
store is open
populated 100 rows
100

View file

@ -0,0 +1,4 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
store is open
populated 100 rows
failed to open store

View file

@ -0,0 +1,5 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
store is open
populated 100 rows
store is open
populated 100 rows

View file

@ -0,0 +1,4 @@
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63.
populated 100 rows
wal
100

View file

@ -0,0 +1,46 @@
# @TEST-DOC: Populate a database, corrupt it then observe Zeek's behavior deleting the database and reopening it.
# @TEST-REQUIRES: dd --version
# @TEST-REQUIRES: sqlite3 --version
# @TEST-REQUIRES: test -e /dev/zero
# @TEST-EXEC: zeek -b %INPUT >> out
# Evil
# @TEST-EXEC: dd if=/dev/zero of=path_to_db.sqlite seek=512 count=32 bs=1
# @TEST-EXEC: zeek -b %INPUT >> out
# This will find 100 rows, the previous DB was deleted.
# @TEST-EXEC: sqlite3 ./path_to_db.sqlite 'select count(*) from store' >> out;
#
# @TEST-EXEC: grep 'database disk image is malformed' .stderr
# @TEST-EXEC: btest-diff out
@load base/frameworks/broker/store
global test_store: opaque of Broker::Store;
global test_table: table[string] of count &broker_store="test_store_42";
# Opens (or creates) the SQLite-backed master store "test_store_42" at
# path_to_db.sqlite, reports whether it opened, and then writes 100 entries
# through test_table, which is bound to the store via &broker_store.
event zeek_init()
{
# SQLITE_FAILURE_MODE_DELETE asks the backend to delete the database file
# and retry if it is found corrupt or fails to open/initialize.
test_store = Broker::create_master(
"test_store_42",
Broker::SQLITE,
Broker::BackendOptions(
$sqlite=Broker::SQLiteOptions(
$path="path_to_db.sqlite",
$failure_mode=Broker::SQLITE_FAILURE_MODE_DELETE,
),
),
);
# The printed messages are compared against the btest baseline.
if ( Broker::is_closed(test_store) ) {
print("failed to open store");
exit(1);
} else {
print("store is open");
}
local rows = 100;
local i = 0;
# Pre-increment: the loop body runs for i = 1 .. rows. Each key is the
# table's current size rendered as a string.
while ( ++i <= rows )
test_table[cat(|test_table|)] = i;
print fmt("populated %s rows", rows);
}

View file

@ -0,0 +1,41 @@
# @TEST-DOC: Populate a database, corrupt it then observe Zeek's behavior not being able to open the database and store.
# @TEST-REQUIRES: dd --version
# @TEST-REQUIRES: test -e /dev/zero
# @TEST-EXEC: zeek -b %INPUT >> out
# Evil
# @TEST-EXEC: dd if=/dev/zero of=path_to_db.sqlite seek=512 count=32 bs=1
# @TEST-EXEC-FAIL: zeek -b %INPUT >> out
#
# @TEST-EXEC: grep 'database disk image is malformed' .stderr
# @TEST-EXEC: btest-diff out
@load base/frameworks/broker/store
global test_store: opaque of Broker::Store;
global test_table: table[string] of count &broker_store="test_store_42";
# Opens (or creates) the SQLite-backed master store "test_store_42" at
# path_to_db.sqlite, reports whether it opened, and then writes 100 entries
# through test_table, which is bound to the store via &broker_store.
event zeek_init()
{
# failure_mode is not set here, so the SQLiteOptions default
# (SQLITE_FAILURE_MODE_FAIL) applies.
test_store = Broker::create_master(
"test_store_42",
Broker::SQLITE,
Broker::BackendOptions(
$sqlite=Broker::SQLiteOptions(
$path="path_to_db.sqlite",
),
),
);
# A closed store handle is reported and ends the run via exit(1); the
# printed messages are compared against the btest baseline.
if ( Broker::is_closed(test_store) ) {
print("failed to open store");
exit(1);
} else {
print("store is open");
}
local rows = 100;
local i = 0;
# Pre-increment: the loop body runs for i = 1 .. rows. Each key is the
# table's current size rendered as a string.
while ( ++i <= rows )
test_table[cat(|test_table|)] = i;
print fmt("populated %s rows", rows);
}

View file

@ -0,0 +1,37 @@
# @TEST-DOC: Use SQLite backend option integrity_check, but not breaking anything.
# @TEST-EXEC: zeek -b %INPUT >> out
# @TEST-EXEC: zeek -b %INPUT >> out
# @TEST-EXEC: btest-diff out
@load base/frameworks/broker/store
global test_store: opaque of Broker::Store;
global test_table: table[string] of count &broker_store="test_store_42";
# Opens (or creates) the SQLite-backed master store "test_store_42" at
# path_to_db.sqlite with integrity checking enabled, reports whether it
# opened, and then writes 100 entries through test_table, which is bound to
# the store via &broker_store.
event zeek_init()
{
# integrity_check=T requests PRAGMA integrity_check after opening the
# database; failure_mode keeps its default (SQLITE_FAILURE_MODE_FAIL).
test_store = Broker::create_master(
"test_store_42",
Broker::SQLITE,
Broker::BackendOptions(
$sqlite=Broker::SQLiteOptions(
$path="path_to_db.sqlite",
$integrity_check=T,
),
),
);
# The printed messages are compared against the btest baseline.
if ( Broker::is_closed(test_store) ) {
print("failed to open store");
exit(1);
} else {
print("store is open");
}
local rows = 100;
local i = 0;
# Pre-increment: the loop body runs for i = 1 .. rows. Each key is the
# table's current size rendered as a string.
while ( ++i <= rows )
test_table[cat(|test_table|)] = i;
print fmt("populated %s rows", rows);
}

View file

@ -0,0 +1,43 @@
# @TEST-DOC: Configure a broker store to be in WAL mode with synchronous NORMAL.
# @TEST-REQUIRES: sqlite3 --version
# @TEST-EXEC: zeek -b %INPUT > out 2>&1
#
# This is poking a bit at SQLite internals, but because WAL mode
# was flipped on, expect a wal and a shm file to exist.
# @TEST-EXEC: test -f path_to_db.sqlite || ls -lha >> out
# @TEST-EXEC: test -f path_to_db.sqlite-shm || ls -lha >> out
# @TEST-EXEC: test -f path_to_db.sqlite-wal || ls -lha >> out
# More poking, running sqlite3 should detect WAL mode, and the store
# table has 100 entries.
#
# @TEST-EXEC: sqlite3 ./path_to_db.sqlite 'PRAGMA journal_mode' >> out;
# @TEST-EXEC: sqlite3 ./path_to_db.sqlite 'select count(*) from store' >> out;
#
# @TEST-EXEC: btest-diff out
@load base/frameworks/broker/store
global test_store: opaque of Broker::Store;
global test_table: table[string] of count &broker_store="test_store_42";
# Creates the SQLite-backed master store "test_store_42" at path_to_db.sqlite
# with synchronous=NORMAL and journal_mode=WAL, then writes 100 entries
# through test_table, which is bound to the store via &broker_store.
event zeek_init()
{
# Both PRAGMA options are optional fields; setting them asks the backend to
# run the corresponding PRAGMA statements after connecting to the database.
test_store = Broker::create_master(
"test_store_42",
Broker::SQLITE,
Broker::BackendOptions(
$sqlite=Broker::SQLiteOptions(
$path="path_to_db.sqlite",
$synchronous=Broker::SQLITE_SYNCHRONOUS_NORMAL,
$journal_mode=Broker::SQLITE_JOURNAL_MODE_WAL,
),
),
);
# NOTE(review): unlike the sibling tests, this script does not check
# Broker::is_closed(test_store) before populating the table.
local rows = 100;
local i = 0;
# Pre-increment: the loop body runs for i = 1 .. rows. Each key is the
# table's current size rendered as a string.
while ( ++i <= rows )
test_table[cat(|test_table|)] = i;
print fmt("populated %s rows", rows);
}