Redis: Fix thread-contention issues with Expire(), add more tests

This commit is contained in:
Tim Wojtulewicz 2025-02-27 16:39:41 -07:00
parent b81e876ec8
commit cca1d4f988
7 changed files with 200 additions and 40 deletions

View file

@ -16,6 +16,8 @@
// Anonymous callback handler methods for the hiredis async API.
namespace {
bool during_expire = false;
class Tracer {
public:
Tracer(const std::string& where) : where(where) {} // DBG_LOG(zeek::DBG_STORAGE, "%s", where.c_str()); }
@ -56,17 +58,21 @@ void redisErase(redisAsyncContext* ctx, void* reply, void* privdata) {
backend->HandleEraseResult(static_cast<redisReply*>(reply), callback);
}
void redisZRANGEBYSCORE(redisAsyncContext* ctx, void* reply, void* privdata) {
auto t = Tracer("zrangebyscore");
void redisZADD(redisAsyncContext* ctx, void* reply, void* privdata) {
auto t = Tracer("generic");
auto backend = static_cast<zeek::storage::backend::redis::Redis*>(ctx->data);
backend->HandleZRANGEBYSCORE(static_cast<redisReply*>(reply));
// We don't care about the reply from the ZADD, mostly because blocking to poll
// for it adds a bunch of complication to DoPut() with having to handle the
// reply from SET first.
backend->HandleGeneric(nullptr);
freeReplyObject(reply);
}
void redisGeneric(redisAsyncContext* ctx, void* reply, void* privdata) {
auto t = Tracer("generic");
auto backend = static_cast<zeek::storage::backend::redis::Redis*>(ctx->data);
backend->HandleGeneric();
freeReplyObject(reply);
backend->HandleGeneric(static_cast<redisReply*>(reply));
}
// Because we called redisPollAttach in DoOpen(), privdata here is a
@ -74,12 +80,17 @@ void redisGeneric(redisAsyncContext* ctx, void* reply, void* privdata) {
// data, which contains the backend. Because we overrode these callbacks in
// DoOpen, we still want to mimic their callbacks to redisPollTick functions
// correctly.
//
// Additionally, if we're in the middle of running a manual Expire() because
// we're reading a pcap, don't add the file descriptor into iosource_mgr. Manual
// calls to Poll() during that will handle reading/writing any data, and we
// don't want the contention with the main loop.
void redisAddRead(void* privdata) {
auto t = Tracer("addread");
auto rpe = static_cast<redisPollEvents*>(privdata);
auto backend = static_cast<zeek::storage::backend::redis::Redis*>(rpe->context->data);
if ( rpe->reading == 0 )
if ( rpe->reading == 0 && ! during_expire )
zeek::iosource_mgr->RegisterFd(rpe->fd, backend, zeek::iosource::IOSource::READ);
rpe->reading = 1;
}
@ -89,7 +100,7 @@ void redisDelRead(void* privdata) {
auto rpe = static_cast<redisPollEvents*>(privdata);
auto backend = static_cast<zeek::storage::backend::redis::Redis*>(rpe->context->data);
if ( rpe->reading == 1 )
if ( rpe->reading == 1 && ! during_expire )
zeek::iosource_mgr->UnregisterFd(rpe->fd, backend, zeek::iosource::IOSource::READ);
rpe->reading = 0;
}
@ -99,7 +110,7 @@ void redisAddWrite(void* privdata) {
auto rpe = static_cast<redisPollEvents*>(privdata);
auto backend = static_cast<zeek::storage::backend::redis::Redis*>(rpe->context->data);
if ( rpe->writing == 0 )
if ( rpe->writing == 0 && ! during_expire )
zeek::iosource_mgr->RegisterFd(rpe->fd, backend, zeek::iosource::IOSource::WRITE);
rpe->writing = 1;
}
@ -109,11 +120,21 @@ void redisDelWrite(void* privdata) {
auto t = Tracer("delwrite");
auto backend = static_cast<zeek::storage::backend::redis::Redis*>(rpe->context->data);
if ( rpe->writing == 1 )
if ( rpe->writing == 1 && ! during_expire )
zeek::iosource_mgr->UnregisterFd(rpe->fd, backend, zeek::iosource::IOSource::WRITE);
rpe->writing = 0;
}
// Builds a std::unique_lock that owns `mutex` only when `condition` is true;
// otherwise the returned lock is default-constructed and is associated with no
// mutex at all. This exists so that expire_mutex is taken only while reading
// pcaps: Expire() is the only source of thread contention and it only runs
// when reading pcaps. Paying for the lock in any other mode isn't worth the
// cycles, and hiredis copes with other cross-command contention as long as
// everything stays on a single thread.
std::unique_lock<std::mutex> conditionally_lock(bool condition, std::mutex& mutex) {
    if ( ! condition )
        return std::unique_lock<std::mutex>();

    return std::unique_lock<std::mutex>(mutex);
}
} // namespace
namespace zeek::storage::backend::redis {
@ -213,6 +234,8 @@ OperationResult Redis::DoOpen(RecordValPtr options, OpenResultCallback* cb) {
* Finalizes the backend when it's being closed.
*/
OperationResult Redis::DoClose(OperationResultCallback* cb) {
auto locked_scope = conditionally_lock(zeek::run_state::reading_traces, expire_mutex);
connected = false;
redisAsyncDisconnect(async_ctx);
@ -240,6 +263,8 @@ OperationResult Redis::DoPut(ValPtr key, ValPtr value, bool overwrite, double ex
if ( ! connected && ! async_ctx )
return {ReturnCode::NOT_CONNECTED};
auto locked_scope = conditionally_lock(zeek::run_state::reading_traces, expire_mutex);
std::string format = "SET %s:%s %s";
if ( ! overwrite )
format.append(" NX");
@ -251,9 +276,9 @@ OperationResult Redis::DoPut(ValPtr key, ValPtr value, bool overwrite, double ex
// Use built-in expiration if reading live data, since time will move
// forward consistently. If reading pcaps, we'll do something else.
if ( expiration_time > 0.0 && ! zeek::run_state::reading_traces ) {
format.append(" PXAT %d");
format.append(" PXAT %" PRIu64);
status = redisAsyncCommand(async_ctx, redisPut, cb, format.c_str(), key_prefix.data(), json_key.data(),
json_value.data(), static_cast<uint64_t>(expiration_time * 1e6));
json_value.data(), static_cast<uint64_t>(expiration_time * 1e3));
}
else
status = redisAsyncCommand(async_ctx, redisPut, cb, format.c_str(), key_prefix.data(), json_key.data(),
@ -272,7 +297,7 @@ OperationResult Redis::DoPut(ValPtr key, ValPtr value, bool overwrite, double ex
format.append(" NX");
format += " %f %s";
status = redisAsyncCommand(async_ctx, redisGeneric, NULL, format.c_str(), key_prefix.data(), expiration_time,
status = redisAsyncCommand(async_ctx, redisZADD, NULL, format.c_str(), key_prefix.data(), expiration_time,
json_key.data());
if ( connected && status == REDIS_ERR )
return {ReturnCode::OPERATION_FAILED, util::fmt("ZADD operation failed: %s", async_ctx->errstr)};
@ -291,6 +316,8 @@ OperationResult Redis::DoGet(ValPtr key, OperationResultCallback* cb) {
if ( ! connected && ! async_ctx )
return {ReturnCode::NOT_CONNECTED};
auto locked_scope = conditionally_lock(zeek::run_state::reading_traces, expire_mutex);
int status = redisAsyncCommand(async_ctx, redisGet, cb, "GET %s:%s", key_prefix.data(),
key->ToJSON()->ToStdStringView().data());
@ -312,6 +339,8 @@ OperationResult Redis::DoErase(ValPtr key, OperationResultCallback* cb) {
if ( ! connected && ! async_ctx )
return {ReturnCode::NOT_CONNECTED};
auto locked_scope = conditionally_lock(zeek::run_state::reading_traces, expire_mutex);
int status = redisAsyncCommand(async_ctx, redisErase, cb, "DEL %s:%s", key_prefix.data(),
key->ToJSON()->ToStdStringView().data());
@ -328,12 +357,17 @@ void Redis::Expire() {
if ( ! connected || ! zeek::run_state::reading_traces )
return;
int status = redisAsyncCommand(async_ctx, redisZRANGEBYSCORE, NULL, "ZRANGEBYSCORE %s_expire -inf %f",
key_prefix.data(), run_state::network_time);
auto locked_scope = conditionally_lock(zeek::run_state::reading_traces, expire_mutex);
during_expire = true;
int status = redisAsyncCommand(async_ctx, redisGeneric, NULL, "ZRANGEBYSCORE %s_expire -inf %f", key_prefix.data(),
run_state::network_time);
if ( status == REDIS_ERR ) {
// TODO: do something with the error?
printf("ZRANGEBYSCORE command failed: %s\n", async_ctx->errstr);
during_expire = false;
return;
}
@ -347,22 +381,29 @@ void Redis::Expire() {
if ( reply->elements == 0 ) {
freeReplyObject(reply);
during_expire = false;
return;
}
// The data from the reply to ZRANGEBYSCORE gets deleted as part of the
// commands below so we don't need to free it manually. Doing so results in
// a double-free.
std::vector<std::string> elements;
for ( size_t i = 0; i < reply->elements; i++ )
elements.emplace_back(reply->element[i]->str);
freeReplyObject(reply);
// TODO: it's possible to pass multiple keys to a DEL operation but it requires
// building an array of the strings, building up the DEL command with entries,
// and passing the array as a block somehow. There's no guarantee it'd be faster
// anyways.
for ( size_t i = 0; i < reply->elements; i++ ) {
status =
redisAsyncCommand(async_ctx, redisGeneric, NULL, "DEL %s:%s", key_prefix.data(), reply->element[i]->str);
for ( const auto& e : elements ) {
status = redisAsyncCommand(async_ctx, redisGeneric, NULL, "DEL %s:%s", key_prefix.data(), e.c_str());
++active_ops;
Poll();
redisReply* reply = reply_queue.front();
reply_queue.pop_front();
freeReplyObject(reply);
// TODO: do we care if this failed?
}
// Remove all of the elements from the range-set that match the time range.
@ -371,6 +412,11 @@ void Redis::Expire() {
++active_ops;
Poll();
reply = reply_queue.front();
reply_queue.pop_front();
freeReplyObject(reply);
// TODO: do we care if this failed?
}
void Redis::HandlePutResult(redisReply* reply, OperationResultCallback* callback) {
@ -421,9 +467,11 @@ void Redis::HandleEraseResult(redisReply* reply, OperationResultCallback* callba
}
}
void Redis::HandleZRANGEBYSCORE(redisReply* reply) {
void Redis::HandleGeneric(redisReply* reply) {
--active_ops;
reply_queue.push_back(reply);
if ( reply )
reply_queue.push_back(reply);
}
void Redis::OnConnect(int status) {
@ -459,6 +507,8 @@ void Redis::OnDisconnect(int status) {
}
void Redis::ProcessFd(int fd, int flags) {
auto locked_scope = conditionally_lock(zeek::run_state::reading_traces, expire_mutex);
if ( (flags & IOSource::ProcessFlags::READ) != 0 )
redisAsyncHandleRead(async_ctx);
if ( (flags & IOSource::ProcessFlags::WRITE) != 0 )