From 1fa182c16918d258cbda6bfc69b3394103d4313f Mon Sep 17 00:00:00 2001 From: Seth Hall Date: Wed, 18 Jul 2012 00:00:31 -0400 Subject: [PATCH] Some better elasticsearch reliability. - Added a configurable option for timing out ES HTTP requests. - Stop sending reporter messages after one message for one failure. --- .../logging/writers/elasticsearch.bro | 3 ++ src/logging.bif | 1 + src/logging/writers/ElasticSearch.cc | 38 ++++++++++++++----- src/logging/writers/ElasticSearch.h | 2 + 4 files changed, 35 insertions(+), 9 deletions(-) diff --git a/scripts/base/frameworks/logging/writers/elasticsearch.bro b/scripts/base/frameworks/logging/writers/elasticsearch.bro index 93c6c98705..adc675e487 100644 --- a/scripts/base/frameworks/logging/writers/elasticsearch.bro +++ b/scripts/base/frameworks/logging/writers/elasticsearch.bro @@ -17,6 +17,9 @@ export { ## e.g. prefix = "bro_" would create types of bro_dns, bro_software, etc. const type_prefix = "" &redef; + ## The time before an ElasticSearch transfer will time out. + const transfer_timeout = 2secs; + ## The batch size is the number of messages that will be queued up before ## they are sent to be bulk indexed. ## Note: this is mainly a memory usage parameter. 
diff --git a/src/logging.bif b/src/logging.bif index 3cdb414d80..7e50a9d285 100644 --- a/src/logging.bif +++ b/src/logging.bif @@ -91,6 +91,7 @@ const server_host: string; const server_port: count; const index_prefix: string; const type_prefix: string; +const transfer_timeout: interval; const max_batch_size: count; const max_batch_interval: interval; const max_byte_size: count; diff --git a/src/logging/writers/ElasticSearch.cc b/src/logging/writers/ElasticSearch.cc index 1b8dfa495d..71be036a72 100644 --- a/src/logging/writers/ElasticSearch.cc +++ b/src/logging/writers/ElasticSearch.cc @@ -42,7 +42,10 @@ ElasticSearch::ElasticSearch(WriterFrontend* frontend) : WriterBackend(frontend) current_index = string(); prev_index = string(); last_send = current_time(); + failing = false; + transfer_timeout = BifConst::LogElasticSearch::transfer_timeout * 1000; + curl_handle = HTTPSetup(); } @@ -77,12 +80,13 @@ bool ElasticSearch::BatchIndex() curl_easy_setopt(curl_handle, CURLOPT_POST, 1); curl_easy_setopt(curl_handle, CURLOPT_POSTFIELDSIZE_LARGE, (curl_off_t)buffer.Len()); curl_easy_setopt(curl_handle, CURLOPT_POSTFIELDS, buffer.Bytes()); - HTTPSend(curl_handle); - + failing = ! HTTPSend(curl_handle); + + // We are currently throwing the data out regardless of if the send failed. Fire and forget! buffer.Clear(); counter = 0; last_send = current_time(); - + return true; } @@ -347,6 +351,8 @@ bool ElasticSearch::HTTPSend(CURL *handle) // HTTP 1.1 likes to use chunked encoded transfers, which aren't good for speed. // The best (only?) way to disable that is to just use HTTP 1.0 curl_easy_setopt(handle, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_1_0); + + curl_easy_setopt(handle, CURLOPT_TIMEOUT_MS, transfer_timeout); CURLcode return_code = curl_easy_perform(handle); @@ -355,21 +361,35 @@ bool ElasticSearch::HTTPSend(CURL *handle) case CURLE_COULDNT_CONNECT: case CURLE_COULDNT_RESOLVE_HOST: case CURLE_WRITE_ERROR: - return false; + case CURLE_RECV_ERROR: + { + if ( ! 
failing ) + Error(Fmt("ElasticSearch server may not be accessible.")); + } + + case CURLE_OPERATION_TIMEDOUT: + { + if ( ! failing ) + Warning(Fmt("HTTP operation with elasticsearch server timed out at %" PRIu64 " msecs.", transfer_timeout)); + } case CURLE_OK: { uint http_code = 0; curl_easy_getinfo(curl_handle, CURLINFO_RESPONSE_CODE, &http_code); - if ( http_code != 200 ) - Error(Fmt("Received a non-successful status code back from ElasticSearch server.")); - - return true; + if ( http_code == 200 ) + // Hopefully everything goes through here. + return true; + else if ( ! failing ) + Error(Fmt("Received a non-successful status code back from ElasticSearch server, check the elasticsearch server log.")); } default: - return true; + { + } } + // The "successful" return happens above + return false; } #endif diff --git a/src/logging/writers/ElasticSearch.h b/src/logging/writers/ElasticSearch.h index 375845b002..60977f7737 100644 --- a/src/logging/writers/ElasticSearch.h +++ b/src/logging/writers/ElasticSearch.h @@ -65,6 +65,8 @@ private: string path; string index_prefix; + uint64 transfer_timeout; + bool failing; uint64 batch_size; };