From 1f7f16be9dc6476174abb03b78f94990d3fd5992 Mon Sep 17 00:00:00 2001 From: Justin Azoff Date: Fri, 29 Jul 2016 12:26:04 -0400 Subject: [PATCH 1/2] Also track recent_global_view_keys on manager Previously, recent_global_view_keys was only tracked on workers causing a popular key to be sent up and handled by the manager once for each worker. This records the key inside recent_global_view_keys on the manager after the first update, making the rest of the updates no-ops. Additionally, since the counter value was never used, it has been changed from a table to a set. --- scripts/base/frameworks/sumstats/cluster.bro | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/scripts/base/frameworks/sumstats/cluster.bro b/scripts/base/frameworks/sumstats/cluster.bro index b609abf472..0286e0f6e6 100644 --- a/scripts/base/frameworks/sumstats/cluster.bro +++ b/scripts/base/frameworks/sumstats/cluster.bro @@ -61,12 +61,11 @@ redef Cluster::manager2worker_events += /SumStats::(get_a_key)/; redef Cluster::worker2manager_events += /SumStats::cluster_(send_result|key_intermediate_response)/; redef Cluster::worker2manager_events += /SumStats::(send_a_key|send_no_key)/; +# This variable is maintained to know what keys have recently sent or received +# intermediate updates so they don't overwhelm the manager. +global recent_global_view_keys: set[string, Key] &create_expire=1min; + @if ( Cluster::local_node_type() != Cluster::MANAGER ) -# This variable is maintained to know what keys have recently sent as -# intermediate updates so they don't overwhelm their manager. The count that is -# yielded is the number of times the percentage threshold has been crossed and -# an intermediate result has been received. -global recent_global_view_keys: table[string, Key] of count &create_expire=1min &default=0; # Result tables indexed on a uid that are currently being sent to the # manager. @@ -76,8 +75,7 @@ global sending_results: table[string] of ResultTable = table() &read_expire=1min # being collected somewhere other than a worker. function data_added(ss: SumStat, key: Key, result: Result) { - # If an intermediate update for this value was sent recently, don't send - # it again. + # If an intermediate update for this key was sent recently, don't send it again if ( [ss$name, key] in recent_global_view_keys ) return; @@ -88,7 +86,7 @@ function data_added(ss: SumStat, key: Key, result: Result) { # kick off intermediate update event SumStats::cluster_key_intermediate_response(ss$name, key); - ++recent_global_view_keys[ss$name, key]; + add recent_global_view_keys[ss$name, key]; } } @@ -441,6 +439,10 @@ event SumStats::cluster_key_intermediate_response(ss_name: string, key: Key) { #print fmt("MANAGER: receiving intermediate key data from %s", get_event_peer()$descr); #print fmt("MANAGER: requesting key data for %s", key); + # If an intermediate update for this key was handled recently, don't do it again + if ( [ss_name, key] in recent_global_view_keys ) + return; + add recent_global_view_keys[ss_name, key]; if ( ss_name in outstanding_global_views && |outstanding_global_views[ss_name]| > max_outstanding_global_views ) From f80f2f2a08c35af45f0aa61dd7f695317957c1d6 Mon Sep 17 00:00:00 2001 From: Justin Azoff Date: Fri, 29 Jul 2016 12:54:20 -0400 Subject: [PATCH 2/2] Track outstanding_global_views updates by uid Currently outstanding_global_views values are only decremented during the end of epoch cleanup, but not when handle_end_of_result_collection is called for the specific uid that actually triggered the result collection (which is specifically NOT a cleanup event). This changes outstanding_global_views values to be a set of outstanding uids, instead of a count. This allows handle_end_of_result_collection to remove any uids from the set as it sees them. --- scripts/base/frameworks/sumstats/cluster.bro | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/scripts/base/frameworks/sumstats/cluster.bro b/scripts/base/frameworks/sumstats/cluster.bro index 0286e0f6e6..c1e0cb0257 100644 --- a/scripts/base/frameworks/sumstats/cluster.bro +++ b/scripts/base/frameworks/sumstats/cluster.bro @@ -237,7 +237,7 @@ global dynamic_requests: set[string] &read_expire=1min; # to too many intermediate updates. Each sumstat is tracked separately so that # one won't overwhelm and degrade other quieter sumstats. # Indexed on a sumstat id. -global outstanding_global_views: table[string] of count &read_expire=1min &default=0; +global outstanding_global_views: table[string] of set[string] &read_expire=1min; const zero_time = double_to_time(0.0); # Managers handle logging. @@ -303,12 +303,10 @@ function handle_end_of_result_collection(uid: string, ss_name: string, key: Key, ss$epoch_result(now, key, ir); } - # Check that there is an outstanding view before subtracting. - # Global views only apply to non-dynamic requests. Dynamic - # requests must be serviced. - if ( outstanding_global_views[ss_name] > 0 ) - --outstanding_global_views[ss_name]; } + # Check if this was an intermediate update + if ( ss_name in outstanding_global_views ) + delete outstanding_global_views[ss_name][uid]; delete key_requests[uid]; delete done_with[uid]; @@ -444,8 +442,9 @@ event SumStats::cluster_key_intermediate_response(ss_name: string, key: Key) return; add recent_global_view_keys[ss_name, key]; - if ( ss_name in outstanding_global_views && - |outstanding_global_views[ss_name]| > max_outstanding_global_views ) + if ( ss_name !in outstanding_global_views) + outstanding_global_views[ss_name] = set(); + else if ( |outstanding_global_views[ss_name]| > max_outstanding_global_views ) { # Don't do this intermediate update. Perhaps at some point in the future # we will queue and randomly select from these ignored intermediate @@ -453,9 +452,8 @@ event SumStats::cluster_key_intermediate_response(ss_name: string, key: Key) return; } - ++outstanding_global_views[ss_name]; - local uid = unique_id(""); + add outstanding_global_views[ss_name][uid]; done_with[uid] = 0; #print fmt("requesting results for: %s", uid); event SumStats::cluster_get_result(uid, ss_name, key, F);