diff --git a/scripts/base/frameworks/sumstats/plugins/unique.bro b/scripts/base/frameworks/sumstats/plugins/unique.bro
index 011949ce2f..60dec66ae4 100644
--- a/scripts/base/frameworks/sumstats/plugins/unique.bro
+++ b/scripts/base/frameworks/sumstats/plugins/unique.bro
@@ -3,6 +3,12 @@
 module SumStats;
 
 export {
+	redef record Reducer += {
+		## Maximum number of unique elements to store. If unset,
+		## every distinct observation is stored.
+		unique_max: count &optional;
+	};
+
 	redef enum Calculation += {
 		## Calculate the number of unique values.
 		UNIQUE
@@ -16,6 +22,11 @@ export {
 }
 
 redef record ResultVal += {
+	# Internal use only. This is used when multiple ResultVals
+	# are being merged and they need to abide by the unique limit
+	# set in the reducer.
+	unique_max: count &optional;
+
 	# Internal use only. This is not meant to be publically available
 	# because we don't want to trust that we can inspect the values
 	# since we will like move to a probalistic data structure in the future.
@@ -29,7 +40,14 @@ hook register_observe_plugins()
 		{
 		if ( ! rv?$unique_vals )
 			rv$unique_vals=set();
-		add rv$unique_vals[obs];
+		if ( r?$unique_max )
+			rv$unique_max=r$unique_max;
+
+		# Only grow the set while it is below the cap, so it never
+		# holds more than unique_max elements.
+		if ( ! r?$unique_max || |rv$unique_vals| < r$unique_max )
+			add rv$unique_vals[obs];
+
 		rv$unique = |rv$unique_vals|;
 		});
 	}
@@ -38,15 +56,31 @@ hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal)
 	{
 	if ( rv1?$unique_vals || rv2?$unique_vals )
 		{
+		if ( rv1?$unique_max )
+			result$unique_max = rv1$unique_max;
+		else if ( rv2?$unique_max )
+			result$unique_max = rv2$unique_max;
+
 		if ( rv1?$unique_vals )
 			result$unique_vals = copy(rv1$unique_vals);
 
 		if ( rv2?$unique_vals )
+			{
 			if ( ! result?$unique_vals )
+				{
 				result$unique_vals = copy(rv2$unique_vals);
+				}
 			else
+				{
 				for ( val2 in rv2$unique_vals )
+					{
+					if ( result?$unique_max && |result$unique_vals| >= result$unique_max )
+						break;
+
 					add result$unique_vals[copy(val2)];
+					}
+				}
+			}
 
 		result$unique = |result$unique_vals|;
 		}
diff --git a/scripts/policy/misc/scan.bro b/scripts/policy/misc/scan.bro
index 909ccac02b..b1b63b74da 100644
--- a/scripts/policy/misc/scan.bro
+++ b/scripts/policy/misc/scan.bro
@@ -52,7 +52,7 @@ export {
 
 event bro_init() &priority=5
 	{
-	local r1: SumStats::Reducer = [$stream="scan.addr.fail", $apply=set(SumStats::UNIQUE)];
+	local r1: SumStats::Reducer = [$stream="scan.addr.fail", $apply=set(SumStats::UNIQUE), $unique_max=double_to_count(addr_scan_threshold+2)];
 	SumStats::create([$name="addr-scan",
 	                  $epoch=addr_scan_interval,
 	                  $reducers=set(r1),
@@ -77,7 +77,7 @@ event bro_init() &priority=5
 	                  	}]);
 
 	# Note: port scans are tracked similar to: table[src_ip, dst_ip] of set(port);
-	local r2: SumStats::Reducer = [$stream="scan.port.fail", $apply=set(SumStats::UNIQUE)];
+	local r2: SumStats::Reducer = [$stream="scan.port.fail", $apply=set(SumStats::UNIQUE), $unique_max=double_to_count(port_scan_threshold+2)];
 	SumStats::create([$name="port-scan",
 	                  $epoch=port_scan_interval,
 	                  $reducers=set(r2),
diff --git a/scripts/policy/protocols/ftp/detect-bruteforcing.bro b/scripts/policy/protocols/ftp/detect-bruteforcing.bro
index 36dfafb53a..1af9bb081e 100644
--- a/scripts/policy/protocols/ftp/detect-bruteforcing.bro
+++ b/scripts/policy/protocols/ftp/detect-bruteforcing.bro
@@ -27,7 +27,7 @@ export {
 
 event bro_init()
 	{
-	local r1: SumStats::Reducer = [$stream="ftp.failed_auth", $apply=set(SumStats::UNIQUE)];
+	local r1: SumStats::Reducer = [$stream="ftp.failed_auth", $apply=set(SumStats::UNIQUE), $unique_max=double_to_count(bruteforce_threshold+2)];
 	SumStats::create([$name="ftp-detect-bruteforcing",
 	                  $epoch=bruteforce_measurement_interval,
 	                  $reducers=set(r1),
diff --git a/testing/btest/Baseline/scripts.base.frameworks.sumstats.thresholding/.stdout b/testing/btest/Baseline/scripts.base.frameworks.sumstats.thresholding/.stdout
index 132a1114fc..3afedd785d 100644
--- a/testing/btest/Baseline/scripts.base.frameworks.sumstats.thresholding/.stdout
+++ b/testing/btest/Baseline/scripts.base.frameworks.sumstats.thresholding/.stdout
@@ -1,6 +1,6 @@
 THRESHOLD_SERIES: hit a threshold series value at 3 for sumstats_key(host=1.2.3.4)
-THRESHOLD_SERIES: hit a threshold series value at 6 for sumstats_key(host=1.2.3.4)
 THRESHOLD: hit a threshold value at 6 for sumstats_key(host=1.2.3.4)
-THRESHOLD_SERIES: hit a threshold series value at 1001 for sumstats_key(host=7.2.1.5)
+THRESHOLD_SERIES: hit a threshold series value at 6 for sumstats_key(host=1.2.3.4)
 THRESHOLD: hit a threshold value at 1001 for sumstats_key(host=7.2.1.5)
+THRESHOLD_SERIES: hit a threshold series value at 1001 for sumstats_key(host=7.2.1.5)
 THRESHOLD WITH RATIO BETWEEN REDUCERS: hit a threshold value at 55x for sumstats_key(host=7.2.1.5)