From e8de3de2d488218380affad222a70d680b4858d7 Mon Sep 17 00:00:00 2001 From: Justin Azoff Date: Thu, 1 May 2025 17:42:17 -0400 Subject: [PATCH 1/2] Optimize software found cluster communication As a follow-up to 3bf8c8ceb6d4b5d3444e43d9e0212ec8bef35574 that added the parse cache, add a small, short-lived cache on the workers to effectively debounce the number of Software::new events sent up to the proxies. User-Agents are highly repetitive: workers often see exact duplicate user-agents on the same orig_h. Worse, due to NAT, virtualization, and the proliferation of Electron-based applications, variations of the same user-agent can be seen at the same time. For example: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36 Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.6613.18 Safari/537.36 Zoom/6.2.0 (1855) When these two user-agents are seen concurrently, the software framework will log each flip as a new user-agent. This can be fixed separately on the proxy side, but a reduction of Software::new events is still needed to reduce cluster communication overhead as well as the load on the proxies. With a 10-minute cache on the workers, this should greatly reduce the number of redundant user-agents logged in software.log. --- scripts/base/frameworks/software/main.zeek | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/scripts/base/frameworks/software/main.zeek b/scripts/base/frameworks/software/main.zeek index 40d39ee21c..863eb1e106 100644 --- a/scripts/base/frameworks/software/main.zeek +++ b/scripts/base/frameworks/software/main.zeek @@ -239,7 +239,10 @@ function parse(unparsed_version: string): Description return [$version=v, $unparsed_version=unparsed_version, $name=alternate_names[software_name]]; } +# A cache for the proxies that stores the result of parsing unparsed_version. 
global parse_cache: table[string] of Description &read_expire=65secs; +# A suppression cache for the workers to prevent sending the same information to the proxies multiple times. +global found_cache: set[Info] &create_expire=10mins; # Call parse, but cache results in the parse_cache table function parse_with_cache(unparsed_version: string): Description @@ -523,6 +526,11 @@ function found(id: conn_id, info: Info): bool if ( ! info$force_log && ! addr_matches_host(info$host, asset_tracking) ) return F; + # This assumes that callers do not fill in info$ts, none of the current callers do. + if (info in found_cache) + return T; + add found_cache[info]; + if ( ! info?$ts ) info$ts = network_time(); From 1f346453353d7bd903b92052db3afbade3cb0288 Mon Sep 17 00:00:00 2001 From: Justin Date: Mon, 5 May 2025 15:08:37 -0400 Subject: [PATCH 2/2] Update scripts/base/frameworks/software/main.zeek Add appropriate white space in if statement Co-authored-by: Arne Welzel --- scripts/base/frameworks/software/main.zeek | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/base/frameworks/software/main.zeek b/scripts/base/frameworks/software/main.zeek index 863eb1e106..da35b68ddc 100644 --- a/scripts/base/frameworks/software/main.zeek +++ b/scripts/base/frameworks/software/main.zeek @@ -527,7 +527,7 @@ function found(id: conn_id, info: Info): bool return F; # This assumes that callers do not fill in info$ts, none of the current callers do. - if (info in found_cache) + if ( info in found_cache ) return T; add found_cache[info];