script optimization support for tracking information associated with BiFs/functions

This commit is contained in:
Vern Paxson 2023-12-04 16:53:57 -08:00
parent ce09a11a0c
commit c028901146
3 changed files with 524 additions and 0 deletions

506
src/script_opt/FuncInfo.cc Normal file
View file

@ -0,0 +1,506 @@
// See the file "COPYING" in the main distribution directory for copyright.
#include "zeek/script_opt/FuncInfo.h"
#include <unordered_set>
namespace zeek::detail {
// The following BiFs do not have any script-level side effects. The list is
// followed by comments naming the BiFs that have been omitted, and why.
//
// See script_opt/ZAM/maint/README for maintenance of these lists.
//
// The set is a fixed lookup table that is only ever queried by name, so it
// is declared const to rule out accidental modification at run-time.
static const std::unordered_set<std::string> side_effects_free_BiFs = {
    "Analyzer::__disable_all_analyzers",
    "Analyzer::__disable_analyzer",
    "Analyzer::__enable_analyzer",
    "Analyzer::__has_tag",
    "Analyzer::__name",
    "Analyzer::__register_for_port",
    "Analyzer::__schedule_analyzer",
    "Analyzer::__tag",
    "FileExtract::__set_limit",
    "Files::__add_analyzer",
    "Files::__analyzer_enabled",
    "Files::__analyzer_name",
    "Files::__disable_analyzer",
    "Files::__disable_reassembly",
    "Files::__enable_analyzer",
    "Files::__enable_reassembly",
    "Files::__file_exists",
    "Files::__lookup_file",
    "Files::__remove_analyzer",
    "Files::__set_reassembly_buffer",
    "Files::__set_timeout_interval",
    "Files::__stop",
    "Input::__create_analysis_stream",
    "Input::__create_event_stream",
    "Input::__create_table_stream",
    "Input::__force_update",
    "Input::__remove_stream",
    "Log::__add_filter",
    "Log::__create_stream",
    "Log::__disable_stream",
    "Log::__enable_stream",
    "Log::__flush",
    "Log::__remove_filter",
    "Log::__remove_stream",
    "Log::__set_buf",
    "Option::any_set_to_any_vec",
    "Option::set_change_handler",
    "PacketAnalyzer::GTPV1::remove_gtpv1_connection",
    "PacketAnalyzer::TEREDO::remove_teredo_connection",
    "PacketAnalyzer::__disable_analyzer",
    "PacketAnalyzer::__enable_analyzer",
    "PacketAnalyzer::__set_ignore_checksums_nets",
    "PacketAnalyzer::register_packet_analyzer",
    "PacketAnalyzer::register_protocol_detection",
    "PacketAnalyzer::try_register_packet_analyzer_by_name",
    "Pcap::error",
    "Pcap::findalldevs",
    "Pcap::get_filter_state",
    "Pcap::get_filter_state_string",
    "Pcap::install_pcap_filter",
    "Pcap::precompile_pcap_filter",
    "Reporter::conn_weird",
    "Reporter::error",
    "Reporter::fatal",
    "Reporter::fatal_error_with_core",
    "Reporter::file_weird",
    "Reporter::flow_weird",
    "Reporter::get_weird_sampling_duration",
    "Reporter::get_weird_sampling_global_list",
    "Reporter::get_weird_sampling_rate",
    "Reporter::get_weird_sampling_threshold",
    "Reporter::get_weird_sampling_whitelist",
    "Reporter::info",
    "Reporter::net_weird",
    "Reporter::set_weird_sampling_duration",
    "Reporter::set_weird_sampling_global_list",
    "Reporter::set_weird_sampling_rate",
    "Reporter::set_weird_sampling_threshold",
    "Reporter::set_weird_sampling_whitelist",
    "Reporter::warning",
    "Spicy::__resource_usage",
    "Spicy::__toggle_analyzer",
    "Supervisor::__create",
    "Supervisor::__destroy",
    "Supervisor::__init_cluster",
    "Supervisor::__is_supervised",
    "Supervisor::__is_supervisor",
    "Supervisor::__node",
    "Supervisor::__restart",
    "Supervisor::__status",
    "Supervisor::__stem_pid",
    "Telemetry::__collect_histogram_metrics",
    "Telemetry::__collect_metrics",
    "Telemetry::__dbl_counter_family",
    "Telemetry::__dbl_counter_inc",
    "Telemetry::__dbl_counter_metric_get_or_add",
    "Telemetry::__dbl_counter_value",
    "Telemetry::__dbl_gauge_dec",
    "Telemetry::__dbl_gauge_family",
    "Telemetry::__dbl_gauge_inc",
    "Telemetry::__dbl_gauge_metric_get_or_add",
    "Telemetry::__dbl_gauge_value",
    "Telemetry::__dbl_histogram_family",
    "Telemetry::__dbl_histogram_metric_get_or_add",
    "Telemetry::__dbl_histogram_observe",
    "Telemetry::__dbl_histogram_sum",
    "Telemetry::__int_counter_family",
    "Telemetry::__int_counter_inc",
    "Telemetry::__int_counter_metric_get_or_add",
    "Telemetry::__int_counter_value",
    "Telemetry::__int_gauge_dec",
    "Telemetry::__int_gauge_family",
    "Telemetry::__int_gauge_inc",
    "Telemetry::__int_gauge_metric_get_or_add",
    "Telemetry::__int_gauge_value",
    "Telemetry::__int_histogram_family",
    "Telemetry::__int_histogram_metric_get_or_add",
    "Telemetry::__int_histogram_observe",
    "Telemetry::__int_histogram_sum",
    "__init_primary_bifs",
    "__init_secondary_bifs",
    "active_file",
    "addr_to_counts",
    "addr_to_ptr_name",
    "addr_to_subnet",
    "all_set",
    "anonymize_addr",
    "any_set",
    "backtrace",
    "bare_mode",
    "bloomfilter_add",
    "bloomfilter_basic_init",
    "bloomfilter_basic_init2",
    "bloomfilter_clear",
    "bloomfilter_counting_init",
    "bloomfilter_decrement",
    "bloomfilter_internal_state",
    "bloomfilter_intersect",
    "bloomfilter_lookup",
    "bloomfilter_merge",
    "bytestring_to_count",
    "bytestring_to_double",
    "bytestring_to_float",
    "bytestring_to_hexstr",
    "calc_next_rotate",
    "cat",
    "cat_sep",
    "ceil",
    "check_subnet",
    "clean",
    "close",
    "community_id_v1",
    "compress_path",
    "connection_exists",
    "continue_processing",
    "convert_for_pattern",
    "count_substr",
    "count_to_double",
    "count_to_port",
    "count_to_v4_addr",
    "counts_to_addr",
    "current_analyzer",
    "current_event_time",
    "current_time",
    "decode_base64",
    "decode_base64_conn",
    "decode_netbios_name",
    "decode_netbios_name_type",
    "disable_event_group",
    "disable_module_events",
    "do_profiling",
    "double_to_count",
    "double_to_int",
    "double_to_interval",
    "double_to_time",
    "dump_current_packet",
    "dump_packet",
    "dump_rule_stats",
    "edit",
    "enable_event_group",
    "enable_module_events",
    "enable_raw_output",
    "encode_base64",
    "ends_with",
    "entropy_test_add",
    "entropy_test_finish",
    "entropy_test_init",
    "enum_names",
    "enum_to_int",
    "escape_string",
    "exit",
    "exp",
    "file_magic",
    "file_mode",
    "file_size",
    "filter_subnet_table",
    "find_all",
    "find_all_ordered",
    "find_entropy",
    "find_last",
    "find_str",
    "floor",
    "flush_all",
    "fmt",
    "fmt_ftp_port",
    "fnv1a32",
    "generate_all_events",
    "get_broker_stats",
    "get_conn_stats",
    "get_conn_transport_proto",
    "get_contents_file",
    "get_current_conn_bytes_threshold",
    "get_current_conn_duration_threshold",
    "get_current_conn_packets_threshold",
    "get_current_packet",
    "get_current_packet_header",
    "get_dns_stats",
    "get_event_handler_stats",
    "get_event_stats",
    "get_file_analysis_stats",
    "get_file_name",
    "get_gap_stats",
    "get_identifier_comments",
    "get_identifier_declaring_script",
    "get_login_state",
    "get_matcher_stats",
    "get_net_stats",
    "get_orig_seq",
    "get_package_readme",
    "get_port_transport_proto",
    "get_proc_stats",
    "get_reassembler_stats",
    "get_record_field_comments",
    "get_record_field_declaring_script",
    "get_reporter_stats",
    "get_resp_seq",
    "get_script_comments",
    "get_thread_stats",
    "get_timer_stats",
    "getenv",
    "gethostname",
    "getpid",
    "global_container_footprints",
    "global_ids",
    "global_options",
    "gsub",
    "has_event_group",
    "has_module_events",
    "have_spicy",
    "have_spicy_analyzers",
    "haversine_distance",
    "hexdump",
    "hexstr_to_bytestring",
    "hll_cardinality_add",
    "hll_cardinality_copy",
    "hll_cardinality_estimate",
    "hll_cardinality_init",
    "hll_cardinality_merge_into",
    "hrw_weight",
    "identify_data",
    "install_dst_addr_filter",
    "install_dst_net_filter",
    "install_src_addr_filter",
    "install_src_net_filter",
    "int_to_count",
    "int_to_double",
    "interval_to_double",
    "is_alnum",
    "is_alpha",
    "is_ascii",
    "is_file_analyzer",
    "is_icmp_port",
    "is_local_interface",
    "is_num",
    "is_packet_analyzer",
    "is_processing_suspended",
    "is_protocol_analyzer",
    "is_remote_event",
    "is_tcp_port",
    "is_udp_port",
    "is_v4_addr",
    "is_v4_subnet",
    "is_v6_addr",
    "is_v6_subnet",
    "is_valid_ip",
    "join_string_set",
    "join_string_vec",
    "levenshtein_distance",
    "ljust",
    "ln",
    "load_CPP",
    "log10",
    "log2",
    "lookup_ID",
    "lookup_addr",
    "lookup_autonomous_system",
    "lookup_connection",
    "lookup_hostname",
    "lookup_hostname_txt",
    "lookup_location",
    "lstrip",
    "mask_addr",
    "match_signatures",
    "matching_subnets",
    "md5_hash",
    "md5_hash_finish",
    "md5_hash_init",
    "md5_hash_update",
    "md5_hmac",
    "mkdir",
    "mmdb_open_asn_db",
    "mmdb_open_location_db",
    "network_time",
    "open",
    "open_for_append",
    "packet_source",
    "paraglob_equals",
    "paraglob_init",
    "paraglob_match",
    "parse_distinguished_name",
    "parse_eftp_port",
    "parse_ftp_epsv",
    "parse_ftp_pasv",
    "parse_ftp_port",
    "piped_exec",
    "port_to_count",
    "pow",
    "preserve_prefix",
    "preserve_subnet",
    "print_raw",
    "ptr_name_to_addr",
    "rand",
    "raw_bytes_to_v4_addr",
    "raw_bytes_to_v6_addr",
    "reading_live_traffic",
    "reading_traces",
    "record_fields",
    "record_type_to_vector",
    "remask_addr",
    "remove_prefix",
    "remove_suffix",
    "rename",
    "reverse",
    "rfind_str",
    "rjust",
    "rmdir",
    "rotate_file",
    "rotate_file_by_name",
    "routing0_data_to_addrs",
    "rstrip",
    "safe_shell_quote",
    "same_object",
    "sct_verify",
    "set_buf",
    "set_contents_file",
    "set_current_conn_bytes_threshold",
    "set_current_conn_duration_threshold",
    "set_current_conn_packets_threshold",
    "set_file_handle",
    "set_inactivity_timeout",
    "set_keys",
    "set_login_state",
    "set_network_time",
    "set_record_packets",
    "set_secret",
    "set_ssl_established",
    "setenv",
    "sha1_hash",
    "sha1_hash_finish",
    "sha1_hash_init",
    "sha1_hash_update",
    "sha256_hash",
    "sha256_hash_finish",
    "sha256_hash_init",
    "sha256_hash_update",
    "skip_further_processing",
    "skip_http_entity_data",
    "skip_smtp_data",
    "split_string",
    "split_string1",
    "split_string_all",
    "split_string_n",
    "sqrt",
    "srand",
    "starts_with",
    "str_smith_waterman",
    "str_split_indices",
    "strcmp",
    "strftime",
    "string_cat",
    "string_fill",
    "string_to_ascii_hex",
    "string_to_pattern",
    "strip",
    "strptime",
    "strstr",
    "sub",
    "sub_bytes",
    "subnet_to_addr",
    "subnet_width",
    "subst_string",
    "suspend_processing",
    "swap_case",
    "syslog",
    "system",
    "system_env",
    "table_keys",
    "table_values",
    "terminate",
    "time_to_double",
    "to_addr",
    "to_count",
    "to_double",
    "to_int",
    "to_json",
    "to_lower",
    "to_port",
    "to_string_literal",
    "to_subnet",
    "to_title",
    "to_upper",
    "topk_add",
    "topk_count",
    "topk_epsilon",
    "topk_get_top",
    "topk_init",
    "topk_merge",
    "topk_merge_prune",
    "topk_size",
    "topk_sum",
    "type_aliases",
    "type_name",
    "unescape_URI",
    "uninstall_dst_addr_filter",
    "uninstall_dst_net_filter",
    "uninstall_src_addr_filter",
    "uninstall_src_net_filter",
    "unique_id",
    "unique_id_from",
    "unlink",
    "uuid_to_string",
    "val_footprint",
    "write_file",
    "x509_check_cert_hostname",
    "x509_check_hostname",
    "x509_from_der",
    "x509_get_certificate_string",
    "x509_issuer_name_hash",
    "x509_ocsp_verify",
    "x509_parse",
    "x509_set_certificate_cache",
    "x509_set_certificate_cache_hit_callback",
    "x509_spki_hash",
    "x509_subject_name_hash",
    "x509_verify",
    "zeek_args",
    "zeek_is_terminating",
    "zeek_version",
    "zfill",
};
// Ones not listed:
//
// Broker::*
// These can manipulate unspecified (at script level) records.
//
// Cluster::publish_hrw
// Cluster::publish_rr
// These call script functions to get topic names.
//
// Log::__write
// Calls log policy functions.
//
// Option::set
// Both explicitly changes a global and potentially calls a
// function specified at run-time.
//
// clear_table
// Both clears a set/table and potentially calls an &on_change handler.
//
// disable_analyzer
// Can call Analyzer::disabling_analyzer hook.
//
// from_json
// Can call a normalization function.
//
// order
// Can call a comparison function.
//
// resize
// Changes a vector in place.
//
// sort
// Both changes a vector in place and can call an arbitrary comparison
// function.
//
// Some of these have side effects that could be checked for in a specific
// context, but the gains from doing so likely aren't worth the complexity.
bool is_side_effect_free(std::string func_name) {
    // A function qualifies exactly when its name appears in the table of
    // BiFs recorded as having no script-level side effects; any name not
    // in that table yields false.
    const auto entry = side_effects_free_BiFs.find(func_name);
    return entry != side_effects_free_BiFs.end();
}
} // namespace zeek::detail