diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 8552ad3105..37f6109cd2 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -395,6 +395,7 @@ set(MAIN_SRCS
     script_opt/CPP/Vars.cc
     ${_gen_zeek_script_cpp}
     script_opt/Expr.cc
+    script_opt/FuncInfo.cc
     script_opt/GenIDDefs.cc
     script_opt/IDOptInfo.cc
     script_opt/Inline.cc
diff --git a/src/script_opt/FuncInfo.cc b/src/script_opt/FuncInfo.cc
new file mode 100644
index 0000000000..57730670e0
--- /dev/null
+++ b/src/script_opt/FuncInfo.cc
@@ -0,0 +1,506 @@
+// See the file "COPYING" in the main distribution directory for copyright.
+
+#include "zeek/script_opt/FuncInfo.h"
+
+#include <unordered_set>
+
+namespace zeek::detail {
+
+// The following BiFs do not have any script-level side effects. It's
+// followed by comments listing the BiFs that have been omitted, and why.
+//
+// See script_opt/ZAM/maint/README for maintenance of these lists.
+
+static const std::unordered_set<std::string> side_effects_free_BiFs = {
+    "Analyzer::__disable_all_analyzers",
+    "Analyzer::__disable_analyzer",
+    "Analyzer::__enable_analyzer",
+    "Analyzer::__has_tag",
+    "Analyzer::__name",
+    "Analyzer::__register_for_port",
+    "Analyzer::__schedule_analyzer",
+    "Analyzer::__tag",
+    "FileExtract::__set_limit",
+    "Files::__add_analyzer",
+    "Files::__analyzer_enabled",
+    "Files::__analyzer_name",
+    "Files::__disable_analyzer",
+    "Files::__disable_reassembly",
+    "Files::__enable_analyzer",
+    "Files::__enable_reassembly",
+    "Files::__file_exists",
+    "Files::__lookup_file",
+    "Files::__remove_analyzer",
+    "Files::__set_reassembly_buffer",
+    "Files::__set_timeout_interval",
+    "Files::__stop",
+    "Input::__create_analysis_stream",
+    "Input::__create_event_stream",
+    "Input::__create_table_stream",
+    "Input::__force_update",
+    "Input::__remove_stream",
+    "Log::__add_filter",
+    "Log::__create_stream",
+    "Log::__disable_stream",
+    "Log::__enable_stream",
+    "Log::__flush",
+    "Log::__remove_filter",
+    "Log::__remove_stream",
+    "Log::__set_buf",
+    "Option::any_set_to_any_vec",
+    "Option::set_change_handler",
+    "PacketAnalyzer::GTPV1::remove_gtpv1_connection",
+    "PacketAnalyzer::TEREDO::remove_teredo_connection",
+    "PacketAnalyzer::__disable_analyzer",
+    "PacketAnalyzer::__enable_analyzer",
+    "PacketAnalyzer::__set_ignore_checksums_nets",
+    "PacketAnalyzer::register_packet_analyzer",
+    "PacketAnalyzer::register_protocol_detection",
+    "PacketAnalyzer::try_register_packet_analyzer_by_name",
+    "Pcap::error",
+    "Pcap::findalldevs",
+    "Pcap::get_filter_state",
+    "Pcap::get_filter_state_string",
+    "Pcap::install_pcap_filter",
+    "Pcap::precompile_pcap_filter",
+    "Reporter::conn_weird",
+    "Reporter::error",
+    "Reporter::fatal",
+    "Reporter::fatal_error_with_core",
+    "Reporter::file_weird",
+    "Reporter::flow_weird",
+    "Reporter::get_weird_sampling_duration",
+    "Reporter::get_weird_sampling_global_list",
+    "Reporter::get_weird_sampling_rate",
+    "Reporter::get_weird_sampling_threshold",
+    "Reporter::get_weird_sampling_whitelist",
+    "Reporter::info",
+    "Reporter::net_weird",
+    "Reporter::set_weird_sampling_duration",
+    "Reporter::set_weird_sampling_global_list",
+    "Reporter::set_weird_sampling_rate",
+    "Reporter::set_weird_sampling_threshold",
+    "Reporter::set_weird_sampling_whitelist",
+    "Reporter::warning",
+    "Spicy::__resource_usage",
+    "Spicy::__toggle_analyzer",
+    "Supervisor::__create",
+    "Supervisor::__destroy",
+    "Supervisor::__init_cluster",
+    "Supervisor::__is_supervised",
+    "Supervisor::__is_supervisor",
+    "Supervisor::__node",
+    "Supervisor::__restart",
+    "Supervisor::__status",
+    "Supervisor::__stem_pid",
+    "Telemetry::__collect_histogram_metrics",
+    "Telemetry::__collect_metrics",
+    "Telemetry::__dbl_counter_family",
+    "Telemetry::__dbl_counter_inc",
+    "Telemetry::__dbl_counter_metric_get_or_add",
+    "Telemetry::__dbl_counter_value",
+    "Telemetry::__dbl_gauge_dec",
+    "Telemetry::__dbl_gauge_family",
+    "Telemetry::__dbl_gauge_inc",
+    "Telemetry::__dbl_gauge_metric_get_or_add",
+    "Telemetry::__dbl_gauge_value",
+    "Telemetry::__dbl_histogram_family",
+    "Telemetry::__dbl_histogram_metric_get_or_add",
+    "Telemetry::__dbl_histogram_observe",
+    "Telemetry::__dbl_histogram_sum",
+    "Telemetry::__int_counter_family",
+    "Telemetry::__int_counter_inc",
+    "Telemetry::__int_counter_metric_get_or_add",
+    "Telemetry::__int_counter_value",
+    "Telemetry::__int_gauge_dec",
+    "Telemetry::__int_gauge_family",
+    "Telemetry::__int_gauge_inc",
+    "Telemetry::__int_gauge_metric_get_or_add",
+    "Telemetry::__int_gauge_value",
+    "Telemetry::__int_histogram_family",
+    "Telemetry::__int_histogram_metric_get_or_add",
+    "Telemetry::__int_histogram_observe",
+    "Telemetry::__int_histogram_sum",
+    "__init_primary_bifs",
+    "__init_secondary_bifs",
+    "active_file",
+    "addr_to_counts",
+    "addr_to_ptr_name",
+    "addr_to_subnet",
+    "all_set",
+    "anonymize_addr",
+    "any_set",
+    "backtrace",
+    "bare_mode",
+    "bloomfilter_add",
+    "bloomfilter_basic_init",
+    "bloomfilter_basic_init2",
+    "bloomfilter_clear",
+    "bloomfilter_counting_init",
+    "bloomfilter_decrement",
+    "bloomfilter_internal_state",
+    "bloomfilter_intersect",
+    "bloomfilter_lookup",
+    "bloomfilter_merge",
+    "bytestring_to_count",
+    "bytestring_to_double",
+    "bytestring_to_float",
+    "bytestring_to_hexstr",
+    "calc_next_rotate",
+    "cat",
+    "cat_sep",
+    "ceil",
+    "check_subnet",
+    "clean",
+    "close",
+    "community_id_v1",
+    "compress_path",
+    "connection_exists",
+    "continue_processing",
+    "convert_for_pattern",
+    "count_substr",
+    "count_to_double",
+    "count_to_port",
+    "count_to_v4_addr",
+    "counts_to_addr",
+    "current_analyzer",
+    "current_event_time",
+    "current_time",
+    "decode_base64",
+    "decode_base64_conn",
+    "decode_netbios_name",
+    "decode_netbios_name_type",
+    "disable_event_group",
+    "disable_module_events",
+    "do_profiling",
+    "double_to_count",
+    "double_to_int",
+    "double_to_interval",
+    "double_to_time",
+    "dump_current_packet",
+    "dump_packet",
+    "dump_rule_stats",
+    "edit",
+    "enable_event_group",
+    "enable_module_events",
+    "enable_raw_output",
+    "encode_base64",
+    "ends_with",
+    "entropy_test_add",
+    "entropy_test_finish",
+    "entropy_test_init",
+    "enum_names",
+    "enum_to_int",
+    "escape_string",
+    "exit",
+    "exp",
+    "file_magic",
+    "file_mode",
+    "file_size",
+    "filter_subnet_table",
+    "find_all",
+    "find_all_ordered",
+    "find_entropy",
+    "find_last",
+    "find_str",
+    "floor",
+    "flush_all",
+    "fmt",
+    "fmt_ftp_port",
+    "fnv1a32",
+    "generate_all_events",
+    "get_broker_stats",
+    "get_conn_stats",
+    "get_conn_transport_proto",
+    "get_contents_file",
+    "get_current_conn_bytes_threshold",
+    "get_current_conn_duration_threshold",
+    "get_current_conn_packets_threshold",
+    "get_current_packet",
+    "get_current_packet_header",
+    "get_dns_stats",
+    "get_event_handler_stats",
+    "get_event_stats",
+    "get_file_analysis_stats",
+    "get_file_name",
+    "get_gap_stats",
+    "get_identifier_comments",
+    "get_identifier_declaring_script",
+    "get_login_state",
+    "get_matcher_stats",
+    "get_net_stats",
+    "get_orig_seq",
+    "get_package_readme",
+    "get_port_transport_proto",
+    "get_proc_stats",
+    "get_reassembler_stats",
+    "get_record_field_comments",
+    "get_record_field_declaring_script",
+    "get_reporter_stats",
+    "get_resp_seq",
+    "get_script_comments",
+    "get_thread_stats",
+    "get_timer_stats",
+    "getenv",
+    "gethostname",
+    "getpid",
+    "global_container_footprints",
+    "global_ids",
+    "global_options",
+    "gsub",
+    "has_event_group",
+    "has_module_events",
+    "have_spicy",
+    "have_spicy_analyzers",
+    "haversine_distance",
+    "hexdump",
+    "hexstr_to_bytestring",
+    "hll_cardinality_add",
+    "hll_cardinality_copy",
+    "hll_cardinality_estimate",
+    "hll_cardinality_init",
+    "hll_cardinality_merge_into",
+    "hrw_weight",
+    "identify_data",
+    "install_dst_addr_filter",
+    "install_dst_net_filter",
+    "install_src_addr_filter",
+    "install_src_net_filter",
+    "int_to_count",
+    "int_to_double",
+    "interval_to_double",
+    "is_alnum",
+    "is_alpha",
+    "is_ascii",
+    "is_file_analyzer",
+    "is_icmp_port",
+    "is_local_interface",
+    "is_num",
+    "is_packet_analyzer",
+    "is_processing_suspended",
+    "is_protocol_analyzer",
+    "is_remote_event",
+    "is_tcp_port",
+    "is_udp_port",
+    "is_v4_addr",
+    "is_v4_subnet",
+    "is_v6_addr",
+    "is_v6_subnet",
+    "is_valid_ip",
+    "join_string_set",
+    "join_string_vec",
+    "levenshtein_distance",
+    "ljust",
+    "ln",
+    "load_CPP",
+    "log10",
+    "log2",
+    "lookup_ID",
+    "lookup_addr",
+    "lookup_autonomous_system",
+    "lookup_connection",
+    "lookup_hostname",
+    "lookup_hostname_txt",
+    "lookup_location",
+    "lstrip",
+    "mask_addr",
+    "match_signatures",
+    "matching_subnets",
+    "md5_hash",
+    "md5_hash_finish",
+    "md5_hash_init",
+    "md5_hash_update",
+    "md5_hmac",
+    "mkdir",
+    "mmdb_open_asn_db",
+    "mmdb_open_location_db",
+    "network_time",
+    "open",
+    "open_for_append",
+    "packet_source",
+    "paraglob_equals",
+    "paraglob_init",
+    "paraglob_match",
+    "parse_distinguished_name",
+    "parse_eftp_port",
+    "parse_ftp_epsv",
+    "parse_ftp_pasv",
+    "parse_ftp_port",
+    "piped_exec",
+    "port_to_count",
+    "pow",
+    "preserve_prefix",
+    "preserve_subnet",
+    "print_raw",
+    "ptr_name_to_addr",
+    "rand",
+    "raw_bytes_to_v4_addr",
+    "raw_bytes_to_v6_addr",
+    "reading_live_traffic",
+    "reading_traces",
+    "record_fields",
+    "record_type_to_vector",
+    "remask_addr",
+    "remove_prefix",
+    "remove_suffix",
+    "rename",
+    "reverse",
+    "rfind_str",
+    "rjust",
+    "rmdir",
+    "rotate_file",
+    "rotate_file_by_name",
+    "routing0_data_to_addrs",
+    "rstrip",
+    "safe_shell_quote",
+    "same_object",
+    "sct_verify",
+    "set_buf",
+    "set_contents_file",
+    "set_current_conn_bytes_threshold",
+    "set_current_conn_duration_threshold",
+    "set_current_conn_packets_threshold",
+    "set_file_handle",
+    "set_inactivity_timeout",
+    "set_keys",
+    "set_login_state",
+    "set_network_time",
+    "set_record_packets",
+    "set_secret",
+    "set_ssl_established",
+    "setenv",
+    "sha1_hash",
+    "sha1_hash_finish",
+    "sha1_hash_init",
+    "sha1_hash_update",
+    "sha256_hash",
+    "sha256_hash_finish",
+    "sha256_hash_init",
+    "sha256_hash_update",
+    "skip_further_processing",
+    "skip_http_entity_data",
+    "skip_smtp_data",
+    "split_string",
+    "split_string1",
+    "split_string_all",
+    "split_string_n",
+    "sqrt",
+    "srand",
+    "starts_with",
+    "str_smith_waterman",
+    "str_split_indices",
+    "strcmp",
+    "strftime",
+    "string_cat",
+    "string_fill",
+    "string_to_ascii_hex",
+    "string_to_pattern",
+    "strip",
+    "strptime",
+    "strstr",
+    "sub",
+    "sub_bytes",
+    "subnet_to_addr",
+    "subnet_width",
+    "subst_string",
+    "suspend_processing",
+    "swap_case",
+    "syslog",
+    "system",
+    "system_env",
+    "table_keys",
+    "table_values",
+    "terminate",
+    "time_to_double",
+    "to_addr",
+    "to_count",
+    "to_double",
+    "to_int",
+    "to_json",
+    "to_lower",
+    "to_port",
+    "to_string_literal",
+    "to_subnet",
+    "to_title",
+    "to_upper",
+    "topk_add",
+    "topk_count",
+    "topk_epsilon",
+    "topk_get_top",
+    "topk_init",
+    "topk_merge",
+    "topk_merge_prune",
+    "topk_size",
+    "topk_sum",
+    "type_aliases",
+    "type_name",
+    "unescape_URI",
+    "uninstall_dst_addr_filter",
+    "uninstall_dst_net_filter",
+    "uninstall_src_addr_filter",
+    "uninstall_src_net_filter",
+    "unique_id",
+    "unique_id_from",
+    "unlink",
+    "uuid_to_string",
+    "val_footprint",
+    "write_file",
+    "x509_check_cert_hostname",
+    "x509_check_hostname",
+    "x509_from_der",
+    "x509_get_certificate_string",
+    "x509_issuer_name_hash",
+    "x509_ocsp_verify",
+    "x509_parse",
+    "x509_set_certificate_cache",
+    "x509_set_certificate_cache_hit_callback",
+    "x509_spki_hash",
+    "x509_subject_name_hash",
+    "x509_verify",
+    "zeek_args",
+    "zeek_is_terminating",
+    "zeek_version",
+    "zfill",
+};
+
+// Ones not listed:
+//
+// Broker::*
+//     These can manipulate unspecified (at script level) records.
+//
+// Cluster::publish_hrw
+// Cluster::publish_rr
+//     These call script functions to get topic names.
+//
+// Log::__write
+//     Calls log policy functions.
+//
+// Option::set
+//     Both explicitly changes a global and potentially calls a
+//     function specified at run-time.
+//
+// clear_table
+//     Both clears a set/table and potentially calls an &on_change handler.
+//
+// disable_analyzer
+//     Can call Analyzer::disabling_analyzer hook.
+//
+// from_json
+//     Can call a normalization function.
+//
+// order
+//     Can call a comparison function.
+//
+// resize
+//     Changes a vector in place.
+//
+// sort
+//     Both changes a vector in place and can call an arbitrary comparison
+//     function.
+//
+// Some of these have side effects that could be checked for in a specific
+// context, but the gains from doing so likely aren't worth the complexity.
+
+bool is_side_effect_free(const std::string& func_name) { return side_effects_free_BiFs.count(func_name) > 0; }
+
+} // namespace zeek::detail
diff --git a/src/script_opt/FuncInfo.h b/src/script_opt/FuncInfo.h
new file mode 100644
index 0000000000..a7925619ae
--- /dev/null
+++ b/src/script_opt/FuncInfo.h
@@ -0,0 +1,19 @@
+// See the file "COPYING" in the main distribution directory for copyright.
+
+// Utility functions that return information about Zeek functions. Currently
+// this is limited to information about whether BiFs are side-effect-free
+// (from a Zeek scripting perspective), but could be expanded in the future
+// to include information about Zeek script functions, idempotency, and the
+// like.
+
+#pragma once
+
+#include "zeek/Func.h"
+
+namespace zeek::detail {
+
+// Returns true if "func_name" names a BiF listed as having no script-level
+// side effects, and false for any other name.
+extern bool is_side_effect_free(const std::string& func_name);
+
+} // namespace zeek::detail