diff --git a/src/script_opt/FuncInfo.cc b/src/script_opt/FuncInfo.cc index 6b89e4c3a6..efce28a613 100644 --- a/src/script_opt/FuncInfo.cc +++ b/src/script_opt/FuncInfo.cc @@ -27,13 +27,22 @@ namespace zeek::detail { // ATTR_NO_SCRIPT_SIDE_EFFECTS. #define ATTR_NO_ZEEK_SIDE_EFFECTS 0x2 -// Calls made with the same arguments yield the same results. Implies -// ATTR_NO_ZEEK_SIDE_EFFECTS. +// Calls made with the same arguments yield the same results, if made +// after full Zeek initialization. Implies ATTR_NO_ZEEK_SIDE_EFFECTS. #define ATTR_IDEMPOTENT 0x4 +// Calls with constant arguments can always be folded, even prior to +// full Zeek initialization. Such functions must not have the potential +// to generate errors. Implies ATTR_IDEMPOTENT. +#define ATTR_FOLDABLE 0x8 + // The event engine knows about this script function and may call it // during its processing. -#define ATTR_SPECIAL_SCRIPT_FUNC 0x8 +#define ATTR_SPECIAL_SCRIPT_FUNC 0x10 + +// ZAM knows about this script function and will replace it with specialized +// instructions. +#define ATTR_ZAM_REPLACEABLE_SCRIPT_FUNC 0x20 static std::unordered_map func_attrs = { // Script functions. @@ -52,15 +61,17 @@ static std::unordered_map func_attrs = { {"discarder_check_udp", ATTR_SPECIAL_SCRIPT_FUNC}, {"from_json_default_key_mapper", ATTR_SPECIAL_SCRIPT_FUNC}, + {"id_string", ATTR_ZAM_REPLACEABLE_SCRIPT_FUNC}, + // BiFs. 
{"Analyzer::__disable_all_analyzers", ATTR_NO_SCRIPT_SIDE_EFFECTS}, {"Analyzer::__disable_analyzer", ATTR_NO_SCRIPT_SIDE_EFFECTS}, {"Analyzer::__enable_analyzer", ATTR_NO_SCRIPT_SIDE_EFFECTS}, - {"Analyzer::__has_tag", ATTR_IDEMPOTENT}, - {"Analyzer::__name", ATTR_IDEMPOTENT}, + {"Analyzer::__has_tag", ATTR_FOLDABLE}, + {"Analyzer::__name", ATTR_FOLDABLE}, {"Analyzer::__register_for_port", ATTR_NO_SCRIPT_SIDE_EFFECTS}, {"Analyzer::__schedule_analyzer", ATTR_NO_SCRIPT_SIDE_EFFECTS}, - {"Analyzer::__tag", ATTR_IDEMPOTENT}, + {"Analyzer::__tag", ATTR_FOLDABLE}, {"FileExtract::__set_limit", ATTR_NO_SCRIPT_SIDE_EFFECTS}, {"Files::__add_analyzer", ATTR_NO_SCRIPT_SIDE_EFFECTS}, {"Files::__analyzer_enabled", ATTR_NO_ZEEK_SIDE_EFFECTS}, @@ -92,7 +103,7 @@ static std::unordered_map func_attrs = { {"Log::__set_buf", ATTR_NO_SCRIPT_SIDE_EFFECTS}, {"Log::__set_max_delay_interval", ATTR_NO_SCRIPT_SIDE_EFFECTS}, {"Log::__set_max_delay_queue_size", ATTR_NO_SCRIPT_SIDE_EFFECTS}, - {"Option::any_set_to_any_vec", ATTR_IDEMPOTENT}, + {"Option::any_set_to_any_vec", ATTR_FOLDABLE}, {"Option::set_change_handler", ATTR_NO_SCRIPT_SIDE_EFFECTS}, {"PacketAnalyzer::GTPV1::remove_gtpv1_connection", ATTR_NO_SCRIPT_SIDE_EFFECTS}, {"PacketAnalyzer::TEREDO::remove_teredo_connection", ATTR_NO_SCRIPT_SIDE_EFFECTS}, @@ -166,17 +177,18 @@ static std::unordered_map func_attrs = { {"Telemetry::__int_histogram_metric_get_or_add", ATTR_NO_SCRIPT_SIDE_EFFECTS}, {"Telemetry::__int_histogram_observe", ATTR_NO_SCRIPT_SIDE_EFFECTS}, {"Telemetry::__int_histogram_sum", ATTR_NO_SCRIPT_SIDE_EFFECTS}, + {"WebSocket::__configure_analyzer", ATTR_NO_SCRIPT_SIDE_EFFECTS}, {"__init_primary_bifs", ATTR_NO_SCRIPT_SIDE_EFFECTS}, {"__init_secondary_bifs", ATTR_NO_SCRIPT_SIDE_EFFECTS}, {"active_file", ATTR_NO_ZEEK_SIDE_EFFECTS}, - {"addr_to_counts", ATTR_IDEMPOTENT}, - {"addr_to_ptr_name", ATTR_IDEMPOTENT}, - {"addr_to_subnet", ATTR_IDEMPOTENT}, - {"all_set", ATTR_IDEMPOTENT}, - {"anonymize_addr", ATTR_IDEMPOTENT}, - 
{"any_set", ATTR_IDEMPOTENT}, + {"addr_to_counts", ATTR_FOLDABLE}, + {"addr_to_ptr_name", ATTR_FOLDABLE}, + {"addr_to_subnet", ATTR_FOLDABLE}, + {"all_set", ATTR_FOLDABLE}, + {"anonymize_addr", ATTR_FOLDABLE}, + {"any_set", ATTR_FOLDABLE}, {"backtrace", ATTR_NO_SCRIPT_SIDE_EFFECTS}, - {"bare_mode", ATTR_IDEMPOTENT}, + {"bare_mode", ATTR_FOLDABLE}, {"bloomfilter_add", ATTR_NO_SCRIPT_SIDE_EFFECTS}, {"bloomfilter_basic_init", ATTR_NO_SCRIPT_SIDE_EFFECTS}, {"bloomfilter_basic_init2", ATTR_NO_SCRIPT_SIDE_EFFECTS}, @@ -187,76 +199,77 @@ static std::unordered_map func_attrs = { {"bloomfilter_intersect", ATTR_NO_SCRIPT_SIDE_EFFECTS}, {"bloomfilter_lookup", ATTR_NO_SCRIPT_SIDE_EFFECTS}, {"bloomfilter_merge", ATTR_NO_SCRIPT_SIDE_EFFECTS}, - {"bytestring_to_count", ATTR_IDEMPOTENT}, - {"bytestring_to_double", ATTR_IDEMPOTENT}, - {"bytestring_to_float", ATTR_IDEMPOTENT}, - {"bytestring_to_hexstr", ATTR_IDEMPOTENT}, + {"bytestring_to_count", ATTR_IDEMPOTENT}, // can error + {"bytestring_to_double", ATTR_IDEMPOTENT}, // can error + {"bytestring_to_float", ATTR_IDEMPOTENT}, // can error + {"bytestring_to_hexstr", ATTR_FOLDABLE}, {"calc_next_rotate", ATTR_NO_ZEEK_SIDE_EFFECTS}, - {"cat", ATTR_IDEMPOTENT}, - {"cat_sep", ATTR_IDEMPOTENT}, - {"ceil", ATTR_IDEMPOTENT}, - {"check_subnet", ATTR_IDEMPOTENT}, - {"clean", ATTR_IDEMPOTENT}, + {"cat", ATTR_FOLDABLE}, + {"cat_sep", ATTR_IDEMPOTENT}, // can error + {"ceil", ATTR_FOLDABLE}, + {"check_subnet", ATTR_FOLDABLE}, + {"clean", ATTR_FOLDABLE}, {"close", ATTR_NO_SCRIPT_SIDE_EFFECTS}, - {"community_id_v1", ATTR_IDEMPOTENT}, - {"compress_path", ATTR_IDEMPOTENT}, + {"community_id_v1", ATTR_FOLDABLE}, + {"compress_path", ATTR_FOLDABLE}, {"connection_exists", ATTR_NO_ZEEK_SIDE_EFFECTS}, {"continue_processing", ATTR_NO_SCRIPT_SIDE_EFFECTS}, - {"convert_for_pattern", ATTR_IDEMPOTENT}, - {"count_substr", ATTR_IDEMPOTENT}, - {"count_to_double", ATTR_IDEMPOTENT}, - {"count_to_port", ATTR_IDEMPOTENT}, - {"count_to_v4_addr", ATTR_IDEMPOTENT}, - 
{"counts_to_addr", ATTR_IDEMPOTENT}, + {"convert_for_pattern", ATTR_FOLDABLE}, + {"count_substr", ATTR_FOLDABLE}, + {"count_to_double", ATTR_FOLDABLE}, + {"count_to_port", ATTR_FOLDABLE}, + {"count_to_v4_addr", ATTR_IDEMPOTENT}, // can error + {"counts_to_addr", ATTR_IDEMPOTENT}, // can error {"current_analyzer", ATTR_NO_ZEEK_SIDE_EFFECTS}, {"current_event_time", ATTR_NO_ZEEK_SIDE_EFFECTS}, {"current_time", ATTR_NO_ZEEK_SIDE_EFFECTS}, - {"decode_base64", ATTR_IDEMPOTENT}, + {"decode_base64", ATTR_IDEMPOTENT}, // can error {"decode_base64_conn", ATTR_NO_SCRIPT_SIDE_EFFECTS}, - {"decode_netbios_name", ATTR_IDEMPOTENT}, - {"decode_netbios_name_type", ATTR_IDEMPOTENT}, + {"decode_netbios_name", ATTR_FOLDABLE}, + {"decode_netbios_name_type", ATTR_FOLDABLE}, {"disable_event_group", ATTR_NO_SCRIPT_SIDE_EFFECTS}, {"disable_module_events", ATTR_NO_SCRIPT_SIDE_EFFECTS}, {"do_profiling", ATTR_NO_SCRIPT_SIDE_EFFECTS}, - {"double_to_count", ATTR_IDEMPOTENT}, - {"double_to_int", ATTR_IDEMPOTENT}, - {"double_to_interval", ATTR_IDEMPOTENT}, - {"double_to_time", ATTR_IDEMPOTENT}, + {"double_to_count", ATTR_IDEMPOTENT}, // can error + {"double_to_int", ATTR_FOLDABLE}, + {"double_to_interval", ATTR_FOLDABLE}, + {"double_to_time", ATTR_FOLDABLE}, {"dump_current_packet", ATTR_NO_SCRIPT_SIDE_EFFECTS}, {"dump_packet", ATTR_NO_SCRIPT_SIDE_EFFECTS}, {"dump_rule_stats", ATTR_NO_SCRIPT_SIDE_EFFECTS}, - {"edit", ATTR_IDEMPOTENT}, + {"edit", ATTR_FOLDABLE}, {"enable_event_group", ATTR_NO_SCRIPT_SIDE_EFFECTS}, {"enable_module_events", ATTR_NO_SCRIPT_SIDE_EFFECTS}, {"enable_raw_output", ATTR_NO_SCRIPT_SIDE_EFFECTS}, - {"encode_base64", ATTR_IDEMPOTENT}, - {"ends_with", ATTR_IDEMPOTENT}, + {"encode_base64", ATTR_FOLDABLE}, + {"ends_with", ATTR_FOLDABLE}, {"entropy_test_add", ATTR_NO_SCRIPT_SIDE_EFFECTS}, {"entropy_test_finish", ATTR_NO_SCRIPT_SIDE_EFFECTS}, {"entropy_test_init", ATTR_NO_SCRIPT_SIDE_EFFECTS}, {"enum_names", ATTR_IDEMPOTENT}, - {"enum_to_int", ATTR_IDEMPOTENT}, - {"escape_string", 
ATTR_IDEMPOTENT}, + {"enum_to_int", ATTR_IDEMPOTENT}, // can error + {"escape_string", ATTR_FOLDABLE}, {"exit", ATTR_NO_SCRIPT_SIDE_EFFECTS}, - {"exp", ATTR_IDEMPOTENT}, - {"file_magic", ATTR_IDEMPOTENT}, + {"exp", ATTR_FOLDABLE}, + {"file_magic", ATTR_FOLDABLE}, {"file_mode", ATTR_NO_ZEEK_SIDE_EFFECTS}, {"file_size", ATTR_NO_ZEEK_SIDE_EFFECTS}, {"filter_subnet_table", ATTR_NO_ZEEK_SIDE_EFFECTS}, - {"find_all", ATTR_IDEMPOTENT}, - {"find_all_ordered", ATTR_IDEMPOTENT}, - {"find_entropy", ATTR_IDEMPOTENT}, - {"find_last", ATTR_IDEMPOTENT}, - {"find_str", ATTR_IDEMPOTENT}, - {"floor", ATTR_IDEMPOTENT}, + {"find_all", ATTR_FOLDABLE}, + {"find_all_ordered", ATTR_FOLDABLE}, + {"find_entropy", ATTR_FOLDABLE}, + {"find_in_zeekpath", ATTR_IDEMPOTENT}, // can error + {"find_last", ATTR_FOLDABLE}, + {"find_str", ATTR_FOLDABLE}, + {"floor", ATTR_FOLDABLE}, {"flush_all", ATTR_NO_SCRIPT_SIDE_EFFECTS}, - {"fmt", ATTR_IDEMPOTENT}, - {"fmt_ftp_port", ATTR_IDEMPOTENT}, - {"fnv1a32", ATTR_IDEMPOTENT}, + {"fmt", ATTR_FOLDABLE}, + {"fmt_ftp_port", ATTR_IDEMPOTENT}, // can error + {"fnv1a32", ATTR_FOLDABLE}, {"generate_all_events", ATTR_NO_SCRIPT_SIDE_EFFECTS}, {"get_broker_stats", ATTR_NO_ZEEK_SIDE_EFFECTS}, {"get_conn_stats", ATTR_NO_ZEEK_SIDE_EFFECTS}, - {"get_conn_transport_proto", ATTR_IDEMPOTENT}, + {"get_conn_transport_proto", ATTR_FOLDABLE}, {"get_contents_file", ATTR_NO_ZEEK_SIDE_EFFECTS}, {"get_current_conn_bytes_threshold", ATTR_NO_ZEEK_SIDE_EFFECTS}, {"get_current_conn_duration_threshold", ATTR_NO_ZEEK_SIDE_EFFECTS}, @@ -267,7 +280,7 @@ static std::unordered_map func_attrs = { {"get_event_handler_stats", ATTR_NO_ZEEK_SIDE_EFFECTS}, {"get_event_stats", ATTR_NO_ZEEK_SIDE_EFFECTS}, {"get_file_analysis_stats", ATTR_NO_ZEEK_SIDE_EFFECTS}, - {"get_file_name", ATTR_IDEMPOTENT}, + {"get_file_name", ATTR_NO_ZEEK_SIDE_EFFECTS}, {"get_gap_stats", ATTR_NO_ZEEK_SIDE_EFFECTS}, {"get_identifier_comments", ATTR_IDEMPOTENT}, {"get_identifier_declaring_script", ATTR_IDEMPOTENT}, @@ -275,8 
+288,8 @@ static std::unordered_map func_attrs = { {"get_matcher_stats", ATTR_NO_ZEEK_SIDE_EFFECTS}, {"get_net_stats", ATTR_NO_ZEEK_SIDE_EFFECTS}, {"get_orig_seq", ATTR_NO_ZEEK_SIDE_EFFECTS}, - {"get_package_readme", ATTR_IDEMPOTENT}, - {"get_port_transport_proto", ATTR_IDEMPOTENT}, + {"get_package_readme", ATTR_FOLDABLE}, + {"get_port_transport_proto", ATTR_FOLDABLE}, {"get_proc_stats", ATTR_NO_ZEEK_SIDE_EFFECTS}, {"get_reassembler_stats", ATTR_NO_ZEEK_SIDE_EFFECTS}, {"get_record_field_comments", ATTR_IDEMPOTENT}, @@ -292,54 +305,54 @@ static std::unordered_map func_attrs = { {"global_container_footprints", ATTR_NO_ZEEK_SIDE_EFFECTS}, {"global_ids", ATTR_IDEMPOTENT}, {"global_options", ATTR_IDEMPOTENT}, - {"gsub", ATTR_IDEMPOTENT}, + {"gsub", ATTR_FOLDABLE}, {"has_event_group", ATTR_NO_ZEEK_SIDE_EFFECTS}, {"has_module_events", ATTR_NO_ZEEK_SIDE_EFFECTS}, {"have_spicy", ATTR_IDEMPOTENT}, {"have_spicy_analyzers", ATTR_IDEMPOTENT}, - {"haversine_distance", ATTR_IDEMPOTENT}, - {"hexdump", ATTR_IDEMPOTENT}, - {"hexstr_to_bytestring", ATTR_IDEMPOTENT}, + {"haversine_distance", ATTR_FOLDABLE}, + {"hexdump", ATTR_FOLDABLE}, + {"hexstr_to_bytestring", ATTR_IDEMPOTENT}, // can error {"hll_cardinality_add", ATTR_NO_SCRIPT_SIDE_EFFECTS}, {"hll_cardinality_copy", ATTR_NO_SCRIPT_SIDE_EFFECTS}, {"hll_cardinality_estimate", ATTR_NO_ZEEK_SIDE_EFFECTS}, {"hll_cardinality_init", ATTR_NO_SCRIPT_SIDE_EFFECTS}, {"hll_cardinality_merge_into", ATTR_NO_SCRIPT_SIDE_EFFECTS}, - {"hrw_weight", ATTR_IDEMPOTENT}, - {"identify_data", ATTR_IDEMPOTENT}, + {"hrw_weight", ATTR_FOLDABLE}, + {"identify_data", ATTR_FOLDABLE}, {"install_dst_addr_filter", ATTR_NO_SCRIPT_SIDE_EFFECTS}, {"install_dst_net_filter", ATTR_NO_SCRIPT_SIDE_EFFECTS}, {"install_src_addr_filter", ATTR_NO_SCRIPT_SIDE_EFFECTS}, {"install_src_net_filter", ATTR_NO_SCRIPT_SIDE_EFFECTS}, - {"int_to_count", ATTR_IDEMPOTENT}, - {"int_to_double", ATTR_IDEMPOTENT}, - {"interval_to_double", ATTR_IDEMPOTENT}, - {"is_alnum", ATTR_IDEMPOTENT}, - 
{"is_alpha", ATTR_IDEMPOTENT}, - {"is_ascii", ATTR_IDEMPOTENT}, + {"int_to_count", ATTR_IDEMPOTENT}, // can error + {"int_to_double", ATTR_FOLDABLE}, + {"interval_to_double", ATTR_FOLDABLE}, + {"is_alnum", ATTR_FOLDABLE}, + {"is_alpha", ATTR_FOLDABLE}, + {"is_ascii", ATTR_FOLDABLE}, {"is_file_analyzer", ATTR_NO_ZEEK_SIDE_EFFECTS}, - {"is_icmp_port", ATTR_IDEMPOTENT}, + {"is_icmp_port", ATTR_FOLDABLE}, {"is_local_interface", ATTR_IDEMPOTENT}, - {"is_num", ATTR_IDEMPOTENT}, + {"is_num", ATTR_FOLDABLE}, {"is_packet_analyzer", ATTR_NO_ZEEK_SIDE_EFFECTS}, {"is_processing_suspended", ATTR_NO_ZEEK_SIDE_EFFECTS}, {"is_protocol_analyzer", ATTR_NO_ZEEK_SIDE_EFFECTS}, {"is_remote_event", ATTR_IDEMPOTENT}, - {"is_tcp_port", ATTR_IDEMPOTENT}, - {"is_udp_port", ATTR_IDEMPOTENT}, - {"is_v4_addr", ATTR_IDEMPOTENT}, - {"is_v4_subnet", ATTR_IDEMPOTENT}, - {"is_v6_addr", ATTR_IDEMPOTENT}, - {"is_v6_subnet", ATTR_IDEMPOTENT}, - {"is_valid_ip", ATTR_IDEMPOTENT}, - {"join_string_set", ATTR_IDEMPOTENT}, - {"join_string_vec", ATTR_IDEMPOTENT}, - {"levenshtein_distance", ATTR_IDEMPOTENT}, - {"ljust", ATTR_IDEMPOTENT}, - {"ln", ATTR_IDEMPOTENT}, + {"is_tcp_port", ATTR_FOLDABLE}, + {"is_udp_port", ATTR_FOLDABLE}, + {"is_v4_addr", ATTR_FOLDABLE}, + {"is_v4_subnet", ATTR_FOLDABLE}, + {"is_v6_addr", ATTR_FOLDABLE}, + {"is_v6_subnet", ATTR_FOLDABLE}, + {"is_valid_ip", ATTR_FOLDABLE}, + {"join_string_set", ATTR_FOLDABLE}, + {"join_string_vec", ATTR_FOLDABLE}, + {"levenshtein_distance", ATTR_FOLDABLE}, + {"ljust", ATTR_FOLDABLE}, + {"ln", ATTR_FOLDABLE}, {"load_CPP", ATTR_NO_SCRIPT_SIDE_EFFECTS}, - {"log10", ATTR_IDEMPOTENT}, - {"log2", ATTR_IDEMPOTENT}, + {"log10", ATTR_FOLDABLE}, + {"log2", ATTR_FOLDABLE}, {"lookup_ID", ATTR_IDEMPOTENT}, {"lookup_addr", ATTR_NO_SCRIPT_SIDE_EFFECTS}, {"lookup_autonomous_system", ATTR_NO_SCRIPT_SIDE_EFFECTS}, @@ -347,15 +360,15 @@ static std::unordered_map func_attrs = { {"lookup_hostname", ATTR_NO_SCRIPT_SIDE_EFFECTS}, {"lookup_hostname_txt", 
ATTR_NO_SCRIPT_SIDE_EFFECTS}, {"lookup_location", ATTR_NO_SCRIPT_SIDE_EFFECTS}, - {"lstrip", ATTR_IDEMPOTENT}, - {"mask_addr", ATTR_IDEMPOTENT}, + {"lstrip", ATTR_FOLDABLE}, + {"mask_addr", ATTR_FOLDABLE}, {"match_signatures", ATTR_NO_SCRIPT_SIDE_EFFECTS}, - {"matching_subnets", ATTR_IDEMPOTENT}, - {"md5_hash", ATTR_IDEMPOTENT}, + {"matching_subnets", ATTR_FOLDABLE}, + {"md5_hash", ATTR_FOLDABLE}, {"md5_hash_finish", ATTR_NO_SCRIPT_SIDE_EFFECTS}, {"md5_hash_init", ATTR_NO_SCRIPT_SIDE_EFFECTS}, {"md5_hash_update", ATTR_NO_SCRIPT_SIDE_EFFECTS}, - {"md5_hmac", ATTR_IDEMPOTENT}, + {"md5_hmac", ATTR_FOLDABLE}, {"mkdir", ATTR_NO_ZEEK_SIDE_EFFECTS}, {"mmdb_open_asn_db", ATTR_NO_SCRIPT_SIDE_EFFECTS}, {"mmdb_open_location_db", ATTR_NO_SCRIPT_SIDE_EFFECTS}, @@ -366,38 +379,38 @@ static std::unordered_map func_attrs = { {"paraglob_equals", ATTR_IDEMPOTENT}, {"paraglob_init", ATTR_NO_SCRIPT_SIDE_EFFECTS}, {"paraglob_match", ATTR_IDEMPOTENT}, - {"parse_distinguished_name", ATTR_IDEMPOTENT}, - {"parse_eftp_port", ATTR_IDEMPOTENT}, - {"parse_ftp_epsv", ATTR_IDEMPOTENT}, - {"parse_ftp_pasv", ATTR_IDEMPOTENT}, - {"parse_ftp_port", ATTR_IDEMPOTENT}, + {"parse_distinguished_name", ATTR_FOLDABLE}, + {"parse_eftp_port", ATTR_FOLDABLE}, + {"parse_ftp_epsv", ATTR_FOLDABLE}, + {"parse_ftp_pasv", ATTR_FOLDABLE}, + {"parse_ftp_port", ATTR_FOLDABLE}, {"piped_exec", ATTR_NO_SCRIPT_SIDE_EFFECTS}, - {"port_to_count", ATTR_IDEMPOTENT}, - {"pow", ATTR_IDEMPOTENT}, + {"port_to_count", ATTR_FOLDABLE}, + {"pow", ATTR_FOLDABLE}, {"preserve_prefix", ATTR_NO_SCRIPT_SIDE_EFFECTS}, {"preserve_subnet", ATTR_NO_SCRIPT_SIDE_EFFECTS}, {"print_raw", ATTR_NO_SCRIPT_SIDE_EFFECTS}, - {"ptr_name_to_addr", ATTR_IDEMPOTENT}, + {"ptr_name_to_addr", ATTR_IDEMPOTENT}, // can error {"rand", ATTR_NO_SCRIPT_SIDE_EFFECTS}, - {"raw_bytes_to_v4_addr", ATTR_IDEMPOTENT}, - {"raw_bytes_to_v6_addr", ATTR_IDEMPOTENT}, + {"raw_bytes_to_v4_addr", ATTR_IDEMPOTENT}, // can error + {"raw_bytes_to_v6_addr", ATTR_IDEMPOTENT}, // can 
error {"reading_live_traffic", ATTR_IDEMPOTENT}, {"reading_traces", ATTR_IDEMPOTENT}, - {"record_fields", ATTR_IDEMPOTENT}, - {"record_type_to_vector", ATTR_IDEMPOTENT}, - {"remask_addr", ATTR_IDEMPOTENT}, - {"remove_prefix", ATTR_IDEMPOTENT}, - {"remove_suffix", ATTR_IDEMPOTENT}, + {"record_fields", ATTR_FOLDABLE}, + {"record_type_to_vector", ATTR_FOLDABLE}, + {"remask_addr", ATTR_FOLDABLE}, + {"remove_prefix", ATTR_FOLDABLE}, + {"remove_suffix", ATTR_FOLDABLE}, {"rename", ATTR_NO_ZEEK_SIDE_EFFECTS}, - {"reverse", ATTR_IDEMPOTENT}, - {"rfind_str", ATTR_IDEMPOTENT}, - {"rjust", ATTR_IDEMPOTENT}, + {"reverse", ATTR_FOLDABLE}, + {"rfind_str", ATTR_FOLDABLE}, + {"rjust", ATTR_FOLDABLE}, {"rmdir", ATTR_NO_ZEEK_SIDE_EFFECTS}, {"rotate_file", ATTR_NO_ZEEK_SIDE_EFFECTS}, {"rotate_file_by_name", ATTR_NO_ZEEK_SIDE_EFFECTS}, - {"routing0_data_to_addrs", ATTR_IDEMPOTENT}, - {"rstrip", ATTR_IDEMPOTENT}, - {"safe_shell_quote", ATTR_IDEMPOTENT}, + {"routing0_data_to_addrs", ATTR_IDEMPOTENT}, // can error + {"rstrip", ATTR_FOLDABLE}, + {"safe_shell_quote", ATTR_FOLDABLE}, {"same_object", ATTR_IDEMPOTENT}, {"sct_verify", ATTR_IDEMPOTENT}, {"set_buf", ATTR_NO_SCRIPT_SIDE_EFFECTS}, @@ -414,73 +427,73 @@ static std::unordered_map func_attrs = { {"set_secret", ATTR_NO_SCRIPT_SIDE_EFFECTS}, {"set_ssl_established", ATTR_NO_SCRIPT_SIDE_EFFECTS}, {"setenv", ATTR_NO_SCRIPT_SIDE_EFFECTS}, - {"sha1_hash", ATTR_IDEMPOTENT}, + {"sha1_hash", ATTR_FOLDABLE}, {"sha1_hash_finish", ATTR_NO_SCRIPT_SIDE_EFFECTS}, {"sha1_hash_init", ATTR_NO_SCRIPT_SIDE_EFFECTS}, {"sha1_hash_update", ATTR_NO_SCRIPT_SIDE_EFFECTS}, - {"sha256_hash", ATTR_IDEMPOTENT}, + {"sha256_hash", ATTR_FOLDABLE}, {"sha256_hash_finish", ATTR_NO_SCRIPT_SIDE_EFFECTS}, {"sha256_hash_init", ATTR_NO_SCRIPT_SIDE_EFFECTS}, {"sha256_hash_update", ATTR_NO_SCRIPT_SIDE_EFFECTS}, {"skip_further_processing", ATTR_NO_SCRIPT_SIDE_EFFECTS}, {"skip_http_entity_data", ATTR_NO_SCRIPT_SIDE_EFFECTS}, {"skip_smtp_data", ATTR_NO_SCRIPT_SIDE_EFFECTS}, - 
{"split_string", ATTR_IDEMPOTENT}, - {"split_string1", ATTR_IDEMPOTENT}, - {"split_string_all", ATTR_IDEMPOTENT}, - {"split_string_n", ATTR_IDEMPOTENT}, - {"sqrt", ATTR_IDEMPOTENT}, + {"split_string", ATTR_FOLDABLE}, + {"split_string1", ATTR_FOLDABLE}, + {"split_string_all", ATTR_FOLDABLE}, + {"split_string_n", ATTR_FOLDABLE}, + {"sqrt", ATTR_IDEMPOTENT}, // can error {"srand", ATTR_NO_SCRIPT_SIDE_EFFECTS}, - {"starts_with", ATTR_IDEMPOTENT}, - {"str_smith_waterman", ATTR_IDEMPOTENT}, - {"str_split_indices", ATTR_IDEMPOTENT}, - {"strcmp", ATTR_IDEMPOTENT}, - {"strftime", ATTR_IDEMPOTENT}, - {"string_cat", ATTR_IDEMPOTENT}, - {"string_fill", ATTR_IDEMPOTENT}, - {"string_to_ascii_hex", ATTR_IDEMPOTENT}, - {"string_to_pattern", ATTR_IDEMPOTENT}, - {"strip", ATTR_IDEMPOTENT}, - {"strptime", ATTR_IDEMPOTENT}, - {"strstr", ATTR_IDEMPOTENT}, - {"sub", ATTR_IDEMPOTENT}, - {"sub_bytes", ATTR_IDEMPOTENT}, - {"subnet_to_addr", ATTR_IDEMPOTENT}, - {"subnet_width", ATTR_IDEMPOTENT}, - {"subst_string", ATTR_IDEMPOTENT}, + {"starts_with", ATTR_FOLDABLE}, + {"str_smith_waterman", ATTR_FOLDABLE}, + {"str_split_indices", ATTR_FOLDABLE}, + {"strcmp", ATTR_FOLDABLE}, + {"strftime", ATTR_FOLDABLE}, + {"string_cat", ATTR_FOLDABLE}, + {"string_fill", ATTR_FOLDABLE}, + {"string_to_ascii_hex", ATTR_FOLDABLE}, + {"string_to_pattern", ATTR_FOLDABLE}, + {"strip", ATTR_FOLDABLE}, + {"strptime", ATTR_FOLDABLE}, + {"strstr", ATTR_FOLDABLE}, + {"sub", ATTR_FOLDABLE}, + {"sub_bytes", ATTR_FOLDABLE}, + {"subnet_to_addr", ATTR_FOLDABLE}, + {"subnet_width", ATTR_FOLDABLE}, + {"subst_string", ATTR_FOLDABLE}, {"suspend_processing", ATTR_NO_SCRIPT_SIDE_EFFECTS}, - {"swap_case", ATTR_IDEMPOTENT}, + {"swap_case", ATTR_FOLDABLE}, {"syslog", ATTR_NO_ZEEK_SIDE_EFFECTS}, {"system", ATTR_NO_SCRIPT_SIDE_EFFECTS}, {"system_env", ATTR_NO_SCRIPT_SIDE_EFFECTS}, - {"table_keys", ATTR_IDEMPOTENT}, - {"table_pattern_matcher_stats", ATTR_IDEMPOTENT}, - {"table_values", ATTR_IDEMPOTENT}, + {"table_keys", 
ATTR_NO_ZEEK_SIDE_EFFECTS}, + {"table_pattern_matcher_stats", ATTR_NO_ZEEK_SIDE_EFFECTS}, + {"table_values", ATTR_NO_ZEEK_SIDE_EFFECTS}, {"terminate", ATTR_NO_SCRIPT_SIDE_EFFECTS}, - {"time_to_double", ATTR_IDEMPOTENT}, - {"to_addr", ATTR_IDEMPOTENT}, - {"to_count", ATTR_IDEMPOTENT}, - {"to_double", ATTR_IDEMPOTENT}, - {"to_int", ATTR_IDEMPOTENT}, - {"to_json", ATTR_IDEMPOTENT}, - {"to_lower", ATTR_IDEMPOTENT}, - {"to_port", ATTR_IDEMPOTENT}, - {"to_string_literal", ATTR_IDEMPOTENT}, - {"to_subnet", ATTR_IDEMPOTENT}, - {"to_title", ATTR_IDEMPOTENT}, - {"to_upper", ATTR_IDEMPOTENT}, + {"time_to_double", ATTR_FOLDABLE}, + {"to_addr", ATTR_IDEMPOTENT}, // can error + {"to_count", ATTR_IDEMPOTENT}, // can error + {"to_double", ATTR_IDEMPOTENT}, // can error + {"to_int", ATTR_FOLDABLE}, + {"to_json", ATTR_FOLDABLE}, + {"to_lower", ATTR_FOLDABLE}, + {"to_port", ATTR_IDEMPOTENT}, // can error + {"to_string_literal", ATTR_FOLDABLE}, + {"to_subnet", ATTR_IDEMPOTENT}, // can error + {"to_title", ATTR_FOLDABLE}, + {"to_upper", ATTR_FOLDABLE}, {"topk_add", ATTR_NO_SCRIPT_SIDE_EFFECTS}, - {"topk_count", ATTR_IDEMPOTENT}, - {"topk_epsilon", ATTR_IDEMPOTENT}, + {"topk_count", ATTR_NO_ZEEK_SIDE_EFFECTS}, + {"topk_epsilon", ATTR_NO_ZEEK_SIDE_EFFECTS}, {"topk_get_top", ATTR_NO_ZEEK_SIDE_EFFECTS}, {"topk_init", ATTR_NO_SCRIPT_SIDE_EFFECTS}, {"topk_merge", ATTR_NO_SCRIPT_SIDE_EFFECTS}, {"topk_merge_prune", ATTR_NO_SCRIPT_SIDE_EFFECTS}, - {"topk_size", ATTR_IDEMPOTENT}, - {"topk_sum", ATTR_IDEMPOTENT}, - {"type_aliases", ATTR_IDEMPOTENT}, - {"type_name", ATTR_IDEMPOTENT}, - {"unescape_URI", ATTR_IDEMPOTENT}, + {"topk_size", ATTR_NO_ZEEK_SIDE_EFFECTS}, + {"topk_sum", ATTR_NO_ZEEK_SIDE_EFFECTS}, + {"type_aliases", ATTR_FOLDABLE}, + {"type_name", ATTR_FOLDABLE}, + {"unescape_URI", ATTR_FOLDABLE}, {"uninstall_dst_addr_filter", ATTR_NO_SCRIPT_SIDE_EFFECTS}, {"uninstall_dst_net_filter", ATTR_NO_SCRIPT_SIDE_EFFECTS}, {"uninstall_src_addr_filter", ATTR_NO_SCRIPT_SIDE_EFFECTS}, @@ -488,7 +501,7 
@@ static std::unordered_map func_attrs = { {"unique_id", ATTR_NO_SCRIPT_SIDE_EFFECTS}, {"unique_id_from", ATTR_NO_SCRIPT_SIDE_EFFECTS}, {"unlink", ATTR_NO_ZEEK_SIDE_EFFECTS}, - {"uuid_to_string", ATTR_IDEMPOTENT}, + {"uuid_to_string", ATTR_FOLDABLE}, {"val_footprint", ATTR_NO_ZEEK_SIDE_EFFECTS}, {"write_file", ATTR_NO_ZEEK_SIDE_EFFECTS}, {"x509_check_cert_hostname", ATTR_IDEMPOTENT}, @@ -503,10 +516,10 @@ static std::unordered_map func_attrs = { {"x509_spki_hash", ATTR_IDEMPOTENT}, {"x509_subject_name_hash", ATTR_IDEMPOTENT}, {"x509_verify", ATTR_IDEMPOTENT}, - {"zeek_args", ATTR_IDEMPOTENT}, + {"zeek_args", ATTR_FOLDABLE}, {"zeek_is_terminating", ATTR_NO_ZEEK_SIDE_EFFECTS}, - {"zeek_version", ATTR_IDEMPOTENT}, - {"zfill", ATTR_IDEMPOTENT}, + {"zeek_version", ATTR_FOLDABLE}, + {"zfill", ATTR_FOLDABLE}, }; // Ones not listed: @@ -555,9 +568,19 @@ bool is_special_script_func(const std::string& func_name) { return f_attr != func_attrs.end() && (f_attr->second & ATTR_SPECIAL_SCRIPT_FUNC) != 0; } +bool is_ZAM_replaceable_script_func(const std::string& func_name) { + auto f_attr = func_attrs.find(func_name); + return f_attr != func_attrs.end() && (f_attr->second & ATTR_ZAM_REPLACEABLE_SCRIPT_FUNC) != 0; +} + bool is_idempotent(const std::string& func_name) { auto f_attr = func_attrs.find(func_name); - return f_attr != func_attrs.end() && (f_attr->second & ATTR_IDEMPOTENT) != 0; + return f_attr != func_attrs.end() && (f_attr->second & (ATTR_IDEMPOTENT | ATTR_FOLDABLE)) != 0; +} + +bool is_foldable(const std::string& func_name) { + auto f_attr = func_attrs.find(func_name); + return f_attr != func_attrs.end() && (f_attr->second & ATTR_FOLDABLE) != 0; } bool has_script_side_effects(const std::string& func_name) { @@ -566,7 +589,8 @@ bool has_script_side_effects(const std::string& func_name) { // We don't know about it, so be conservative. 
return true; - return (f_attr->second & (ATTR_NO_SCRIPT_SIDE_EFFECTS | ATTR_NO_ZEEK_SIDE_EFFECTS | ATTR_IDEMPOTENT)) == 0; + return (f_attr->second & + (ATTR_NO_SCRIPT_SIDE_EFFECTS | ATTR_NO_ZEEK_SIDE_EFFECTS | ATTR_IDEMPOTENT | ATTR_FOLDABLE)) == 0; } } // namespace zeek::detail diff --git a/src/script_opt/FuncInfo.h b/src/script_opt/FuncInfo.h index 53095d227d..f36601c8b2 100644 --- a/src/script_opt/FuncInfo.h +++ b/src/script_opt/FuncInfo.h @@ -12,11 +12,18 @@ namespace zeek::detail { // knows about. extern bool is_special_script_func(const std::string& func_name); +// Whether ZAM can replace the given script function with specialized instructions. +extern bool is_ZAM_replaceable_script_func(const std::string& func_name); + // An idempotent function returns the same value when called with the // same arguments (and has no meaningful side effects in terms of script-level // or Zeek-internal state). extern bool is_idempotent(const std::string& func_name); +// A foldable function can be folded at compile time if all of its arguments +// are constants. Such functions cannot generate error messages. +extern bool is_foldable(const std::string& func_name); + // Whether the given function (currently, just BiFs) has Zeek-script-level // side effects. extern bool has_script_side_effects(const std::string& func_name); diff --git a/src/script_opt/Inline.cc b/src/script_opt/Inline.cc index f6ef4d1fb5..8cf2479bd5 100644 --- a/src/script_opt/Inline.cc +++ b/src/script_opt/Inline.cc @@ -34,6 +34,11 @@ void Inliner::Analyze() { if ( is_special_script_func(f.Func()->Name()) ) continue; + // If ZAM can replace the script function, don't inline it, so its usage + // remains visible during the AST reduction process. + if ( is_ZAM_replaceable_script_func(f.Func()->Name()) ) + continue; + std::unordered_set cs; // Aspirational ....