diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 8552ad3105..37f6109cd2 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -395,6 +395,7 @@ set(MAIN_SRCS script_opt/CPP/Vars.cc ${_gen_zeek_script_cpp} script_opt/Expr.cc + script_opt/FuncInfo.cc script_opt/GenIDDefs.cc script_opt/IDOptInfo.cc script_opt/Inline.cc diff --git a/src/Expr.cc b/src/Expr.cc index 32445ccd69..c37ec9f3b6 100644 --- a/src/Expr.cc +++ b/src/Expr.cc @@ -431,7 +431,8 @@ ValPtr NameExpr::Eval(Frame* f) const { if ( v ) return v; else { - RuntimeError("value used but not set"); + if ( f ) + RuntimeError("value used but not set"); return nullptr; } } diff --git a/src/parse.y b/src/parse.y index c469ec93e4..e76ca6696e 100644 --- a/src/parse.y +++ b/src/parse.y @@ -132,6 +132,10 @@ std::string zeek::detail::current_module = GLOBAL_MODULE_NAME; bool is_export = false; // true if in an export {} block +// Used to temporarily turn off "is_export". A stack because the need +// to do so can nest. +std::vector hold_is_export; + // When parsing an expression for the debugger, where to put the result // (obviously not reentrant). extern Expr* g_curr_debug_expr; @@ -1584,8 +1588,17 @@ lambda_body: ; anonymous_function: - TOK_FUNCTION begin_lambda conditional_list lambda_body - { $$ = $4; } + TOK_FUNCTION + { + hold_is_export.push_back(is_export); + is_export = false; + } + begin_lambda conditional_list lambda_body + { + is_export = hold_is_export.back(); + hold_is_export.pop_back(); + $$ = $5; + } ; begin_lambda: diff --git a/src/script_opt/FuncInfo.cc b/src/script_opt/FuncInfo.cc new file mode 100644 index 0000000000..45bf2d9dda --- /dev/null +++ b/src/script_opt/FuncInfo.cc @@ -0,0 +1,559 @@ +// See the file "COPYING" in the main distribution directory for copyright. + +#include +#include "zeek/script_opt/FuncInfo.h" + +namespace zeek::detail { + +static std::unordered_set side_effects_free_BiFs = { + "Analyzer::__disable_all_analyzers", + "Analyzer::__disable_analyzer", + "Analyzer::__enable_analyzer", + "Analyzer::__has_tag", + "Analyzer::__name", + "Analyzer::__register_for_port", + "Analyzer::__schedule_analyzer", + "Analyzer::__tag", + "Broker::__append", + "Broker::__auto_publish", + "Broker::__auto_unpublish", + "Broker::__clear", + "Broker::__close", + "Broker::__create_clone", + "Broker::__create_master", + "Broker::__data", + "Broker::__data_type", + "Broker::__decrement", + "Broker::__erase", + "Broker::__exists", + "Broker::__flush_logs", + "Broker::__forward", + "Broker::__get", + "Broker::__get_index_from_value", + "Broker::__increment", + "Broker::__insert_into_set", + "Broker::__insert_into_table", + "Broker::__is_closed", + "Broker::__keys", + "Broker::__listen", + "Broker::__node_id", + "Broker::__opaque_clone_through_serialization", + "Broker::__peer", + "Broker::__peer_no_retry", + "Broker::__peers", + "Broker::__pop", + "Broker::__publish_id", + "Broker::__push", + "Broker::__put", + "Broker::__put_unique", + "Broker::__record_assign", + "Broker::__record_create", + "Broker::__record_iterator", + "Broker::__record_iterator_last", + "Broker::__record_iterator_next", + "Broker::__record_iterator_value", + "Broker::__record_lookup", + "Broker::__record_size", + "Broker::__remove_from", + "Broker::__set_clear", + "Broker::__set_contains", + "Broker::__set_create", + "Broker::__set_insert", + "Broker::__set_iterator", + "Broker::__set_iterator_last", + "Broker::__set_iterator_next", + "Broker::__set_iterator_value", + "Broker::__set_metrics_export_endpoint_name", + "Broker::__set_metrics_export_interval", + "Broker::__set_metrics_export_prefixes", + "Broker::__set_metrics_export_topic", + "Broker::__set_metrics_import_topics", + "Broker::__set_remove", + "Broker::__set_size", + "Broker::__store_name", + "Broker::__subscribe", + "Broker::__table_clear", + "Broker::__table_contains", + "Broker::__table_create", + "Broker::__table_insert", + "Broker::__table_iterator", + "Broker::__table_iterator_last", + "Broker::__table_iterator_next", + "Broker::__table_iterator_value", + "Broker::__table_lookup", + "Broker::__table_remove", + "Broker::__table_size", + "Broker::__unpeer", + "Broker::__unsubscribe", + "Broker::__vector_clear", + "Broker::__vector_create", + "Broker::__vector_insert", + "Broker::__vector_iterator", + "Broker::__vector_iterator_last", + "Broker::__vector_iterator_next", + "Broker::__vector_iterator_value", + "Broker::__vector_lookup", + "Broker::__vector_remove", + "Broker::__vector_replace", + "Broker::__vector_size", + "Broker::make_event", + "Broker::publish", + "Cluster::publish_hrw", + "Cluster::publish_rr", + "FileExtract::__set_limit", + "Files::__add_analyzer", + "Files::__analyzer_enabled", + "Files::__analyzer_name", + "Files::__disable_analyzer", + "Files::__disable_reassembly", + "Files::__enable_analyzer", + "Files::__enable_reassembly", + "Files::__file_exists", + "Files::__lookup_file", + "Files::__remove_analyzer", + "Files::__set_reassembly_buffer", + "Files::__set_timeout_interval", + "Files::__stop", + "Input::__create_analysis_stream", + "Input::__create_event_stream", + "Input::__create_table_stream", + "Input::__force_update", + "Input::__remove_stream", + "Log::__add_filter", + "Log::__create_stream", + "Log::__disable_stream", + "Log::__enable_stream", + "Log::__flush", + "Log::__remove_filter", + "Log::__remove_stream", + "Log::__set_buf", + "Log::__write", + "Option::any_set_to_any_vec", + // "Option::set", + "Option::set_change_handler", + "PacketAnalyzer::GTPV1::remove_gtpv1_connection", + "PacketAnalyzer::TEREDO::remove_teredo_connection", + "PacketAnalyzer::__disable_analyzer", + "PacketAnalyzer::__enable_analyzer", + "PacketAnalyzer::__set_ignore_checksums_nets", + "PacketAnalyzer::register_packet_analyzer", + "PacketAnalyzer::register_protocol_detection", + "PacketAnalyzer::try_register_packet_analyzer_by_name", + "Pcap::error", + "Pcap::findalldevs", + "Pcap::get_filter_state", + "Pcap::get_filter_state_string", + "Pcap::install_pcap_filter", + "Pcap::precompile_pcap_filter", + "Reporter::conn_weird", + "Reporter::error", + "Reporter::fatal", + "Reporter::fatal_error_with_core", + "Reporter::file_weird", + "Reporter::flow_weird", + "Reporter::get_weird_sampling_duration", + "Reporter::get_weird_sampling_global_list", + "Reporter::get_weird_sampling_rate", + "Reporter::get_weird_sampling_threshold", + "Reporter::get_weird_sampling_whitelist", + "Reporter::info", + "Reporter::net_weird", + "Reporter::set_weird_sampling_duration", + "Reporter::set_weird_sampling_global_list", + "Reporter::set_weird_sampling_rate", + "Reporter::set_weird_sampling_threshold", + "Reporter::set_weird_sampling_whitelist", + "Reporter::warning", + "Spicy::__resource_usage", + "Spicy::__toggle_analyzer", + "Supervisor::__create", + "Supervisor::__destroy", + "Supervisor::__init_cluster", + "Supervisor::__is_supervised", + "Supervisor::__is_supervisor", + "Supervisor::__node", + "Supervisor::__restart", + "Supervisor::__status", + "Supervisor::__stem_pid", + "Telemetry::__collect_histogram_metrics", + "Telemetry::__collect_metrics", + "Telemetry::__dbl_counter_family", + "Telemetry::__dbl_counter_inc", + "Telemetry::__dbl_counter_metric_get_or_add", + "Telemetry::__dbl_counter_value", + "Telemetry::__dbl_gauge_dec", + "Telemetry::__dbl_gauge_family", + "Telemetry::__dbl_gauge_inc", + "Telemetry::__dbl_gauge_metric_get_or_add", + "Telemetry::__dbl_gauge_value", + "Telemetry::__dbl_histogram_family", + "Telemetry::__dbl_histogram_metric_get_or_add", + "Telemetry::__dbl_histogram_observe", + "Telemetry::__dbl_histogram_sum", + "Telemetry::__int_counter_family", + "Telemetry::__int_counter_inc", + "Telemetry::__int_counter_metric_get_or_add", + "Telemetry::__int_counter_value", + "Telemetry::__int_gauge_dec", + "Telemetry::__int_gauge_family", + "Telemetry::__int_gauge_inc", + "Telemetry::__int_gauge_metric_get_or_add", + "Telemetry::__int_gauge_value", + "Telemetry::__int_histogram_family", + "Telemetry::__int_histogram_metric_get_or_add", + "Telemetry::__int_histogram_observe", + "Telemetry::__int_histogram_sum", + "__init_primary_bifs", + "__init_secondary_bifs", + "active_file", + "addr_to_counts", + "addr_to_ptr_name", + "addr_to_subnet", + "all_set", + "anonymize_addr", + "any_set", + "backtrace", + "bare_mode", + "bloomfilter_add", + "bloomfilter_basic_init", + "bloomfilter_basic_init2", + "bloomfilter_clear", + "bloomfilter_counting_init", + "bloomfilter_decrement", + "bloomfilter_internal_state", + "bloomfilter_intersect", + "bloomfilter_lookup", + "bloomfilter_merge", + "bytestring_to_count", + "bytestring_to_double", + "bytestring_to_float", + "bytestring_to_hexstr", + "calc_next_rotate", + "cat", + "cat_sep", + "ceil", + "check_subnet", + "clean", + // "clear_table", + "close", + "community_id_v1", + "compress_path", + "connection_exists", + "continue_processing", + "convert_for_pattern", + "count_substr", + "count_to_double", + "count_to_port", + "count_to_v4_addr", + "counts_to_addr", + "current_analyzer", + "current_event_time", + "current_time", + "decode_base64", + "decode_base64_conn", + "decode_netbios_name", + "decode_netbios_name_type", + "disable_analyzer", + "disable_event_group", + "disable_module_events", + "do_profiling", + "double_to_count", + "double_to_int", + "double_to_interval", + "double_to_time", + "dump_current_packet", + "dump_packet", + "dump_rule_stats", + "edit", + "enable_event_group", + "enable_module_events", + "enable_raw_output", + "encode_base64", + "ends_with", + "entropy_test_add", + "entropy_test_finish", + "entropy_test_init", + "enum_names", + "enum_to_int", + "escape_string", + "exit", + "exp", + "file_magic", + "file_mode", + "file_size", + "filter_subnet_table", + "find_all", + "find_all_ordered", + "find_entropy", + "find_last", + "find_str", + "floor", + "flush_all", + "fmt", + "fmt_ftp_port", + "fnv1a32", + "from_json", + "generate_all_events", + "get_broker_stats", + "get_conn_stats", + "get_conn_transport_proto", + "get_contents_file", + "get_current_conn_bytes_threshold", + "get_current_conn_duration_threshold", + "get_current_conn_packets_threshold", + "get_current_packet", + "get_current_packet_header", + "get_dns_stats", + "get_event_handler_stats", + "get_event_stats", + "get_file_analysis_stats", + "get_file_name", + "get_gap_stats", + "get_identifier_comments", + "get_identifier_declaring_script", + "get_login_state", + "get_matcher_stats", + "get_net_stats", + "get_orig_seq", + "get_package_readme", + "get_port_transport_proto", + "get_proc_stats", + "get_reassembler_stats", + "get_record_field_comments", + "get_record_field_declaring_script", + "get_reporter_stats", + "get_resp_seq", + "get_script_comments", + "get_thread_stats", + "get_timer_stats", + "getenv", + "gethostname", + "getpid", + "global_container_footprints", + "global_ids", + "global_options", + "gsub", + "has_event_group", + "has_module_events", + "have_spicy", + "have_spicy_analyzers", + "haversine_distance", + "hexdump", + "hexstr_to_bytestring", + "hll_cardinality_add", + "hll_cardinality_copy", + "hll_cardinality_estimate", + "hll_cardinality_init", + "hll_cardinality_merge_into", + "hrw_weight", + "identify_data", + "install_dst_addr_filter", + "install_dst_net_filter", + "install_src_addr_filter", + "install_src_net_filter", + "int_to_count", + "int_to_double", + "interval_to_double", + "is_alnum", + "is_alpha", + "is_ascii", + "is_file_analyzer", + "is_icmp_port", + "is_local_interface", + "is_num", + "is_packet_analyzer", + "is_processing_suspended", + "is_protocol_analyzer", + "is_remote_event", + "is_tcp_port", + "is_udp_port", + "is_v4_addr", + "is_v4_subnet", + "is_v6_addr", + "is_v6_subnet", + "is_valid_ip", + "join_string_set", + "join_string_vec", + "levenshtein_distance", + "ljust", + "ln", + "load_CPP", + "log10", + "log2", + "lookup_ID", + "lookup_addr", + "lookup_autonomous_system", + "lookup_connection", + "lookup_hostname", + "lookup_hostname_txt", + "lookup_location", + "lstrip", + "mask_addr", + "match_signatures", + "matching_subnets", + "md5_hash", + "md5_hash_finish", + "md5_hash_init", + "md5_hash_update", + "md5_hmac", + "mkdir", + "mmdb_open_asn_db", + "mmdb_open_location_db", + "network_time", + "open", + "open_for_append", + "order", + "packet_source", + "paraglob_equals", + "paraglob_init", + "paraglob_match", + "parse_distinguished_name", + "parse_eftp_port", + "parse_ftp_epsv", + "parse_ftp_pasv", + "parse_ftp_port", + "piped_exec", + "port_to_count", + "pow", + "preserve_prefix", + "preserve_subnet", + "print_raw", + "ptr_name_to_addr", + "rand", + "raw_bytes_to_v4_addr", + "raw_bytes_to_v6_addr", + "reading_live_traffic", + "reading_traces", + "record_fields", + "record_type_to_vector", + "remask_addr", + "remove_prefix", + "remove_suffix", + "rename", + // "resize", + "reverse", + "rfind_str", + "rjust", + "rmdir", + "rotate_file", + "rotate_file_by_name", + "routing0_data_to_addrs", + "rstrip", + "safe_shell_quote", + "same_object", + "sct_verify", + "set_buf", + "set_contents_file", + "set_current_conn_bytes_threshold", + "set_current_conn_duration_threshold", + "set_current_conn_packets_threshold", + "set_file_handle", + "set_inactivity_timeout", + "set_keys", + "set_login_state", + "set_network_time", + "set_record_packets", + "set_secret", + "set_ssl_established", + "setenv", + "sha1_hash", + "sha1_hash_finish", + "sha1_hash_init", + "sha1_hash_update", + "sha256_hash", + "sha256_hash_finish", + "sha256_hash_init", + "sha256_hash_update", + "skip_further_processing", + "skip_http_entity_data", + "skip_smtp_data", + // "sort", + "split_string", + "split_string1", + "split_string_all", + "split_string_n", + "sqrt", + "srand", + "starts_with", + "str_smith_waterman", + "str_split_indices", + "strcmp", + "strftime", + "string_cat", + "string_fill", + "string_to_ascii_hex", + "string_to_pattern", + "strip", + "strptime", + "strstr", + "sub", + "sub_bytes", + "subnet_to_addr", + "subnet_width", + "subst_string", + "suspend_processing", + "swap_case", + "syslog", + "system", + "system_env", + "table_keys", + "table_values", + "terminate", + "time_to_double", + "to_addr", + "to_count", + "to_double", + "to_int", + "to_json", + "to_lower", + "to_port", + "to_string_literal", + "to_subnet", + "to_title", + "to_upper", + "topk_add", + "topk_count", + "topk_epsilon", + "topk_get_top", + "topk_init", + "topk_merge", + "topk_merge_prune", + "topk_size", + "topk_sum", + "type_aliases", + "type_name", + "unescape_URI", + "uninstall_dst_addr_filter", + "uninstall_dst_net_filter", + "uninstall_src_addr_filter", + "uninstall_src_net_filter", + "unique_id", + "unique_id_from", + "unlink", + "uuid_to_string", + "val_footprint", + "write_file", + "x509_check_cert_hostname", + "x509_check_hostname", + "x509_from_der", + "x509_get_certificate_string", + "x509_issuer_name_hash", + "x509_ocsp_verify", + "x509_parse", + "x509_set_certificate_cache", + "x509_set_certificate_cache_hit_callback", + "x509_spki_hash", + "x509_subject_name_hash", + "x509_verify", + "zeek_args", + "zeek_is_terminating", + "zeek_version", + "zfill", +}; + +bool is_side_effect_free(std::string f) + { + return side_effects_free_BiFs.count(f) > 0; + } + +} // namespace zeek::detail diff --git a/src/script_opt/FuncInfo.h b/src/script_opt/FuncInfo.h new file mode 100644 index 0000000000..ce0d1ea390 --- /dev/null +++ b/src/script_opt/FuncInfo.h @@ -0,0 +1,17 @@ +// See the file "COPYING" in the main distribution directory for copyright. + +// Utility functions that return information about Zeek functions. Currently +// this is limited to information about whether BiFs are side-effect-free +// (from a Zeek scripting perspective), but could be expanded in the future +// to include information about Zeek script functions, idempotency, and the +// like. + +#pragma once + +#include "zeek/Func.h" + +namespace zeek::detail { + +extern bool is_side_effect_free(std::string f); + +} // namespace zeek::detail diff --git a/src/script_opt/ProfileFunc.cc b/src/script_opt/ProfileFunc.cc index bd4990113d..272148b15b 100644 --- a/src/script_opt/ProfileFunc.cc +++ b/src/script_opt/ProfileFunc.cc @@ -8,6 +8,7 @@ #include "zeek/Desc.h" #include "zeek/Func.h" #include "zeek/Stmt.h" +#include "zeek/script_opt/FuncInfo.h" #include "zeek/script_opt/IDOptInfo.h" namespace zeek::detail { @@ -147,6 +148,15 @@ TraversalCode ProfileFunc::PreStmt(const Stmt* s) { expr_switches.insert(sw); } break; + case STMT_ADD: + case STMT_DELETE: { + auto ad_stmt = static_cast(s); + auto ad_e = ad_stmt->StmtExpr(); + auto& lhs_t = ad_e->GetOp1()->GetType(); + if ( lhs_t->Tag() == TYPE_TABLE ) + aggr_mods.insert(lhs_t.get()); + } break; + default: break; } @@ -199,16 +209,18 @@ TraversalCode ProfileFunc::PreExpr(const Expr* e) { break; } - case EXPR_FIELD: - if ( abs_rec_fields ) { - auto f = e->AsFieldExpr()->Field(); + case EXPR_FIELD: { + auto f = e->AsFieldExpr()->Field(); + if ( abs_rec_fields ) addl_hashes.push_back(p_hash(f)); - } else { auto fn = e->AsFieldExpr()->FieldName(); addl_hashes.push_back(p_hash(fn)); } - break; + aggr_refs.insert(std::make_pair(e->GetOp1()->GetType().get(), f)); + } + + break; case EXPR_HAS_FIELD: if ( abs_rec_fields ) { @@ -221,32 +233,62 @@ TraversalCode ProfileFunc::PreExpr(const Expr* e) { } break; + case EXPR_INDEX: { + auto lhs_t = e->GetOp1()->GetType(); + if ( lhs_t->Tag() == TYPE_TABLE ) + aggr_refs.insert(std::make_pair(lhs_t.get(), 0)); + } break; + case EXPR_INCR: case EXPR_DECR: case EXPR_ADD_TO: case EXPR_REMOVE_FROM: case EXPR_ASSIGN: { - if ( e->GetOp1()->Tag() != EXPR_REF ) - // this isn't a direct assignment + auto lhs = e->GetOp1(); + + if ( lhs->Tag() == EXPR_REF ) + lhs = lhs->GetOp1(); + + else if ( e->Tag() == EXPR_ASSIGN ) + // This isn't a direct assignment, but instead an overloaded + // use of "=" such as in a table constructor. break; - auto lhs = e->GetOp1()->GetOp1(); - if ( lhs->Tag() != EXPR_NAME ) - break; - - auto id = lhs->AsNameExpr()->Id(); - TrackAssignment(id); - - if ( e->Tag() == EXPR_ASSIGN ) { - auto a_e = static_cast(e); - auto& av = a_e->AssignVal(); - if ( av ) - // This is a funky "local" assignment - // inside a when clause. - when_locals.insert(id); + auto lhs_t = lhs->GetType(); + if ( IsAggr(lhs_t->Tag()) ) { + // Determine which aggregate is being modified. For an + // assignment "a[b] = aggr", it's not a[b]'s type but rather + // a's type. However, for any of the others, e.g. "a[b] -= aggr" + // it is a[b]'s type. + if ( e->Tag() == EXPR_ASSIGN ) { + // The following might be nil for an assignment like + // "aggr = new_val". + auto lhs_parent = lhs->GetOp1(); + if ( lhs_parent ) + aggr_mods.insert(lhs_parent->GetType().get()); + } + else + // Operation directly modifies LHS. + aggr_mods.insert(lhs_t.get()); } - break; - } + + if ( lhs->Tag() == EXPR_NAME ) { + auto id = lhs->AsNameExpr()->Id(); + TrackAssignment(id); + + if ( e->Tag() == EXPR_ASSIGN ) { + auto a_e = static_cast(e); + auto& av = a_e->AssignVal(); + if ( av ) + // This is a funky "local" assignment + // inside a when clause. + when_locals.insert(id); + } + break; + } + + + } break; case EXPR_CALL: { auto c = e->AsCallExpr(); @@ -272,8 +314,8 @@ TraversalCode ProfileFunc::PreExpr(const Expr* e) { auto func_vf = func_v->AsFunc(); if ( func_vf->GetKind() == Func::SCRIPT_FUNC ) { - auto bf = static_cast(func_vf); - script_calls.insert(bf); + auto sf = static_cast(func_vf); + script_calls.insert(sf); } else BiF_globals.insert(func); @@ -329,8 +371,9 @@ TraversalCode ProfileFunc::PreExpr(const Expr* e) { // In general, we don't want to recurse into the body. // However, we still want to *profile* it so we can // identify calls within it. - ProfileFunc body_pf(l->Ingredients()->Body().get(), false); - script_calls.insert(body_pf.ScriptCalls().begin(), body_pf.ScriptCalls().end()); + auto pf = std::make_shared(l->Ingredients()->Body().get(), false); + // func_profs[l->PrimaryFunc()] = pf; + script_calls.insert(pf->ScriptCalls().begin(), pf->ScriptCalls().end()); return TC_ABORTSTMT; } @@ -340,7 +383,7 @@ TraversalCode ProfileFunc::PreExpr(const Expr* e) { const auto& attrs = sc->GetAttrs(); if ( attrs ) - constructor_attrs.insert(attrs.get()); + constructor_attrs[attrs.get()] = sc->GetType(); } break; case EXPR_TABLE_CONSTRUCTOR: { @@ -348,7 +391,17 @@ TraversalCode ProfileFunc::PreExpr(const Expr* e) { const auto& attrs = tc->GetAttrs(); if ( attrs ) - constructor_attrs.insert(attrs.get()); + constructor_attrs[attrs.get()] = tc->GetType(); + } break; + + case EXPR_RECORD_COERCE: + case EXPR_TABLE_COERCE: { + auto res_type = e->GetType().get(); + auto orig_type = e->GetOp1()->GetType().get(); + if ( type_aliases.count(res_type) == 0 ) + type_aliases[orig_type] = {res_type}; + else + type_aliases[orig_type].insert(res_type); } break; default: break; @@ -395,6 +448,9 @@ void ProfileFunc::TrackAssignment(const ID* id) { ++assignees[id]; else assignees[id] = 1; + + if ( id->IsGlobal() || captures.count(id) > 0 ) + non_local_assignees.insert(id); } ProfileFuncs::ProfileFuncs(std::vector& funcs, is_compilable_pred pred, bool _full_record_hashes) { @@ -432,6 +488,8 @@ ProfileFuncs::ProfileFuncs(std::vector& funcs, is_compilable_pred pred // Computing those hashes could have led to traversals that // create more pending expressions to analyze. } while ( ! pending_exprs.empty() ); + + ComputeSideEffects(); } void ProfileFuncs::MergeInProfile(ProfileFunc* pf) { @@ -460,7 +518,7 @@ void ProfileFuncs::MergeInProfile(ProfileFunc* pf) { auto& attrs = g->GetAttrs(); if ( attrs ) - AnalyzeAttrs(attrs.get()); + AnalyzeAttrs(attrs.get(), t.get()); } constants.insert(pf->Constants().begin(), pf->Constants().end()); @@ -475,7 +533,13 @@ void ProfileFuncs::MergeInProfile(ProfileFunc* pf) { } for ( auto& a : pf->ConstructorAttrs() ) - AnalyzeAttrs(a); + AnalyzeAttrs(a.first, a.second.get()); + + for ( auto& ta : pf->TypeAliases() ) { + if ( type_aliases.count(ta.first) == 0 ) + type_aliases[ta.first] = std::set{}; + type_aliases[ta.first].insert(ta.second.begin(), ta.second.end()); + } } void ProfileFuncs::TraverseValue(const ValPtr& v) { @@ -579,8 +643,12 @@ void ProfileFuncs::ComputeBodyHashes(std::vector& funcs) { if ( ! f.ShouldSkip() ) ComputeProfileHash(f.ProfilePtr()); - for ( auto& l : lambdas ) - ComputeProfileHash(ExprProf(l)); + for ( auto& l : lambdas ) { + auto pf = ExprProf(l); + printf("adding lambda profile for %s (%p)\n", l->PrimaryFunc()->Name(), l->PrimaryFunc().get()); + func_profs[l->PrimaryFunc().get()] = pf; + ComputeProfileHash(pf); + } } void ProfileFuncs::ComputeProfileHash(std::shared_ptr pf) { @@ -713,7 +781,7 @@ p_hash_type ProfileFuncs::HashType(const Type* t) { if ( f->attrs ) { if ( do_hash ) h = merge_p_hashes(h, HashAttrs(f->attrs)); - AnalyzeAttrs(f->attrs.get()); + AnalyzeAttrs(f->attrs.get(), t, i); } } } break; @@ -731,8 +799,24 @@ p_hash_type ProfileFuncs::HashType(const Type* t) { auto ft = t->AsFuncType(); auto flv = ft->FlavorString(); h = merge_p_hashes(h, p_hash(flv)); + + // We deal with the parameters individually, rather than just + // recursing into the RecordType that's used (for convenience) + // to represent them. We do so because their properties are + // somewhat different - in particular, an &default on a parameter + // field is resolved in the context of the caller, not the + // function itself, and so we don't want to track those as + // attributes associated with the function body's execution. h = merge_p_hashes(h, p_hash("params")); - h = merge_p_hashes(h, HashType(ft->Params())); + auto params = ft->Params()->Types(); + + if ( params ) { + h = merge_p_hashes(h, p_hash(params->length())); + + for ( auto p : *params ) + h = merge_p_hashes(h, HashType(p->type)); + } + h = merge_p_hashes(h, p_hash("func-yield")); h = merge_p_hashes(h, HashType(ft->Yield())); } break; @@ -803,18 +887,269 @@ p_hash_type ProfileFuncs::HashAttrs(const AttributesPtr& Attrs) { return h; } -void ProfileFuncs::AnalyzeAttrs(const Attributes* Attrs) { - auto attrs = Attrs->GetAttrs(); +extern const char* attr_name(AttrTag t); - for ( const auto& a : attrs ) { - const Expr* e = a->GetExpr().get(); +void ProfileFuncs::AnalyzeAttrs(const Attributes* attrs, const Type* t, int field) { + for ( const auto& a : attrs->GetAttrs() ) { + auto& e = a->GetExpr(); - if ( e ) { - pending_exprs.push_back(e); - if ( e->Tag() == EXPR_LAMBDA ) - lambdas.insert(e->AsLambdaExpr()); + if ( ! e ) + continue; + + pending_exprs.push_back(e.get()); + + auto prev_ea = expr_attrs.find(a.get()); + if ( prev_ea == expr_attrs.end() ) + expr_attrs[a.get()] = {std::pair{t, field}}; + else { + // Add it if new. This is rare, but can arise due to attributes + // being shared for example from initializers with a variable + // itself. + bool found = false; + for ( auto ea : prev_ea->second ) + if ( ea.first == t && ea.second == field ) { + found = true; + break; + } + + if ( ! found ) + prev_ea->second.emplace_back(std::pair{t, field}); } + + if ( e->Tag() == EXPR_LAMBDA ) + lambdas.insert(e->AsLambdaExpr()); + +#if 0 + // If this is an attribute that can be triggered by statement/expression + // execution, then we need to determine any modifications it might make + // to non-local state. + auto at = a->Tag(); + if ( at != ATTR_DEFAULT && at != ATTR_DEFAULT_INSERT && at != ATTR_ON_CHANGE ) + continue; + + if ( ! CouldHaveSideEffects(e) ) + continue; + + std::vector changes; + GetExprChangesToLocalState(e, changes); + + if ( ! changes.empty() ) + printf("problematic expr: %s\n", obj_desc(e.get()).c_str()); +#endif } } +void ProfileFuncs::ComputeSideEffects() { + // Computing side effects is an iterative process, because whether + // a given expression has a side effect can depend on whether it + // includes accesses to types that have side effects. + + // Step one: assemble a candidate pool of attributes to assess. + for ( auto& ea : expr_attrs ) { + // Is this an attribute that can be triggered by + // statement/expression execution? + auto a = ea.first; + auto at = a->Tag(); + if ( at == ATTR_DEFAULT || at == ATTR_DEFAULT_INSERT || at == ATTR_ON_CHANGE ) { + // Weed out very-common-and-completely-safe expressions. + if ( DefinitelyHasNoSideEffects(a->GetExpr()) ) + continue; + + printf("adding candidate %s\n", obj_desc(a).c_str()); + candidates.insert(a); + } + } + + std::vector> side_effects; + + while ( ! candidates.empty() ) { + std::unordered_set made_decision; + + for ( auto c : candidates ) { + IDSet non_local_ids; + std::unordered_set aggrs; + bool is_unknown = false; + + if ( ! AssessSideEffects(c->GetExpr(), non_local_ids, aggrs, is_unknown) ) + // Can't make a decision yet. + continue; + + made_decision.insert(c); + auto& effects_vec = attr_side_effects[c] = std::vector>{}; + + if ( non_local_ids.empty() && aggrs.empty() && ! is_unknown ) + // Definitely no side effects. + continue; + + // Track the associated side effects. + auto at = c->Tag() == ATTR_ON_CHANGE ? SideEffectsOp::WRITE : SideEffectsOp::READ; + for ( auto& ea : expr_attrs[c] ) { + auto seo = std::make_shared(at, ea.first, ea.second); + seo->AddModNonGlobal(non_local_ids); + seo->AddModAggrs(aggrs); + + if ( is_unknown ) + seo->SetUnknownChanges(); + + effects_vec.push_back(seo); + side_effects.push_back(std::move(seo)); + } + } + + ASSERT(! made_decision.empty()); + for ( auto md : made_decision ) + candidates.erase(md); + } +} + +bool ProfileFuncs::DefinitelyHasNoSideEffects(const ExprPtr& e) const { + if ( e->Tag() == EXPR_CONST || e->Tag() == EXPR_VECTOR_CONSTRUCTOR ) + return true; + + if ( e->Tag() == EXPR_NAME ) + return e->GetType()->Tag() != TYPE_FUNC; + + auto ep = expr_profs.find(e.get()); + ASSERT(ep != expr_profs.end()); + + const auto& pf = ep->second; + + if ( ! pf->NonLocalAssignees().empty() || ! pf->AggrRefs().empty() || ! pf->AggrMods().empty() || + ! pf->ScriptCalls().empty() ) + return false; + + for ( auto& b : pf->BiFGlobals() ) + if ( ! is_side_effect_free(b->Name()) ) + return false; + + return true; +} + +std::vector ProfileFuncs::AssociatedAttrs(const Type* t, int f) { + std::vector assoc_attrs; + + for ( auto c : candidates ) + for ( auto& ea : expr_attrs[c] ) + for ( auto ta : type_aliases[ea.first] ) + if ( same_type(t, ta) && f == ea.second ) { + assoc_attrs.push_back(c); + break; + } + + return assoc_attrs; +} + +bool ProfileFuncs::AssessSideEffects(const ExprPtr& e, IDSet& non_local_ids, std::unordered_set& aggrs, + bool& is_unknown) { + std::shared_ptr pf; + + if ( e->Tag() == EXPR_NAME && e->GetType()->Tag() == TYPE_FUNC ) { + // This occurs when the expression is itself a function name, and + // in an attribute context indicates an implicit call. + auto fid = e->AsNameExpr()->Id(); + auto fv = fid->GetVal(); + + if ( ! fv || ! fid->IsConst() ) { + // The value is unavailable (likely a bug), or might change + // at run-time. + is_unknown = true; + return true; + } + + auto func = fv->AsFunc(); + if ( func->GetKind() == Func::BUILTIN_FUNC ) { + if ( ! is_side_effect_free(func->Name()) ) + is_unknown = true; + return true; + } + + auto sf = static_cast(func); + if ( func_profs.count(sf) == 0 ) { + printf("no function profile for %s / %s (%p)\n", obj_desc(e.get()).c_str(), sf->Name(), sf); + is_unknown = true; + return true; + } + + pf = func_profs[sf]; + } + else { + ASSERT(expr_profs.count(e.get()) > 0); + pf = expr_profs[e.get()]; + } + + return AssessSideEffects(pf.get(), non_local_ids, aggrs, is_unknown); +} + +bool ProfileFuncs::AssessSideEffects(const ProfileFunc* pf, IDSet& non_local_ids, + std::unordered_set& aggrs, bool& is_unknown) { + if ( pf->DoesIndirectCalls() ) + is_unknown = true; + + for ( auto& b : pf->BiFGlobals() ) + if ( ! is_side_effect_free(b->Name()) ) { + is_unknown = true; + break; + } + + IDSet nla; + std::unordered_set mod_aggrs; + + for ( auto& a : pf->NonLocalAssignees() ) + nla.insert(a); + + for ( auto& r : pf->AggrRefs() ) + if ( ! AssessAggrEffects(SideEffectsOp::READ, r.first, r.second, nla, mod_aggrs, is_unknown) ) + return is_unknown; + + for ( auto& a : pf->AggrMods() ) + if ( ! AssessAggrEffects(SideEffectsOp::WRITE, a, 0, nla, mod_aggrs, is_unknown) ) + return is_unknown; + + for ( auto& f : pf->ScriptCalls() ) { + auto pff = func_profs[f]; + if ( active_func_profiles.count(pff) > 0 ) + continue; + + active_func_profiles.insert(pff); + auto a = AssessSideEffects(pff.get(), nla, mod_aggrs, is_unknown); + active_func_profiles.erase(pff); + + if ( ! a ) + return is_unknown; + } + + non_local_ids.insert(nla.begin(), nla.end()); + aggrs.insert(mod_aggrs.begin(), mod_aggrs.end()); + + return true; +} + +bool ProfileFuncs::AssessAggrEffects(SideEffectsOp::AccessType access, const Type* t, int f, IDSet& non_local_ids, + std::unordered_set& aggrs, bool& is_unknown) { + auto assoc_attrs = AssociatedAttrs(t, f); + + for ( auto a : assoc_attrs ) { + auto ase = attr_side_effects.find(a); + if ( ase == attr_side_effects.end() ) + return false; + + for ( auto& se : ase->second ) { + if ( se->GetAccessType() != access ) + continue; + + if ( se->HasUnknownChanges() ) { + is_unknown = true; + return true; + } + + for ( auto a : se->ModAggrs() ) + aggrs.insert(a); + for ( auto nl : se->ModNonLocals() ) + non_local_ids.insert(nl); + } + } + + return true; +} + } // namespace zeek::detail diff --git a/src/script_opt/ProfileFunc.h b/src/script_opt/ProfileFunc.h index cf34b1b6df..399cfd1483 100644 --- a/src/script_opt/ProfileFunc.h +++ b/src/script_opt/ProfileFunc.h @@ -37,6 +37,7 @@ #include "zeek/Stmt.h" #include "zeek/Traverse.h" #include "zeek/script_opt/ScriptOpt.h" +#include "zeek/script_opt/SideEffects.h" namespace zeek::detail { @@ -93,6 +94,9 @@ public: const IDSet& WhenLocals() const { return when_locals; } const IDSet& Params() const { return params; } const std::unordered_map& Assignees() const { return assignees; } + const std::unordered_set& NonLocalAssignees() const { return non_local_assignees; } + const auto& AggrRefs() const { return aggr_refs; } + const auto& AggrMods() const { return aggr_mods; } const IDSet& Inits() const { return inits; } const std::vector& Stmts() const { return stmts; } const std::vector& Exprs() const { return exprs; } @@ -102,14 +106,15 @@ public: const std::vector& OrderedIdentifiers() const { return ordered_ids; } const std::unordered_set& UnorderedTypes() const { return types; } const std::vector& OrderedTypes() const { return ordered_types; } + const auto& TypeAliases() const { return type_aliases; } const std::unordered_set& ScriptCalls() const { return script_calls; } const IDSet& BiFGlobals() const { return BiF_globals; } const std::unordered_set& Events() const { return events; } - const std::unordered_set& ConstructorAttrs() const { return constructor_attrs; } + const std::unordered_map& ConstructorAttrs() const { return constructor_attrs; } const std::unordered_set& ExprSwitches() const { return expr_switches; } const std::unordered_set& TypeSwitches() const { return type_switches; } - bool DoesIndirectCalls() { return does_indirect_calls; } + bool DoesIndirectCalls() const { return does_indirect_calls; } int NumParams() const { return num_params; } int NumLambdas() const { return lambdas.size(); } @@ -175,6 +180,12 @@ protected: // captured in "inits". std::unordered_map assignees; + // ### + std::unordered_set non_local_assignees; + + std::set> aggr_refs; + std::unordered_set aggr_mods; + // Same for locals seen in initializations, so we can find, // for example, unused aggregates. IDSet inits; @@ -211,9 +222,13 @@ protected: // the same type can be seen numerous times. std::unordered_set types; + std::unordered_map> type_aliases; + // The same, but in a deterministic order, with duplicates removed. std::vector ordered_types; + std::unordered_set modified_aggrs; + // Script functions that this script calls. Includes calls made // by lambdas and when bodies, as the goal is to identify recursion. std::unordered_set script_calls; @@ -229,7 +244,7 @@ protected: std::unordered_set events; // Attributes seen in set or table constructors. - std::unordered_set constructor_attrs; + std::unordered_map constructor_attrs; // Switch statements with either expression cases or type cases. std::unordered_set expr_switches; @@ -286,11 +301,17 @@ public: const std::unordered_set& Lambdas() const { return lambdas; } const std::unordered_set& Events() const { return events; } - std::shared_ptr FuncProf(const ScriptFunc* f) { return func_profs[f]; } + // ### Might not be needed if the lambda is found in ExprProf. + const auto& FuncProfs() const { return func_profs; } - // This is only externally germane for LambdaExpr's. + // Profiles associated with LambdaExpr's and expressions appearing in + // attributes. std::shared_ptr ExprProf(const Expr* e) { return expr_profs[e]; } + // Expression-valued attributes that appear in the context of different + // types. + const auto& ExprAttrs() const { return expr_attrs; } + // Returns the "representative" Type* for the hash associated with // the parameter (which might be the parameter itself). const Type* TypeRep(const Type* orig) { @@ -332,8 +353,25 @@ protected: void ComputeProfileHash(std::shared_ptr pf); // Analyze the expressions and lambdas appearing in a set of - // attributes. - void AnalyzeAttrs(const Attributes* Attrs); + // attributes, in the context of a given type. "field" is only + // meaningful if "t" is a RecordType. + void AnalyzeAttrs(const Attributes* attrs, const Type* t, int field = 0); + + void ComputeSideEffects(); + + bool DefinitelyHasNoSideEffects(const ExprPtr& e) const; + + std::vector AssociatedAttrs(const Type* t, int f); + + // ### False on can't-make-decision-yet + bool AssessSideEffects(const ExprPtr& e, IDSet& non_local_ids, std::unordered_set& types, + bool& is_unknown); + bool AssessSideEffects(const ProfileFunc* e, IDSet& non_local_ids, std::unordered_set& types, + bool& is_unknown); + + // ### const? etc. + bool AssessAggrEffects(SideEffectsOp::AccessType access, const Type* t, int f, IDSet& non_local_ids, + std::unordered_set& aggrs, bool& is_unknown); // Globals seen across the functions, other than those solely seen // as the function being called in a call. @@ -357,6 +395,9 @@ protected: // Maps a type to its representative (which might be itself). std::unordered_map type_to_rep; + // ### + std::unordered_map> type_aliases; + // Script functions that get called. std::unordered_set script_calls; @@ -369,7 +410,7 @@ protected: // Names of generated events. std::unordered_set events; - // Maps script functions to associated profiles. This isn't + // ### Maps script functions to associated profiles. This isn't // actually well-defined in the case of event handlers and hooks, // which can have multiple bodies. However, the need for this // is temporary (it's for skipping compilation of functions that @@ -381,9 +422,22 @@ protected: // management. std::unordered_map> expr_profs; + // Maps expression-valued attributes to a collection of types in which + // the attribute appears. For records, the mapping also includes the + // field offset in the record. + std::unordered_map>> expr_attrs; + + std::unordered_map>> attr_side_effects; + // These remaining member variables are only used internally, // not provided via accessors: + // ### + std::unordered_set candidates; + + // ### + std::unordered_set> active_func_profiles; + // Maps types to their hashes. std::unordered_map type_hashes; @@ -400,6 +454,10 @@ protected: // record attributes. std::vector pending_exprs; + // ### + std::vector> side_effects_ops; + + // Whether the hashes for extended records should cover their final, // full form, or only their original fields. bool full_record_hashes; diff --git a/src/script_opt/Reduce.cc b/src/script_opt/Reduce.cc index 580f32806a..de1b144d12 100644 --- a/src/script_opt/Reduce.cc +++ b/src/script_opt/Reduce.cc @@ -911,6 +911,31 @@ TraversalCode CSE_ValidityChecker::PreExpr(const Expr* e) { case EXPR_CALL: if ( sensitive_to_calls ) { + auto c = e->AsCallExpr(); + auto func = c->Func(); + std::string desc; + if ( func->Tag() == EXPR_NAME ) { + auto f = func->AsNameExpr()->Id(); + if ( f->IsGlobal() ) { + auto func_v = f->GetVal(); + if ( func_v ) { + auto func_vf = func_v->AsFunc(); + + if ( func_vf->GetKind() == Func::SCRIPT_FUNC ) + desc = "script"; + else + desc = "BiF"; + } + else + desc = "missing"; + } + else + desc = "indirect"; + } + else + desc = "compound-indirect"; + + // printf("call sensitivity: %s %s\n", desc.c_str(), obj_desc(e).c_str()); is_valid = false; return TC_ABORTALL; } diff --git a/src/script_opt/SideEffects.h b/src/script_opt/SideEffects.h new file mode 100644 index 0000000000..8591e6fa81 --- /dev/null +++ b/src/script_opt/SideEffects.h @@ -0,0 +1,52 @@ +// See the file "COPYING" in the main distribution directory for copyright. + +// Analyses regarding operations where non-locals or aggregates can be modified +// indirectly, in support of ensuring that after such an operation, script +// optimization doesn't use a stale version of the non-local/aggregate. + +#pragma once + +#include "zeek/ID.h" + +namespace zeek::detail { + +// Describes an operation for which some forms of access can lead to state +// modifications. +class SideEffectsOp { +public: + // ### remove NONE? + enum AccessType { NONE, READ, WRITE }; + + // SideEffectsOp() : access(NONE), type(nullptr) {} + // SideEffectsOp(AccessType at, const Type* t) : access(at), type(t) {} + SideEffectsOp(AccessType at, const Type* t, int f) : access(at), type(t), field(f) {} + + auto GetAccessType() const { return access; } + bool NoSideEffects() const { return access == NONE; } + bool OnReadAccess() const { return access == READ; } + bool OnWriteAccess() const { return access == WRITE; } + + const Type* GetType() const { return type; } + auto Field() const { return field; } + + void SetUnknownChanges() { has_unknown_changes = true; } + bool HasUnknownChanges() const { return has_unknown_changes; } + + void AddModNonGlobal(std::unordered_set ids) { mod_non_locals.insert(ids.begin(), ids.end()); } + void AddModAggrs(std::unordered_set types) { mod_aggrs.insert(types.begin(), types.end()); } + + const auto& ModNonLocals() const { return mod_non_locals; } + const auto& ModAggrs() const { return mod_aggrs; } + +private: + AccessType access; + const Type* type; // type for which some operations alter state + std::optional field; // field, if the type is a record + + std::unordered_set mod_non_locals; + std::unordered_set mod_aggrs; + + bool has_unknown_changes = false; +}; + +} // namespace zeek::detail diff --git a/src/script_opt/ZAM/maint/BiFs.list b/src/script_opt/ZAM/maint/BiFs.list new file mode 100644 index 0000000000..d07fcb33b5 --- /dev/null +++ b/src/script_opt/ZAM/maint/BiFs.list @@ -0,0 +1,543 @@ +Analyzer::__disable_all_analyzers +Analyzer::__disable_analyzer +Analyzer::__enable_analyzer +Analyzer::__has_tag +Analyzer::__name +Analyzer::__register_for_port +Analyzer::__schedule_analyzer +Analyzer::__tag +Broker::__append +Broker::__auto_publish +Broker::__auto_unpublish +Broker::__clear +Broker::__close +Broker::__create_clone +Broker::__create_master +Broker::__data +Broker::__data_type +Broker::__decrement +Broker::__erase +Broker::__exists +Broker::__flush_logs +Broker::__forward +Broker::__get +Broker::__get_index_from_value +Broker::__increment +Broker::__insert_into_set +Broker::__insert_into_table +Broker::__is_closed +Broker::__keys +Broker::__listen +Broker::__node_id +Broker::__opaque_clone_through_serialization +Broker::__peer +Broker::__peer_no_retry +Broker::__peers +Broker::__pop +Broker::__publish_id +Broker::__push +Broker::__put +Broker::__put_unique +Broker::__record_assign +Broker::__record_create +Broker::__record_iterator +Broker::__record_iterator_last +Broker::__record_iterator_next +Broker::__record_iterator_value +Broker::__record_lookup +Broker::__record_size +Broker::__remove_from +Broker::__set_clear +Broker::__set_contains +Broker::__set_create +Broker::__set_insert +Broker::__set_iterator +Broker::__set_iterator_last +Broker::__set_iterator_next +Broker::__set_iterator_value +Broker::__set_metrics_export_endpoint_name +Broker::__set_metrics_export_interval +Broker::__set_metrics_export_prefixes +Broker::__set_metrics_export_topic +Broker::__set_metrics_import_topics +Broker::__set_remove +Broker::__set_size +Broker::__store_name +Broker::__subscribe +Broker::__table_clear +Broker::__table_contains +Broker::__table_create +Broker::__table_insert +Broker::__table_iterator +Broker::__table_iterator_last +Broker::__table_iterator_next +Broker::__table_iterator_value +Broker::__table_lookup +Broker::__table_remove +Broker::__table_size +Broker::__unpeer +Broker::__unsubscribe +Broker::__vector_clear +Broker::__vector_create +Broker::__vector_insert +Broker::__vector_iterator +Broker::__vector_iterator_last +Broker::__vector_iterator_next +Broker::__vector_iterator_value +Broker::__vector_lookup +Broker::__vector_remove +Broker::__vector_replace +Broker::__vector_size +Broker::make_event +Broker::publish +Cluster::publish_hrw +Cluster::publish_rr +FileExtract::__set_limit +Files::__add_analyzer +Files::__analyzer_enabled +Files::__analyzer_name +Files::__disable_analyzer +Files::__disable_reassembly +Files::__enable_analyzer +Files::__enable_reassembly +Files::__file_exists +Files::__lookup_file +Files::__remove_analyzer +Files::__set_reassembly_buffer +Files::__set_timeout_interval +Files::__stop +Input::__create_analysis_stream +Input::__create_event_stream +Input::__create_table_stream +Input::__force_update +Input::__remove_stream +Log::__add_filter +Log::__create_stream +Log::__disable_stream +Log::__enable_stream +Log::__flush +Log::__remove_filter +Log::__remove_stream +Log::__set_buf +Log::__write +Option::any_set_to_any_vec +Option::set +Option::set_change_handler +PacketAnalyzer::GTPV1::remove_gtpv1_connection +PacketAnalyzer::TEREDO::remove_teredo_connection +PacketAnalyzer::__disable_analyzer +PacketAnalyzer::__enable_analyzer +PacketAnalyzer::__set_ignore_checksums_nets +PacketAnalyzer::register_packet_analyzer +PacketAnalyzer::register_protocol_detection +PacketAnalyzer::try_register_packet_analyzer_by_name +Pcap::error +Pcap::findalldevs +Pcap::get_filter_state +Pcap::get_filter_state_string +Pcap::install_pcap_filter +Pcap::precompile_pcap_filter +Reporter::conn_weird +Reporter::error +Reporter::fatal +Reporter::fatal_error_with_core +Reporter::file_weird +Reporter::flow_weird +Reporter::get_weird_sampling_duration +Reporter::get_weird_sampling_global_list +Reporter::get_weird_sampling_rate +Reporter::get_weird_sampling_threshold +Reporter::get_weird_sampling_whitelist +Reporter::info +Reporter::net_weird +Reporter::set_weird_sampling_duration +Reporter::set_weird_sampling_global_list +Reporter::set_weird_sampling_rate +Reporter::set_weird_sampling_threshold +Reporter::set_weird_sampling_whitelist +Reporter::warning +Spicy::__resource_usage +Spicy::__toggle_analyzer +Supervisor::__create +Supervisor::__destroy +Supervisor::__init_cluster +Supervisor::__is_supervised +Supervisor::__is_supervisor +Supervisor::__node +Supervisor::__restart +Supervisor::__status +Supervisor::__stem_pid +Telemetry::__collect_histogram_metrics +Telemetry::__collect_metrics +Telemetry::__dbl_counter_family +Telemetry::__dbl_counter_inc +Telemetry::__dbl_counter_metric_get_or_add +Telemetry::__dbl_counter_value +Telemetry::__dbl_gauge_dec +Telemetry::__dbl_gauge_family +Telemetry::__dbl_gauge_inc +Telemetry::__dbl_gauge_metric_get_or_add +Telemetry::__dbl_gauge_value +Telemetry::__dbl_histogram_family +Telemetry::__dbl_histogram_metric_get_or_add +Telemetry::__dbl_histogram_observe +Telemetry::__dbl_histogram_sum +Telemetry::__int_counter_family +Telemetry::__int_counter_inc +Telemetry::__int_counter_metric_get_or_add +Telemetry::__int_counter_value +Telemetry::__int_gauge_dec +Telemetry::__int_gauge_family +Telemetry::__int_gauge_inc +Telemetry::__int_gauge_metric_get_or_add +Telemetry::__int_gauge_value +Telemetry::__int_histogram_family +Telemetry::__int_histogram_metric_get_or_add +Telemetry::__int_histogram_observe +Telemetry::__int_histogram_sum +__init_primary_bifs +__init_secondary_bifs +active_file +addr_to_counts +addr_to_ptr_name +addr_to_subnet +all_set +anonymize_addr +any_set +backtrace +bare_mode +bloomfilter_add +bloomfilter_basic_init +bloomfilter_basic_init2 +bloomfilter_clear +bloomfilter_counting_init +bloomfilter_decrement +bloomfilter_internal_state +bloomfilter_intersect +bloomfilter_lookup +bloomfilter_merge +bytestring_to_count +bytestring_to_double +bytestring_to_float +bytestring_to_hexstr +calc_next_rotate +cat +cat_sep +ceil +check_subnet +clean +clear_table +close +community_id_v1 +compress_path +connection_exists +continue_processing +convert_for_pattern +count_substr +count_to_double +count_to_port +count_to_v4_addr +counts_to_addr +current_analyzer +current_event_time +current_time +decode_base64 +decode_base64_conn +decode_netbios_name +decode_netbios_name_type +disable_analyzer +disable_event_group +disable_module_events +do_profiling +double_to_count +double_to_int +double_to_interval +double_to_time +dump_current_packet +dump_packet +dump_rule_stats +edit +enable_event_group +enable_module_events +enable_raw_output +encode_base64 +ends_with +entropy_test_add +entropy_test_finish +entropy_test_init +enum_names +enum_to_int +escape_string +exit +exp +file_magic +file_mode +file_size +filter_subnet_table +find_all +find_all_ordered +find_entropy +find_last +find_str +floor +flush_all +fmt +fmt_ftp_port +fnv1a32 +from_json +generate_all_events +get_broker_stats +get_conn_stats +get_conn_transport_proto +get_contents_file +get_current_conn_bytes_threshold +get_current_conn_duration_threshold +get_current_conn_packets_threshold +get_current_packet +get_current_packet_header +get_dns_stats +get_event_handler_stats +get_event_stats +get_file_analysis_stats +get_file_name +get_gap_stats +get_identifier_comments +get_identifier_declaring_script +get_login_state +get_matcher_stats +get_net_stats +get_orig_seq +get_package_readme +get_port_transport_proto +get_proc_stats +get_reassembler_stats +get_record_field_comments +get_record_field_declaring_script +get_reporter_stats +get_resp_seq +get_script_comments +get_thread_stats +get_timer_stats +getenv +gethostname +getpid +global_container_footprints +global_ids +global_options +gsub +has_event_group +has_module_events +have_spicy +have_spicy_analyzers +haversine_distance +hexdump +hexstr_to_bytestring +hll_cardinality_add +hll_cardinality_copy +hll_cardinality_estimate +hll_cardinality_init +hll_cardinality_merge_into +hrw_weight +identify_data +install_dst_addr_filter +install_dst_net_filter +install_src_addr_filter +install_src_net_filter +int_to_count +int_to_double +interval_to_double +is_alnum +is_alpha +is_ascii +is_file_analyzer +is_icmp_port +is_local_interface +is_num +is_packet_analyzer +is_processing_suspended +is_protocol_analyzer +is_remote_event +is_tcp_port +is_udp_port +is_v4_addr +is_v4_subnet +is_v6_addr +is_v6_subnet +is_valid_ip +join_string_set +join_string_vec +levenshtein_distance +ljust +ln +load_CPP +log10 +log2 +lookup_ID +lookup_addr +lookup_autonomous_system +lookup_connection +lookup_hostname +lookup_hostname_txt +lookup_location +lstrip +mask_addr +match_signatures +matching_subnets +md5_hash +md5_hash_finish +md5_hash_init +md5_hash_update +md5_hmac +mkdir +mmdb_open_asn_db +mmdb_open_location_db +network_time +open +open_for_append +order +packet_source +paraglob_equals +paraglob_init +paraglob_match +parse_distinguished_name +parse_eftp_port +parse_ftp_epsv +parse_ftp_pasv +parse_ftp_port +piped_exec +port_to_count +pow +preserve_prefix +preserve_subnet +print_raw +ptr_name_to_addr +rand +raw_bytes_to_v4_addr +raw_bytes_to_v6_addr +reading_live_traffic +reading_traces +record_fields +record_type_to_vector +remask_addr +remove_prefix +remove_suffix +rename +resize +reverse +rfind_str +rjust +rmdir +rotate_file +rotate_file_by_name +routing0_data_to_addrs +rstrip +safe_shell_quote +same_object +sct_verify +set_buf +set_contents_file +set_current_conn_bytes_threshold +set_current_conn_duration_threshold +set_current_conn_packets_threshold +set_file_handle +set_inactivity_timeout +set_keys +set_login_state +set_network_time +set_record_packets +set_secret +set_ssl_established +setenv +sha1_hash +sha1_hash_finish +sha1_hash_init +sha1_hash_update +sha256_hash +sha256_hash_finish +sha256_hash_init +sha256_hash_update +skip_further_processing +skip_http_entity_data +skip_smtp_data +sort +split_string +split_string1 +split_string_all +split_string_n +sqrt +srand +starts_with +str_smith_waterman +str_split_indices +strcmp +strftime +string_cat +string_fill +string_to_ascii_hex +string_to_pattern +strip +strptime +strstr +sub +sub_bytes +subnet_to_addr +subnet_width +subst_string +suspend_processing +swap_case +syslog +system +system_env +table_keys +table_values +terminate +time_to_double +to_addr +to_count +to_double +to_int +to_json +to_lower +to_port +to_string_literal +to_subnet +to_title +to_upper +topk_add +topk_count +topk_epsilon +topk_get_top +topk_init +topk_merge +topk_merge_prune +topk_size +topk_sum +type_aliases +type_name +unescape_URI +uninstall_dst_addr_filter +uninstall_dst_net_filter +uninstall_src_addr_filter +uninstall_src_net_filter +unique_id +unique_id_from +unlink +uuid_to_string +val_footprint +write_file +x509_check_cert_hostname +x509_check_hostname +x509_from_der +x509_get_certificate_string +x509_issuer_name_hash +x509_ocsp_verify +x509_parse +x509_set_certificate_cache +x509_set_certificate_cache_hit_callback +x509_spki_hash +x509_subject_name_hash +x509_verify +zeek_args +zeek_is_terminating +zeek_version +zfill diff --git a/src/script_opt/ZAM/maint/README b/src/script_opt/ZAM/maint/README new file mode 100644 index 0000000000..0ebe927645 --- /dev/null +++ b/src/script_opt/ZAM/maint/README @@ -0,0 +1,14 @@ +This directory holds scripts and associated data to support maintenance of +ZAM optimization: + +list-bifs.zeek + A Zeek script that prints to stdout a list of the BiFs available + for the Zeek invocation. + + Use this to compare with BiFs.list to see whether there are any + new BiFs (or old ones that have been removed). If so, update + src/script_opt/FuncInfo.cc and then BiFs.list accordingly. + +BiFs.list + The BiFs that were present last time ZAM maintenance included + looking for any updates to available BiFs. diff --git a/src/script_opt/ZAM/maint/list-bifs.zeek b/src/script_opt/ZAM/maint/list-bifs.zeek new file mode 100644 index 0000000000..07215d37b6 --- /dev/null +++ b/src/script_opt/ZAM/maint/list-bifs.zeek @@ -0,0 +1,14 @@ +# Prints to stdout an alphabetized list of all of the BiFs registered with Zeek. +event zeek_init() + { + local bifs: vector of string; + + for ( gn, gi in global_ids() ) + if ( gi$type_name == "func" && gi?$value && fmt("%s", gi$value) == gn ) + bifs += gn; + + bifs = sort(bifs, strcmp); + + for ( _, b in bifs ) + print b; + }