diff --git a/CHANGES b/CHANGES index 21bd77114d..4efd02a76a 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,37 @@ +8.1.0-dev.612 | 2025-09-29 18:04:24 +0200 + + * Supervisor: Make last_signal atomic to squelch data race (Arne Welzel, Corelight) + + When the stem process terminates and SIGCHLD is sent to the supervisor, + the signal might be handled by the main thread or any other threads that + aren't blocking SIGCHLD explicitly. Convert last_signal to a std::atomic + such that non-main threads can safely set last_signal without triggering + data race as reported by TSAN. This doesn't make it less racy to work + last_signal, but it appears we only use it for debug printing anyhow and + another option might have been to just remove last_signal altogether. + + Follow-up for #4849 + +8.1.0-dev.610 | 2025-09-29 08:21:01 -0700 + + * Update docs submodule [nomail] (Tim Wojtulewicz, Corelight) + +8.1.0-dev.609 | 2025-09-29 13:08:15 +0200 + + * cluster/zeromq: Fix Cluster::subscribe() block if not initialized (Arne Welzel, Corelight) + + If Cluster::init() hasn't been invoked yet, Cluster::subscribe() with the + ZeroMQ backend would block because the main_inproc socket didn't + yet have a connection from the child thread. Prevent this by connecting + the main and child socket pair at construction time. + + This will queue the subscriptions and start processing them once the + child thread has started. + +8.1.0-dev.607 | 2025-09-26 14:19:40 -0700 + + * Fixes for -O gen-standalone-C++ for tracking BiFs, lambdas, attribute types, and independent globals (Vern Paxson, Corelight) + 8.1.0-dev.605 | 2025-09-26 11:19:17 -0700 * OpaqueVal, OCSP, X509: drop outdated LibreSSL guards to fix OpenBSD (Klemens Nanni) diff --git a/VERSION b/VERSION index 1e5aee7b8b..2e45d2803d 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -8.1.0-dev.605 +8.1.0-dev.612 diff --git a/doc b/doc index 2731def915..8f38ae2fd5 160000 --- a/doc +++ b/doc @@ -1 +1 @@ -Subproject commit 2731def9159247e6da8a3191783c89683363689c +Subproject commit 8f38ae2fd563314393eb1ca58c827d26e9966520 diff --git a/src/cluster/backend/zeromq/ZeroMQ.cc b/src/cluster/backend/zeromq/ZeroMQ.cc index 4085179b8b..4ce1525987 100644 --- a/src/cluster/backend/zeromq/ZeroMQ.cc +++ b/src/cluster/backend/zeromq/ZeroMQ.cc @@ -85,6 +85,7 @@ ZeroMQBackend::ZeroMQBackend(std::unique_ptr es, std::unique_pt : ThreadedBackend("ZeroMQ", std::move(es), std::move(ls), std::move(ehs), new zeek::detail::OnLoopProcess(this, "ZeroMQ", onloop_queue_hwm)), main_inproc(zmq::socket_t(ctx, zmq::socket_type::pair)), + child_inproc(zmq::socket_t(ctx, zmq::socket_type::pair)), // Counters for block and drop metrics. total_xpub_drops( zeek::telemetry_mgr->CounterInstance("zeek", "cluster_zeromq_xpub_drops", {}, @@ -94,7 +95,13 @@ ZeroMQBackend::ZeroMQBackend(std::unique_ptr es, std::unique_pt "Number of received events dropped due to OnLoop queue full.")), total_msg_errors( zeek::telemetry_mgr->CounterInstance("zeek", "cluster_zeromq_msg_errors", {}, - "Number of events with the wrong number of message parts.")) {} + "Number of events with the wrong number of message parts.")) { + // Establish the socket connection between main thread and child thread + // already in the constructor. This allows Subscribe() and Unsubscribe() + // calls to be delayed until DoInit() was called. + main_inproc.bind("inproc://inproc-bridge"); + child_inproc.connect("inproc://inproc-bridge"); +} ZeroMQBackend::~ZeroMQBackend() { try { @@ -169,10 +176,11 @@ void ZeroMQBackend::DoTerminate() { ctx.shutdown(); // Close the sockets that are used from the main thread, - // the remaining sockets were closed by self_thread during - // shutdown already. + // the remaining sockets except for the child_inproc one + // were closed by self_thread during shutdown already. log_push.close(); main_inproc.close(); + child_inproc.close(); ZEROMQ_DEBUG("Closing ctx"); ctx.close(); @@ -197,7 +205,6 @@ bool ZeroMQBackend::DoInit() { xpub = zmq::socket_t(ctx, zmq::socket_type::xpub); log_push = zmq::socket_t(ctx, zmq::socket_type::push); log_pull = zmq::socket_t(ctx, zmq::socket_type::pull); - child_inproc = zmq::socket_t(ctx, zmq::socket_type::pair); xpub.set(zmq::sockopt::linger, linger_ms); @@ -286,10 +293,6 @@ bool ZeroMQBackend::DoInit() { // // https://funcptr.net/2012/09/10/zeromq---edge-triggered-notification/ - // Setup connectivity between main and child thread. - main_inproc.bind("inproc://inproc-bridge"); - child_inproc.connect("inproc://inproc-bridge"); - // Thread is joined in backend->DoTerminate(), backend outlives it. self_thread = std::thread([](auto* backend) { backend->Run(); }, this); @@ -634,7 +637,6 @@ void ZeroMQBackend::Run() { // Called when Run() terminates. auto deferred_close = util::Deferred([this]() { - child_inproc.close(); xpub.close(); xsub.close(); log_pull.close(); diff --git a/src/script_opt/CPP/Driver.cc b/src/script_opt/CPP/Driver.cc index b7cfd4a625..06037eeb42 100644 --- a/src/script_opt/CPP/Driver.cc +++ b/src/script_opt/CPP/Driver.cc @@ -83,9 +83,22 @@ void CPPCompile::Compile(bool report_uncompilable) { accessed_globals.insert(g); for ( auto& ag : pf->AllGlobals() ) all_accessed_globals.insert(ag); + for ( auto& l : pf->Lambdas() ) + // We might not have profiled this previously if none + // of the functions refer to the global. This can + // happen for example for a global "const" table that's + // made available for external lookup use. + pfs->ProfileLambda(l); } } + for ( auto& g : pfs->BiFGlobals() ) + all_accessed_globals.insert(g); + + for ( auto& t : pfs->MainTypes() ) + if ( obj_matches_opt_files(t) == AnalyzeDecision::SHOULD ) + rep_types.insert(TypeRep(t)); + for ( auto& l : pfs->Lambdas() ) if ( obj_matches_opt_files(l) == AnalyzeDecision::SHOULD ) accessed_lambdas.insert(l); diff --git a/src/script_opt/ProfileFunc.cc b/src/script_opt/ProfileFunc.cc index 95bcd405e6..0bf0d7ff96 100644 --- a/src/script_opt/ProfileFunc.cc +++ b/src/script_opt/ProfileFunc.cc @@ -622,6 +622,20 @@ ProfileFuncs::ProfileFuncs(std::vector& funcs, is_compilable_pred pred ComputeSideEffects(); } +void ProfileFuncs::ProfileLambda(const LambdaExpr* l) { + if ( lambdas.contains(l) ) + return; + + lambdas.insert(l); + pending_exprs.push_back(l); + + do + DrainPendingExprs(); + while ( ! pending_exprs.empty() ); + + AnalyzeLambdaProfile(l); +} + bool ProfileFuncs::IsTableWithDefaultAggr(const Type* t) { auto analy = tbl_has_aggr_default.find(t); if ( analy != tbl_has_aggr_default.end() ) @@ -848,14 +862,22 @@ void ProfileFuncs::ComputeBodyHashes(std::vector& funcs) { ComputeProfileHash(f.ProfilePtr()); } - for ( auto& l : lambdas ) { - auto pf = ExprProf(l); - func_profs[l->PrimaryFunc().get()] = pf; - lambda_primaries[l->Name()] = l->PrimaryFunc().get(); + for ( auto& l : lambdas ) + AnalyzeLambdaProfile(l); +} - if ( compute_func_hashes || ! pf->HasHashVal() ) - ComputeProfileHash(pf); - } +void ProfileFuncs::AnalyzeLambdaProfile(const LambdaExpr* l) { + if ( processed_lambdas.contains(l) ) + return; + + processed_lambdas.insert(l); + + auto pf = ExprProf(l); + func_profs[l->PrimaryFunc().get()] = pf; + lambda_primaries[l->Name()] = l->PrimaryFunc().get(); + + if ( compute_func_hashes || ! pf->HasHashVal() ) + ComputeProfileHash(pf); } void ProfileFuncs::ComputeProfileHash(std::shared_ptr pf) { diff --git a/src/script_opt/ProfileFunc.h b/src/script_opt/ProfileFunc.h index d4ad16300e..7b6186eb06 100644 --- a/src/script_opt/ProfileFunc.h +++ b/src/script_opt/ProfileFunc.h @@ -366,6 +366,10 @@ public: ProfileFuncs(std::vector& funcs, is_compilable_pred pred, bool compute_func_hashes, bool full_record_hashes); + // Used to profile additional lambdas that (potentially) weren't part + // of the overall function profiling. + void ProfileLambda(const LambdaExpr* l); + // The following accessors provide a global profile across all of // the (non-skipped) functions in "funcs". See the comments for // the associated member variables for documentation. @@ -449,6 +453,9 @@ protected: // Compute hashes to associate with each function void ComputeBodyHashes(std::vector& funcs); + // For a given lambda, completes analysis of its profile. + void AnalyzeLambdaProfile(const LambdaExpr* l); + // Compute the hash associated with a single function profile. void ComputeProfileHash(std::shared_ptr pf); @@ -540,6 +547,10 @@ protected: // And for lambda's. std::unordered_set lambdas; + // Lambdas that we have already processed. An optimization to avoid + // unnecessary work. + std::unordered_set processed_lambdas; + // Names of generated events. std::unordered_set events; diff --git a/src/supervisor/Supervisor.cc b/src/supervisor/Supervisor.cc index 48894d1f0e..974367c87c 100644 --- a/src/supervisor/Supervisor.cc +++ b/src/supervisor/Supervisor.cc @@ -415,7 +415,7 @@ static ForkResult fork_with_stdio_redirect(const char* where) { void Supervisor::HandleChildSignal() { if ( last_signal >= 0 ) { - DBG_LOG(DBG_SUPERVISOR, "Supervisor received signal %d", last_signal); + DBG_LOG(DBG_SUPERVISOR, "Supervisor received signal %d", last_signal.load()); last_signal = -1; } diff --git a/src/supervisor/Supervisor.h b/src/supervisor/Supervisor.h index c433c0536a..84c5595138 100644 --- a/src/supervisor/Supervisor.h +++ b/src/supervisor/Supervisor.h @@ -3,6 +3,7 @@ #pragma once #include +#include #include #include #include @@ -323,7 +324,7 @@ private: Config config; pid_t stem_pid; - int last_signal = -1; + std::atomic last_signal = -1; std::unique_ptr stem_pipe; detail::LineBufferedPipe stem_stdout; detail::LineBufferedPipe stem_stderr; diff --git a/testing/btest/Baseline/cluster.zeromq.subscribe-before-init/..manager.out b/testing/btest/Baseline/cluster.zeromq.subscribe-before-init/..manager.out new file mode 100644 index 0000000000..e4f236ab1a --- /dev/null +++ b/testing/btest/Baseline/cluster.zeromq.subscribe-before-init/..manager.out @@ -0,0 +1,2 @@ +### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. +test(), 42 diff --git a/testing/btest/cluster/zeromq/subscribe-before-init.zeek b/testing/btest/cluster/zeromq/subscribe-before-init.zeek new file mode 100644 index 0000000000..4040fd2399 --- /dev/null +++ b/testing/btest/cluster/zeromq/subscribe-before-init.zeek @@ -0,0 +1,55 @@ +# @TEST-DOC: Regression test Cluster::subscribe() blocking if called in a high-priority zeek_init() handler +# +# @TEST-REQUIRES: have-zeromq +# +# @TEST-GROUP: cluster-zeromq +# +# @TEST-PORT: XPUB_PORT +# @TEST-PORT: XSUB_PORT +# @TEST-PORT: LOG_PULL_PORT +# +# @TEST-EXEC: cp $FILES/zeromq/cluster-layout-simple.zeek cluster-layout.zeek +# @TEST-EXEC: cp $FILES/zeromq/test-bootstrap.zeek zeromq-test-bootstrap.zeek +# +# @TEST-EXEC: zeek --parse-only ./manager.zeek ./worker.zeek +# +# @TEST-EXEC: btest-bg-run manager "ZEEKPATH=$ZEEKPATH:.. && CLUSTER_NODE=manager zeek -b ../manager.zeek >out" +# @TEST-EXEC: btest-bg-run worker "ZEEKPATH=$ZEEKPATH:.. && CLUSTER_NODE=worker-1 zeek -b ../worker.zeek >out" +# +# @TEST-EXEC: btest-bg-wait 30 +# @TEST-EXEC: btest-diff ./manager/out + +# @TEST-START-FILE common.zeek +@load ./zeromq-test-bootstrap + +global test: event(c: count) &is_used; + +# @TEST-END-FILE + +# @TEST-START-FILE manager.zeek +@load ./common.zeek + +event zeek_init() &priority=1000000 + { + Cluster::subscribe("test.early"); + } + +event test(c: count) + { + print "test()", c; + } + +event Cluster::node_down(name: string, id: string) + { + terminate(); + } +# @TEST-END-FILE + +# @TEST-START-FILE worker.zeek +@load ./common.zeek +event Cluster::node_up(name: string, id: string) + { + Cluster::publish("test.early", test, 42); + terminate(); + } +# @TEST-END-FILE