Merge remote-tracking branch 'origin/master' into topic/johanna/spicy-tls

* origin/master: (139 commits)
  Given the -C flag, set script-layer ignore_checksums to true.
  Add btest for "-C" flag vs the script-layer ignore_checksums global.
  Update doc submodule [nomail] [skip ci]
  Remove references to bro_broker in broker/Manager.h
  cmake: Fixup BRO_PLUGIN_INSTALL_PATH references
  testing/external: Bump hashes for community_id addition
  NEWS: Add entry for Community ID
  policy: Import zeek-community-id scripts into protocols/conn frameworks/notice
  Add irc_dcc_send_ack event and fix missing fields
  Fix install directory for plugins
  Update doc submodule [nomail] [skip ci]
  Add community_id_v1() based on corelight/zeek-community-id
  Update NEWS to cover cluster framework changes.
  Add cluster_started restart tests.
  Add basic cluster_started tests.
  Add cluster_started and node_fully_connected events.
  Add hook into cluster connection setup.
  Add broadcast_topics set.
  Generalize Cluster::worker_count.
  Edit pass over the current 6.0 NEWS entries. [nomail] [skip ci]
  ...
This commit is contained in:
Johanna Amann 2023-04-25 12:27:32 +01:00
commit 63a4cc824a
462 changed files with 10072 additions and 4434 deletions

View file

@ -44,6 +44,13 @@ export {
## time machine nodes in a cluster. Used with broker-enabled cluster communication.
const time_machine_topic = "zeek/cluster/time_machine" &redef;
## A set of topic names to be used for broadcasting messages that are
## relevant to all nodes in a cluster. Currently, there is not a common
## topic to broadcast to, because enabling implicit Broker forwarding would
## cause a routing loop for this topic.
const broadcast_topics = { logger_topic, manager_topic, proxy_topic,
worker_topic, time_machine_topic };
## The topic prefix used for exchanging messages that are relevant to
## a named node in a cluster. Used with broker-enabled cluster communication.
const node_topic_prefix = "zeek/cluster/node/" &redef;
@ -184,6 +191,12 @@ export {
id: string &optional;
};
## Record to represent a cluster node including its name.
type NamedNode: record {
name: string;
node: Node;
};
## This function can be called at any time to determine if the cluster
## framework is being enabled for this run.
##
@ -202,7 +215,7 @@ export {
## and it's maintained internally by the cluster framework. It's
## primarily intended for use by managers to find out how many workers
## should be responding to requests.
global worker_count: count = 0;
global worker_count: count = 0 &deprecated="Remove in v6.1. Active worker count can be obtained via get_active_node_count(Cluster::WORKER)";
## The cluster layout definition. This should be placed into a filter
## named cluster-layout.zeek somewhere in the ZEEKPATH. It will be
@ -212,6 +225,15 @@ export {
## or "worker-1").
const nodes: table[string] of Node = {} &redef;
## Returns the number of nodes defined in the cluster layout for a given
## node type.
global get_node_count: function(node_type: NodeType): count;
## Returns the number of nodes of the given type that the calling node is
## currently connected to. This is primarily intended for use by the manager
## to find out how many nodes should be responding to requests.
global get_active_node_count: function(node_type: NodeType): count;
## Indicates whether or not the manager will act as the logger and receive
## logs. This value should be set in the cluster-layout.zeek script (the
## value should be true only if no logger is specified in Cluster::nodes).
@ -262,35 +284,41 @@ export {
global nodeid_topic: function(id: string): string;
}
global active_worker_ids: set[string] = set();
type NamedNode: record {
name: string;
node: Node;
};
# Track active nodes per type.
global active_node_ids: table[NodeType] of set[string];
function nodes_with_type(node_type: NodeType): vector of NamedNode
{
local rval: vector of NamedNode = vector();
local names: vector of string = vector();
for ( name in Cluster::nodes )
names += name;
names = sort(names, strcmp);
for ( i in names )
for ( name, n in Cluster::nodes )
{
name = names[i];
local n = Cluster::nodes[name];
if ( n$node_type != node_type )
next;
rval += NamedNode($name=name, $node=n);
}
return rval;
return sort(rval, function(n1: NamedNode, n2: NamedNode): int
{ return strcmp(n1$name, n2$name); });
}
function Cluster::get_node_count(node_type: NodeType): count
{
local cnt = 0;
for ( _, n in nodes )
{
if ( n$node_type == node_type )
++cnt;
}
return cnt;
}
function Cluster::get_active_node_count(node_type: NodeType): count
{
return |active_node_ids[node_type]|;
}
function is_enabled(): bool
@ -319,6 +347,8 @@ function nodeid_topic(id: string): string
return nodeid_topic_prefix + id + "/";
}
@if ( Cluster::is_enabled() )
event Cluster::hello(name: string, id: string) &priority=10
{
if ( name !in nodes )
@ -341,11 +371,14 @@ event Cluster::hello(name: string, id: string) &priority=10
n$id = id;
Cluster::log(fmt("got hello from %s (%s)", name, id));
if ( n$node_type !in active_node_ids )
active_node_ids[n$node_type] = set();
add active_node_ids[n$node_type][id];
@pragma push ignore-deprecations
if ( n$node_type == WORKER )
{
add active_worker_ids[id];
worker_count = |active_worker_ids|;
}
worker_count = |active_node_ids[WORKER]|;
@pragma pop ignore-deprecations
}
event Broker::peer_added(endpoint: Broker::EndpointInfo, msg: string) &priority=10
@ -365,12 +398,12 @@ event Broker::peer_lost(endpoint: Broker::EndpointInfo, msg: string) &priority=1
{
Cluster::log(fmt("node down: %s", node_name));
delete n$id;
delete active_node_ids[n$node_type][endpoint$id];
@pragma push ignore-deprecations
if ( n$node_type == WORKER )
{
delete active_worker_ids[endpoint$id];
worker_count = |active_worker_ids|;
}
worker_count = |active_node_ids[WORKER]|;
@pragma pop ignore-deprecations
event Cluster::node_down(node_name, endpoint$id);
break;
@ -390,6 +423,8 @@ event zeek_init() &priority=5
Log::create_stream(Cluster::LOG, [$columns=Info, $path="cluster", $policy=log_policy]);
}
@endif
function create_store(name: string, persistent: bool &default=F): Cluster::StoreInfo
{
local info = stores[name];

View file

@ -7,6 +7,15 @@
module Cluster;
export {
## This hook is called when the local node connects to other nodes based on
## the given cluster layout. Breaking from the hook will prevent connection
## establishment.
##
## connectee: The node to connect to.
global connect_node_hook: hook(connectee: NamedNode);
}
function connect_peer(node_type: NodeType, node_name: string)
{
local nn = nodes_with_type(node_type);
@ -17,12 +26,15 @@ function connect_peer(node_type: NodeType, node_name: string)
if ( n$name != node_name )
next;
if ( ! hook connect_node_hook(n) )
return;
local status = Broker::peer(cat(n$node$ip), n$node$p,
Cluster::retry_interval);
Cluster::log(fmt("initiate peering with %s:%s, retry=%s, status=%s",
n$node$ip, n$node$p, Cluster::retry_interval,
status));
return;
}
}
@ -33,6 +45,10 @@ function connect_peers_with_type(node_type: NodeType)
for ( i in nn )
{
local n = nn[i];
if ( ! hook connect_node_hook(n) )
next;
local status = Broker::peer(cat(n$node$ip), n$node$p,
Cluster::retry_interval);
Cluster::log(fmt("initiate peering with %s:%s, retry=%s, status=%s",

View file

@ -59,14 +59,8 @@ global Config::cluster_set_option: event(ID: string, val: any, location: string)
function broadcast_option(ID: string, val: any, location: string) &is_used
{
# There's currently no common topic to broadcast to, as enabling
# implicit Broker forwarding would cause a routing loop.
Broker::publish(Cluster::worker_topic, Config::cluster_set_option,
ID, val, location);
Broker::publish(Cluster::proxy_topic, Config::cluster_set_option,
ID, val, location);
Broker::publish(Cluster::logger_topic, Config::cluster_set_option,
ID, val, location);
for ( topic in Cluster::broadcast_topics )
Broker::publish(topic, Config::cluster_set_option, ID, val, location);
}
event Config::cluster_set_option(ID: string, val: any, location: string)

View file

@ -163,9 +163,6 @@ export {
};
## A function that one may use to customize log file rotation paths.
## Note that the "fname" field of the *ri* argument is always an
## empty string for the purpose of this function call (i.e. the full
## file name is not determined yet).
const rotation_format_func: function(ri: RotationFmtInfo): RotationPath &redef;
## Default naming format for timestamps embedded into filenames.

View file

@ -205,6 +205,8 @@ export {
["RST_with_data"] = ACTION_LOG,
["SSL_many_server_names"] = ACTION_LOG,
["simultaneous_open"] = ACTION_LOG_PER_CONN,
["smtp_mail_transaction_invalid"] = ACTION_LOG_PER_CONN,
["smtp_excessive_invalid_mail_transactions"] = ACTION_LOG_PER_CONN,
["spontaneous_FIN"] = ACTION_IGNORE,
["spontaneous_RST"] = ACTION_IGNORE,
["SMB_parsing_error"] = ACTION_LOG,

View file

@ -295,7 +295,6 @@ function handle_end_of_result_collection(uid: string, ss_name: string, key: Key,
return;
}
#print fmt("worker_count:%d :: done_with:%d", Cluster::worker_count, done_with[uid]);
local ss = stats_store[ss_name];
local ir = key_requests[uid];
if ( check_thresholds(ss, key, ir, 1.0) )
@ -357,7 +356,7 @@ event SumStats::send_no_key(uid: string, ss_name: string)
done_with[uid] = 0;
++done_with[uid];
if ( Cluster::worker_count == done_with[uid] )
if ( Cluster::get_active_node_count(Cluster::WORKER) == done_with[uid] )
{
delete done_with[uid];
@ -394,7 +393,7 @@ event SumStats::send_a_key(uid: string, ss_name: string, key: Key)
add stats_keys[uid][key];
++done_with[uid];
if ( Cluster::worker_count == done_with[uid] )
if ( Cluster::get_active_node_count(Cluster::WORKER) == done_with[uid] )
{
delete done_with[uid];
@ -437,7 +436,7 @@ event SumStats::cluster_send_result(uid: string, ss_name: string, key: Key, resu
++done_with[uid];
if ( uid !in dynamic_requests &&
uid in done_with && Cluster::worker_count == done_with[uid] )
uid in done_with && Cluster::get_active_node_count(Cluster::WORKER) == done_with[uid] )
{
handle_end_of_result_collection(uid, ss_name, key, cleanup);
@ -481,7 +480,8 @@ function request_key(ss_name: string, key: Key): Result
add dynamic_requests[uid];
event SumStats::cluster_get_result(uid, ss_name, key, F);
return when [uid, ss_name, key] ( uid in done_with && Cluster::worker_count == done_with[uid] )
return when [uid, ss_name, key] ( uid in done_with &&
Cluster::get_active_node_count(Cluster::WORKER) == done_with[uid] )
{
#print "done with request_key";
local result = key_requests[uid];

View file

@ -27,6 +27,9 @@ export {
## The interface name from which the node will read/analyze packets.
## Typically used by worker nodes.
interface: string &optional;
## The PCAP file name from which the node will read/analyze packets.
## Typically used by worker nodes.
pcap_file: string &optional;
};
## Configuration options that influence behavior of a supervised Zeek node.
@ -36,6 +39,8 @@ export {
name: string;
## The interface name from which the node will read/analyze packets.
interface: string &optional;
## The PCAP file name from which the node will read/analyze packets.
pcap_file: string &optional;
## The working directory that the node should use.
directory: string &optional;
## The filename/path to which the node's stdout will be redirected.

View file

@ -161,6 +161,32 @@ type PacketSource: record {
netmask: count;
};
## If a packet source does not yield packets for this amount of time,
## it is considered idle. When a packet source is found to be idle,
## Zeek will update network_time to the current time in order for timer
## expiration to function. A packet source that queues up packets but does
## not yield them for longer than this interval will provoke
## not-very-well-defined timer behavior.
##
## On Zeek workers with low packet rates, timer expiration may be delayed
## by up to this interval after the last packet has been received.
const packet_source_inactivity_timeout = 100msec &redef;
## Whether Zeek will forward network_time to the current time upon
## observing an idle packet source (or no configured packet source).
##
## Only set this to *F* if you really know what you're doing. Setting this to
## *F* on non-worker systems causes :zeek:see:`network_time` to be stuck
## at 0.0 and timer expiration will be non-functional.
##
## The main purpose of this option is to yield control over network time
## to plugins or scripts via broker or other non-timer events.
##
## .. zeek:see:: network_time set_network_time packet_source_inactivity_timeout
##
const allow_network_time_forward = T &redef;
## A connection's transport-layer protocol. Note that Zeek uses the term
## "connection" broadly, using flow semantics for ICMP and UDP.
type transport_proto: enum {
@ -643,15 +669,16 @@ type SYN_packet: record {
##
## .. zeek:see:: get_net_stats
type NetStats: record {
pkts_recvd: count &default=0; ##< Packets received by Zeek.
pkts_dropped: count &default=0; ##< Packets reported dropped by the system.
pkts_recvd: count &default=0; ##< Packets received by Zeek.
pkts_dropped: count &default=0; ##< Packets reported dropped by the system.
## Packets seen on the link. Note that this may differ
## from *pkts_recvd* because of a potential capture_filter. See
## :doc:`/scripts/base/frameworks/packet-filter/main.zeek`. Depending on the
## packet capture system, this value may not be available and will then
## be always set to zero.
pkts_link: count &default=0;
bytes_recvd: count &default=0; ##< Bytes received by Zeek.
pkts_link: count &default=0;
bytes_recvd: count &default=0; ##< Bytes received by Zeek.
pkts_filtered: count &optional; ##< Packets filtered by the packet source.
};
type ConnStats: record {
@ -2007,6 +2034,7 @@ type gtp_delete_pdp_ctx_response_elements: record {
# Prototypes of Zeek built-in functions.
@load base/bif/zeek.bif
@load base/bif/communityid.bif
@load base/bif/stats.bif
@load base/bif/reporter.bif
@load base/bif/strings.bif
@ -5181,6 +5209,32 @@ export {
## interfaces.
const bufsize = 128 &redef;
## Default timeout for packet sources without file descriptors.
##
## For libpcap based packet sources that do not provide a usable
## file descriptor for select(), the timeout provided to the IO
## loop is either zero if a packet was most recently available
## or else this value.
##
## Depending on the expected packet rate per-worker and the amount of
## available packet buffer, raising this value can significantly reduce
## Zeek's CPU usage at the cost of a small delay before processing
## packets. Setting this value too high may cause packet drops due
## to running out of available buffer space.
##
## Increasing this value to 200usec on low-traffic Myricom based systems
## (5 kpps per Zeek worker) has shown a 50% reduction in CPU usage.
##
## This is an advanced setting. Do monitor dropped packets and capture
## loss information when changing it.
##
## .. note:: Packet sources that override ``GetNextTimeout()`` method
## may not respect this value.
##
## .. zeek:see:: io_poll_interval_live
##
const non_fd_timeout = 20usec &redef;
## The definition of a "pcap interface".
type Interface: record {
## The interface/device name.
@ -5581,6 +5635,35 @@ const digest_salt = "Please change this value." &redef;
## :zeek:see:`find_all_ordered` BIFs.
const max_find_all_string_length: int = 10000 &redef;
## How many rounds to go without checking IO sources with file descriptors
## for readiness by default. This is used when reading from traces.
##
## Very roughly, when reading from a pcap, setting this to 100 results in
## 100 packets being processed without checking FD based IO sources.
##
## .. note:: This should not be changed outside of development or when
## debugging problems with the main-loop, or developing features with
## tight main-loop interaction.
##
## .. zeek:see:: io_poll_interval_live
const io_poll_interval_default = 100 &redef;
## How often to check IO sources with file descriptors for readiness when
## monitoring with a live packet source.
##
## The poll interval defaults to 100, which is good for cases like reading
## from pcap files or when there isn't a packet source, but is a little too
## infrequent for live sources (especially fast live sources). Set it down a
## little bit for those sources.
##
## .. note:: This should not be changed outside of development or when
## debugging problems with the main-loop, or developing features with
## tight main-loop interaction.
##
## .. zeek:see:: io_poll_interval_default
const io_poll_interval_live = 10 &redef;
global done_with_network = F;
event net_done(t: time)
{ done_with_network = T; }

View file

@ -316,12 +316,58 @@ event ftp_request(c: connection, command: string, arg: string) &priority=5
event ftp_reply(c: connection, code: count, msg: string, cont_resp: bool) &priority=5
{
set_ftp_session(c);
# Skip matching up intermediate reply lines (that do not have a
# valid status code) with pending commands. Because they may not
# have a proper status code, there's little point setting whatever
# their reply_code and reply_msg are on the command.
#
# There's a quirk: Some FTP servers return(ed?) replies like the
# following, violating the multi-line reply protocol:
#
# c: STOR intermol.ps
# s: 150 Opening ASCII mode data connection for 'intermol.ps'.
# s: 230- WARNING! 4 bare linefeeds received in ASCII mode
# s: File may not have transferred correctly.
# s: 226 Transfer complete.
#
# This is a multiline response started with 230-, but never finalized
# with the same status code. It should have been completed with
# "230 <some final message>", but instead was completed with "226 ...".
# This confuses our parser, returning cont_resp = T for all following
# server messages. This caused a regression as the current command wasn't
# updated for logging.
#
# The regex below is a best effort to keep existing behavior
# in face of such traffic. It matches on messages that look
# like valid status codes (starting with 3 digits followed by
# at least 10 ASCII characters).
#
# There's the following in RFC 959, so in the future we could push
# the detection/logic down into the parser instead of here.
#
# If an intermediary line begins with a 3-digit number, the Server
# must pad the front to avoid confusion.
#
if ( cont_resp && code == 0 && c$ftp?$reply_code )
{
if ( /^[1-9][0-9]{2} [[:print:]]{10}.*/ !in msg )
return;
else
{
# This might be worth raising a weird, but it's not clear whether
# doing so is worthwhile or how trigger-happy it would be.
# Reporter::conn_weird("FTP_intermediate_line_with_reply_code", c, msg, "FTP");
}
}
c$ftp$cmdarg = get_pending_cmd(c$ftp$pending_commands, code, msg);
c$ftp$reply_code = code;
c$ftp$reply_msg = msg;
# TODO: figure out what to do with continued FTP response (not used much)
if ( cont_resp ) return;
# Do not parse out information from any but the first reply line.
if ( cont_resp )
return;
# TODO: do some sort of generic clear text login processing here.
local response_xyz = parse_ftp_reply_code(code);

View file

@ -97,7 +97,7 @@ function log_dcc(f: fa_file)
}
}
event file_new(f: fa_file) &priority=-5
event file_sniff(f: fa_file, meta: fa_metadata) &priority=-5
{
if ( f$source == "IRC_DATA" )
log_dcc(f);

View file

@ -2,6 +2,7 @@
@load base/utils/directions-and-hosts
@load base/utils/email
@load base/protocols/conn/removal-hooks
@load base/frameworks/notice/weird
module SMTP;
@ -75,6 +76,11 @@ export {
messages_transferred: count &default=0;
pending_messages: set[Info] &optional;
trans_mail_from_seen: bool &default=F;
trans_rcpt_to_seen: bool &default=F;
invalid_transactions: count &default=0;
analyzer_id: count &optional;
};
## Direction to capture the full "Received from" path.
@ -91,6 +97,16 @@ export {
## SMTP finalization hook. Remaining SMTP info may get logged when it's called.
global finalize_smtp: Conn::RemovalHook;
## When seeing a RCPT TO or DATA command, validate that it has been
## preceded by a MAIL FROM or RCPT TO command, respectively, else
## log a weird and possibly disable the SMTP analyzer upon too
## many invalid transactions.
option mail_transaction_validation = T;
## Disable the SMTP analyzer when that many invalid transactions
## have been observed in an SMTP session.
option max_invalid_mail_transactions = 25;
}
redef record connection += {
@ -151,6 +167,22 @@ function set_smtp_session(c: connection)
c$smtp = new_smtp_log(c);
}
function mail_transaction_invalid(c: connection, addl: string)
{
Reporter::conn_weird("smtp_mail_transaction_invalid", c, addl, "SMTP");
++c$smtp_state$invalid_transactions;
if ( max_invalid_mail_transactions > 0
&& c$smtp_state$invalid_transactions > max_invalid_mail_transactions
&& c$smtp_state?$analyzer_id )
{
Reporter::conn_weird("smtp_excessive_invalid_mail_transactions", c, "", "SMTP");
if ( disable_analyzer(c$id, c$smtp_state$analyzer_id) )
delete c$smtp_state$analyzer_id;
}
}
function smtp_message(c: connection)
{
if ( c$smtp$has_client_activity )
@ -160,6 +192,15 @@ function smtp_message(c: connection)
}
}
event analyzer_confirmation_info(atype: AllAnalyzers::Tag, info: AnalyzerConfirmationInfo)
{
if ( atype != Analyzer::ANALYZER_SMTP )
return;
set_smtp_session(info$c);
info$c$smtp_state$analyzer_id = info$aid;
}
event smtp_request(c: connection, is_orig: bool, command: string, arg: string) &priority=5
{
set_smtp_session(c);
@ -184,6 +225,13 @@ event smtp_request(c: connection, is_orig: bool, command: string, arg: string) &
}
c$smtp$has_client_activity = T;
c$smtp_state$trans_rcpt_to_seen = T;
if ( mail_transaction_validation )
{
if ( ! c$smtp_state$trans_mail_from_seen )
mail_transaction_invalid(c, "rcpt to missing mail from");
}
}
else if ( upper_command == "MAIL" && /^[fF][rR][oO][mM]:/ in arg )
@ -195,6 +243,23 @@ event smtp_request(c: connection, is_orig: bool, command: string, arg: string) &
if ( mailfrom != "" )
c$smtp$mailfrom = mailfrom;
c$smtp$has_client_activity = T;
c$smtp_state$trans_mail_from_seen = T;
c$smtp_state$trans_rcpt_to_seen = F; # Reset state on MAIL FROM
}
else if ( upper_command == "DATA" )
{
if ( mail_transaction_validation )
{
if ( ! c$smtp_state$trans_rcpt_to_seen ) # mail from checked in rcpt to
mail_transaction_invalid(c, "data missing rcpt to");
}
}
else if ( upper_command == "." )
{
# Reset state when we're seeing a .
c$smtp_state$trans_mail_from_seen = F;
c$smtp_state$trans_rcpt_to_seen = F;
}
}