mirror of
https://github.com/zeek/zeek.git
synced 2025-10-02 14:48:21 +00:00

I replaced a few strcmps with either calls to std::str.compare or with the == operator of BroString. Also changed two of the input framework tests that did not pass anymore after the merge. The new SSH analyzer no longer loads the scripts that let network time run, hence those tests failed because updates were not propagated from the threads (that took a while to find.) * origin/topic/vladg/ssh: (25 commits) SSH: Register analyzer for 22/tcp. SSH: Add 22/tcp to likely_server_ports SSH: Ignore encrypted packets by default. SSH: Fix some edge-cases which created BinPAC exceptions SSH: Add memleak btest SSH: Update baselines SSH: Added some more events for SSH2 SSH: Intel framework integration (PUBKEY_HASH) Update baselines for new SSH analyzer. Update SSH policy scripts with new events. SSH: Add documentation Refactoring ssh-protocol.pac: SSH: Use the compression_algorithms const in another place. Some cleanup and refactoring on SSH main.bro. SSH: A bit of code cleanup. Move SSH constants to consts.pac SSH: Cleanup code style. SSH: Fix some memleaks. Refactored the SSH analyzer. Added supported for algorithm detection and more key exchange message types. Add host key support for SSH1. Add support for SSH1 Move SSH analyzer to new plugin architecture. ... Conflicts: scripts/base/protocols/ssh/main.bro testing/btest/Baseline/core.print-bpf-filters/output2 testing/btest/Baseline/plugins.hooks/output BIT-1344: #merged
376 lines
9.7 KiB
Text
376 lines
9.7 KiB
Text
##! The intelligence framework provides a way to store and query IP addresses
##! and strings (each tagged with an indicator type). Metadata can also be
##! associated with the intelligence, for example to make more informed
##! decisions about matching and handling of intelligence.
|
|
|
|
@load base/frameworks/notice
|
|
|
|
module Intel;
|
|
|
|
export {
	## The intelligence framework logging stream identifier.
	redef enum Log::ID += { LOG };

	## The kinds of intelligence indicators the framework can hold.
	type Type: enum {
		## An IP address.
		ADDR,
		## A complete URL, given without the ``"http://"`` prefix.
		URL,
		## The name of a piece of software.
		SOFTWARE,
		## An email address.
		EMAIL,
		## A DNS domain name.
		DOMAIN,
		## The name of a user.
		USER_NAME,
		## A file hash. The hash algorithm is not encoded in the type;
		## it is up to the user to query for any relevant hash types.
		FILE_HASH,
		## A file name. Typically used with protocols that give a
		## definite indication of a file name.
		FILE_NAME,
		## SHA-1 hash of a certificate.
		CERT_HASH,
		## MD5 hash of a public key. (SSH server host keys are a good
		## example.)
		PUBKEY_HASH,
	};

	## Descriptive data attached to an :bro:type:`Intel::Item`.
	type MetaData: record {
		## Arbitrary string naming where the data came from. By
		## convention this is the source name and the feed name joined
		## by a hyphen, for example: "source1-c&c".
		source: string;
		## Optional freeform description of the data.
		desc:   string &optional;
		## Optional URL pointing at more information about the data.
		url:    string &optional;
	};

	## A single piece of intelligence.
	type Item: record {
		## The indicator itself, as a string.
		indicator:      string;

		## What kind of data the indicator field holds.
		indicator_type: Type;

		## Metadata for the item; typically the more deeply descriptive
		## data for this piece of intelligence.
		meta:           MetaData;
	};

	## Locations in which data can be discovered. By convention the
	## names of values are prefixed with ``IN_``.
	type Where: enum {
		## Catchall for data whose provenance is unknown.
		IN_ANYWHERE,
	};

	## Describes a piece of data that was "seen" and should be checked
	## against the known intelligence.
	type Seen: record {
		## The string value, if the data is about a string.
		indicator:      string &log &optional;

		## What kind of data the indicator represents.
		indicator_type: Type &log &optional;

		## The address, if the data is about an IP address. When this
		## field is set, lookups are done on it, and on a match
		## :bro:id:`Intel::seen` fills in the indicator and sets the
		## indicator type to :bro:enum:`Intel::ADDR`.
		host:           addr &optional;

		## The location in which the data was discovered.
		where:          Where &log;

		## Name of the node on which the match was discovered.
		node:           string &optional &log;

		## The connection record, supplied for context when the data
		## was discovered within a connection.
		conn:           connection &optional;

		## The file record, supplied for context when the data was
		## discovered within a file.
		f:              fa_file &optional;
	};

	## The record written to the intel log for each positive hit within
	## the intelligence framework.
	type Info: record {
		## Time at which the data was discovered.
		ts:             time &log;

		## The uid of the connection, if a connection was associated
		## with this intelligence hit.
		uid:            string &log &optional;
		## The conn_id of the connection, if a connection was
		## associated with this intelligence hit.
		id:             conn_id &log &optional;

		## The uid of the file, if a file was associated with this
		## intelligence hit.
		fuid:           string &log &optional;
		## MIME type, if the hit is related to a file. Filled in
		## automatically when the $f field is provided and the file
		## record carries a MIME type.
		file_mime_type: string &log &optional;
		## Files can frequently be "described" to give a bit more
		## context. Filled in automatically when the $f field is
		## provided.
		file_desc:      string &log &optional;

		## The seen data that produced the hit.
		seen:           Seen &log;
		## The sources that supplied data resulting in this match.
		sources:        set[string] &log &default=string_set();
	};

	## Inserts a piece of intelligence into the framework.
	global insert: function(item: Item);

	## Declares that a piece of data was discovered so that it is
	## checked against the known intelligence for matches.
	global seen: function(s: Seen);

	## Raised when seen data matches the intelligence data. On clusters
	## there is no assurance as to where this event will be generated,
	## so do not assume that arbitrary global state beyond the given
	## data will be available.
	##
	## This is the primary mechanism through which a user takes action
	## based on data within the intelligence framework.
	global match: event(s: Seen, items: set[Item]);

	## Event registered with the intel log stream as its ``$ev``; it
	## carries the :bro:type:`Intel::Info` record being logged.
	global log_intel: event(rec: Info);
}
|
|
|
|
# Raised internally instead of Intel::match when a match was found on a
# node that has no metadata available for it.
global match_no_items: event(s: Seen);

# Raised internally by insert() so that intelligence can be distributed
# across a cluster: new_item for first-time data, updated_item when an
# already-known source supplies different metadata.
global new_item: event(item: Item);
global updated_item: event(item: Item);

# Whether this node stores the metadata alongside the indicators. It is
# used internally and redefined depending on whether this is a cluster
# deployment or not.
const have_full_data = T &redef;

# In-memory structure holding the intelligence together with its metadata.
type DataStore: record {
	host_data:   table[addr] of set[MetaData];
	string_data: table[string, Type] of set[MetaData];
};
global data_store: DataStore &redef;

# In-memory structure holding only the barest matchable intelligence.
# It primarily lets workers do the initial quick match while storing a
# minimal amount of data; the full match then happens on the manager.
type MinDataStore: record {
	host_data:   set[addr];
	string_data: set[string, Type];
};
global min_data_store: MinDataStore &redef;
|
|
|
|
|
|
# Registers the intel log stream at startup: Info supplies the columns,
# log_intel is the stream's event, and the log is written under the path
# "intel".
event bro_init() &priority=5
	{
	Log::create_stream(LOG, [
		$path="intel",
		$columns=Info,
		$ev=log_intel
	]);
	}
|
|
|
|
# Reports whether the seen data matches any known intelligence.
#
# s: The seen data. If $host is set, the lookup is done on the address;
#    otherwise it is done on the lower-cased $indicator together with
#    $indicator_type.
#
# Returns: T if the data is present in the minimal data store or, when
#          this node has the full data, in the full data store. F
#          otherwise, including when neither an address nor a complete
#          indicator/indicator_type pair was supplied.
function find(s: Seen): bool
	{
	if ( s?$host )
		{
		return ((s$host in min_data_store$host_data) ||
		        (have_full_data && s$host in data_store$host_data));
		}

	# Both fields are &optional. Accessing an unset field is a runtime
	# error that would abort the caller's handler, so treat incomplete
	# data as "no match" instead.
	if ( ! s?$indicator || ! s?$indicator_type )
		return F;

	# All string intelligence is stored lower-cased; normalize once.
	local lower_indicator = to_lower(s$indicator);

	return (([lower_indicator, s$indicator_type] in min_data_store$string_data) ||
	        (have_full_data && [lower_indicator, s$indicator_type] in data_store$string_data));
	}
|
|
|
|
# Collects every intelligence item, with metadata, that matches the seen
# data.
#
# s: The seen data. If $host is set the address store is consulted;
#    otherwise the string store is consulted with the lower-cased
#    $indicator and $indicator_type.
#
# Returns: One Item per stored MetaData entry for the matching indicator.
#          The set is empty when nothing matches, when this node does not
#          hold the full data, or when the seen record has neither an
#          address nor a complete indicator/indicator_type pair.
function get_items(s: Seen): set[Item]
	{
	local return_data: set[Item];

	if ( ! have_full_data )
		{
		# This function should never be called on a node that doesn't
		# have the full data, so make the misuse visible.
		Reporter::warning("Intel::get_items called on a node that does not have the full intelligence data");
		return return_data;
		}

	if ( s?$host )
		{
		# See if the host is known about and it has meta values.
		if ( s$host in data_store$host_data )
			{
			for ( m in data_store$host_data[s$host] )
				add return_data[Item($indicator=cat(s$host), $indicator_type=ADDR, $meta=m)];
			}

		return return_data;
		}

	# Both fields are &optional; without them there is nothing to look up
	# and accessing them would be a runtime error.
	if ( ! s?$indicator || ! s?$indicator_type )
		return return_data;

	# String intelligence is keyed by the lower-cased indicator, but the
	# returned items carry the indicator exactly as it was seen.
	local lower_indicator = to_lower(s$indicator);

	# See if the string is known about and it has meta values.
	if ( [lower_indicator, s$indicator_type] in data_store$string_data )
		{
		for ( m in data_store$string_data[lower_indicator, s$indicator_type] )
			add return_data[Item($indicator=s$indicator, $indicator_type=s$indicator_type, $meta=m)];
		}

	return return_data;
	}
|
|
|
|
# Checks a piece of seen data against the known intelligence and, on a
# hit, raises Intel::match (when this node has the full data) or the
# internal Intel::match_no_items event (when it does not).
#
# s: The seen data. On a hit the record is completed in place: address
#    data gets its $indicator/$indicator_type filled in, and $node
#    defaults to this node's peer description.
function Intel::seen(s: Seen)
	{
	# Nothing to do unless the data is known intelligence.
	if ( ! find(s) )
		return;

	# Address matches are reported with a string indicator as well.
	if ( s?$host )
		{
		s$indicator = cat(s$host);
		s$indicator_type = Intel::ADDR;
		}

	# Record where the match happened unless the caller already did.
	if ( ! s?$node )
		s$node = peer_description;

	if ( ! have_full_data )
		{
		event Intel::match_no_items(s);
		return;
		}

	event Intel::match(s, get_items(s));
	}
|
|
|
|
|
|
# Tells whether a set of metadata already contains an entry equivalent to
# the given one. Equivalence is decided by comparing the md5_hash() of
# the records.
#
# check: The metadata record to look for.
#
# metas: The set of metadata records to search.
#
# Returns: T if some member of metas hashes to the same value as check,
#          F otherwise.
function has_meta(check: MetaData, metas: set[MetaData]): bool
	{
	local wanted = md5_hash(check);
	local found = F;

	for ( candidate in metas )
		{
		if ( md5_hash(candidate) == wanted )
			{
			found = T;
			break;
			}
		}

	# If nothing was found, no record in the set is equivalent.
	return found;
	}
|
|
|
|
# Logs a match: builds the Info record from the seen data, enriches it
# with file and connection context where available, records the sources
# of all matching items and writes the result to the intel log.
event Intel::match(s: Seen, items: set[Item]) &priority=5
	{
	local info = Info($ts=network_time(), $seen=s);

	# Every matching item contributes its source to the log entry.
	for ( item in items )
		{
		add info$sources[item$meta$source];
		}

	if ( s?$f )
		{
		local file_rec = s$f;

		if ( ! info?$file_desc )
			info$file_desc = Files::describe(file_rec);

		if ( ! info?$file_mime_type && file_rec?$info && file_rec$info?$mime_type )
			info$file_mime_type = file_rec$info$mime_type;

		if ( ! info?$fuid )
			info$fuid = file_rec$id;

		# A file tied to exactly one connection lets that connection
		# serve as the context of the seen data.
		if ( file_rec?$conns && |file_rec$conns| == 1 )
			{
			for ( cid in file_rec$conns )
				{
				s$conn = file_rec$conns[cid];
				}
			}
		}

	# This runs after the file handling above, which may have just
	# supplied the connection.
	if ( s?$conn )
		{
		info$id = s$conn$id;
		info$uid = s$conn$uid;
		}

	Log::write(Intel::LOG, info);
	}
|
|
|
|
# Inserts a piece of intelligence into the data stores.
#
# The indicator is always added to the minimal data store. When this node
# holds the full data, the item's metadata is also added to the full
# store unless an equivalent metadata record is already present, in which
# case the function returns without raising an event.
#
# item: The intelligence item to insert. For items of type ADDR the
#       indicator is converted with to_addr(); all other indicators are
#       stored lower-cased.
#
# Raises Intel::updated_item when the full store already held metadata
# from the same source (with different content), Intel::new_item
# otherwise.
function insert(item: Item)
	{
	# The metadata to store and the set it will be stored in.
	local meta = item$meta;
	local metas: set[MetaData];

	# All intelligence is case insensitive at the moment.
	local lower_indicator = to_lower(item$indicator);

	if ( item$indicator_type == ADDR )
		{
		local host = to_addr(item$indicator);

		if ( have_full_data )
			{
			if ( host !in data_store$host_data )
				data_store$host_data[host] = set();

			metas = data_store$host_data[host];
			}

		add min_data_store$host_data[host];
		}
	else
		{
		if ( have_full_data )
			{
			if ( [lower_indicator, item$indicator_type] !in data_store$string_data )
				data_store$string_data[lower_indicator, item$indicator_type] = set();

			metas = data_store$string_data[lower_indicator, item$indicator_type];
			}

		add min_data_store$string_data[lower_indicator, item$indicator_type];
		}

	local updated = F;

	if ( have_full_data )
		{
		# Whether an equivalent record is already stored does not depend
		# on the loop variable, so determine it once instead of rehashing
		# the whole set for every entry from the same source.
		local is_duplicate = has_meta(meta, metas);

		for ( m in metas )
			{
			if ( meta$source == m$source )
				{
				# It's the same item being inserted again.
				if ( is_duplicate )
					return;

				# Same source, different metadata means updated item.
				# NOTE(review): the earlier metadata from this source
				# stays in the set -- confirm whether it should be
				# replaced instead.
				updated = T;
				break;
				}
			}

		add metas[item$meta];
		}

	if ( updated )
		event Intel::updated_item(item);
	else
		event Intel::new_item(item);
	}
|
|
|