mirror of
https://github.com/zeek/zeek.git
synced 2025-10-02 22:58:20 +00:00
Refactoring of meta data handling for intel.
To simplify meta data handling inside the intel framework and avoid duplicate insertion of meta data on update, meta data is stored in a table indexed by meta data source.
This commit is contained in:
parent
c5c650b486
commit
0146e85c41
1 changed files with 33 additions and 57 deletions
|
@ -151,16 +151,18 @@ global match_no_items: event(s: Seen);
|
||||||
|
|
||||||
# Internal events for cluster data distribution.
|
# Internal events for cluster data distribution.
|
||||||
global new_item: event(item: Item);
|
global new_item: event(item: Item);
|
||||||
global updated_item: event(item: Item);
|
|
||||||
|
|
||||||
# Optionally store metadata. This is used internally depending on
|
# Optionally store metadata. This is used internally depending on
|
||||||
# if this is a cluster deployment or not.
|
# if this is a cluster deployment or not.
|
||||||
const have_full_data = T &redef;
|
const have_full_data = T &redef;
|
||||||
|
|
||||||
|
# Table of meta data, indexed by source string.
|
||||||
|
type MetaDataTable: table[string] of MetaData;
|
||||||
|
|
||||||
# The in memory data structure for holding intelligence.
|
# The in memory data structure for holding intelligence.
|
||||||
type DataStore: record {
|
type DataStore: record {
|
||||||
host_data: table[addr] of set[MetaData];
|
host_data: table[addr] of MetaDataTable;
|
||||||
string_data: table[string, Type] of set[MetaData];
|
string_data: table[string, Type] of MetaDataTable;
|
||||||
};
|
};
|
||||||
global data_store: DataStore &redef;
|
global data_store: DataStore &redef;
|
||||||
|
|
||||||
|
@ -186,26 +188,23 @@ function find(s: Seen): bool
|
||||||
return ((s$host in min_data_store$host_data) ||
|
return ((s$host in min_data_store$host_data) ||
|
||||||
(have_full_data && s$host in data_store$host_data));
|
(have_full_data && s$host in data_store$host_data));
|
||||||
}
|
}
|
||||||
else if ( ([to_lower(s$indicator), s$indicator_type] in min_data_store$string_data) ||
|
|
||||||
(have_full_data && [to_lower(s$indicator), s$indicator_type] in data_store$string_data) )
|
|
||||||
{
|
|
||||||
return T;
|
|
||||||
}
|
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
return F;
|
return (([to_lower(s$indicator), s$indicator_type] in min_data_store$string_data) ||
|
||||||
|
(have_full_data && [to_lower(s$indicator), s$indicator_type] in data_store$string_data));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Function to abstract from different data stores for different indicator types.
|
||||||
function get_items(s: Seen): set[Item]
|
function get_items(s: Seen): set[Item]
|
||||||
{
|
{
|
||||||
local return_data: set[Item];
|
local return_data: set[Item];
|
||||||
|
local mt: MetaDataTable;
|
||||||
|
|
||||||
if ( ! have_full_data )
|
if ( ! have_full_data )
|
||||||
{
|
{
|
||||||
# A reporter warning should be generated here because this function
|
Reporter::warning(fmt("Intel::get_items was called from a host (%s) that doesn't have the full data.",
|
||||||
# should never be called from a host that doesn't have the full data.
|
peer_description));
|
||||||
# TODO: do a reporter warning.
|
|
||||||
return return_data;
|
return return_data;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -214,9 +213,10 @@ function get_items(s: Seen): set[Item]
|
||||||
# See if the host is known about and it has meta values
|
# See if the host is known about and it has meta values
|
||||||
if ( s$host in data_store$host_data )
|
if ( s$host in data_store$host_data )
|
||||||
{
|
{
|
||||||
for ( m in data_store$host_data[s$host] )
|
mt = data_store$host_data[s$host];
|
||||||
|
for ( m in mt )
|
||||||
{
|
{
|
||||||
add return_data[Item($indicator=cat(s$host), $indicator_type=ADDR, $meta=m)];
|
add return_data[Item($indicator=cat(s$host), $indicator_type=ADDR, $meta=mt[m])];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -226,9 +226,10 @@ function get_items(s: Seen): set[Item]
|
||||||
# See if the string is known about and it has meta values
|
# See if the string is known about and it has meta values
|
||||||
if ( [lower_indicator, s$indicator_type] in data_store$string_data )
|
if ( [lower_indicator, s$indicator_type] in data_store$string_data )
|
||||||
{
|
{
|
||||||
for ( m in data_store$string_data[lower_indicator, s$indicator_type] )
|
mt = data_store$string_data[lower_indicator, s$indicator_type];
|
||||||
|
for ( m in mt )
|
||||||
{
|
{
|
||||||
add return_data[Item($indicator=s$indicator, $indicator_type=s$indicator_type, $meta=m)];
|
add return_data[Item($indicator=s$indicator, $indicator_type=s$indicator_type, $meta=mt[m])];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -263,20 +264,6 @@ function Intel::seen(s: Seen)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
function has_meta(check: MetaData, metas: set[MetaData]): bool
|
|
||||||
{
|
|
||||||
local check_hash = md5_hash(check);
|
|
||||||
for ( m in metas )
|
|
||||||
{
|
|
||||||
if ( check_hash == md5_hash(m) )
|
|
||||||
return T;
|
|
||||||
}
|
|
||||||
|
|
||||||
# The records must not be equivalent if we made it this far.
|
|
||||||
return F;
|
|
||||||
}
|
|
||||||
|
|
||||||
event Intel::match(s: Seen, items: set[Item]) &priority=5
|
event Intel::match(s: Seen, items: set[Item]) &priority=5
|
||||||
{
|
{
|
||||||
local info = Info($ts=network_time(), $seen=s);
|
local info = Info($ts=network_time(), $seen=s);
|
||||||
|
@ -315,7 +302,8 @@ function insert(item: Item)
|
||||||
{
|
{
|
||||||
# Create and fill out the meta data item.
|
# Create and fill out the meta data item.
|
||||||
local meta = item$meta;
|
local meta = item$meta;
|
||||||
local metas: set[MetaData];
|
local meta_tbl: table [string] of MetaData;
|
||||||
|
local is_new: bool = T;
|
||||||
|
|
||||||
# All intelligence is case insensitive at the moment.
|
# All intelligence is case insensitive at the moment.
|
||||||
local lower_indicator = to_lower(item$indicator);
|
local lower_indicator = to_lower(item$indicator);
|
||||||
|
@ -326,9 +314,11 @@ function insert(item: Item)
|
||||||
if ( have_full_data )
|
if ( have_full_data )
|
||||||
{
|
{
|
||||||
if ( host !in data_store$host_data )
|
if ( host !in data_store$host_data )
|
||||||
data_store$host_data[host] = set();
|
data_store$host_data[host] = table();
|
||||||
|
else
|
||||||
|
is_new = F;
|
||||||
|
|
||||||
metas = data_store$host_data[host];
|
meta_tbl = data_store$host_data[host];
|
||||||
}
|
}
|
||||||
|
|
||||||
add min_data_store$host_data[host];
|
add min_data_store$host_data[host];
|
||||||
|
@ -338,39 +328,25 @@ function insert(item: Item)
|
||||||
if ( have_full_data )
|
if ( have_full_data )
|
||||||
{
|
{
|
||||||
if ( [lower_indicator, item$indicator_type] !in data_store$string_data )
|
if ( [lower_indicator, item$indicator_type] !in data_store$string_data )
|
||||||
data_store$string_data[lower_indicator, item$indicator_type] = set();
|
data_store$string_data[lower_indicator, item$indicator_type] = table();
|
||||||
|
else
|
||||||
|
is_new = F;
|
||||||
|
|
||||||
metas = data_store$string_data[lower_indicator, item$indicator_type];
|
meta_tbl = data_store$string_data[lower_indicator, item$indicator_type];
|
||||||
}
|
}
|
||||||
|
|
||||||
add min_data_store$string_data[lower_indicator, item$indicator_type];
|
add min_data_store$string_data[lower_indicator, item$indicator_type];
|
||||||
}
|
}
|
||||||
|
|
||||||
local updated = F;
|
|
||||||
if ( have_full_data )
|
if ( have_full_data )
|
||||||
{
|
{
|
||||||
for ( m in metas )
|
# Insert new meta data or update if already present
|
||||||
{
|
meta_tbl[meta$source] = meta;
|
||||||
if ( meta$source == m$source )
|
|
||||||
{
|
|
||||||
if ( has_meta(meta, metas) )
|
|
||||||
{
|
|
||||||
# It's the same item being inserted again.
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
# Same source, different metadata means updated item.
|
|
||||||
updated = T;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
add metas[item$meta];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( updated )
|
if ( is_new )
|
||||||
event Intel::updated_item(item);
|
# Trigger insert for cluster in case the item is new
|
||||||
else
|
# or insert was called on a worker
|
||||||
event Intel::new_item(item);
|
event Intel::new_item(item);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue