zeek/policy/anon.bro

193 lines
4.6 KiB
Text

# $Id: anon.bro 6889 2009-08-21 16:45:17Z vern $
# Switch on IP address anonymization (the flag itself is declared outside
# this script and only redefined here).
redef anonymize_ip_addr = T;
# Anonymization method chosen for originator, responder and "other"
# addresses.  These constants are not read anywhere in this script;
# presumably they are consumed by whatever implements anonymize_addr() --
# TODO confirm.
const orig_addr_anonymization = RANDOM_MD5 &redef;
const resp_addr_anonymization = RANDOM_MD5 &redef;
const other_addr_anonymization = SEQUENTIALLY_NUMBERED &redef;
# Per-class address sets, empty by default except for 0.0.0.0 in the
# "other" class.  Not referenced in this script; presumably addresses
# listed here are left un-anonymized -- TODO confirm against the consumer.
const preserve_orig_addr: set[addr] = {} &redef;
const preserve_resp_addr: set[addr] = {} &redef;
const preserve_other_addr: set[addr] = {
0.0.0.0,
} &redef;
# Subnets to preserve (empty by default; the commented entry is an example).
# Not referenced in this script.
const preserved_subnet: set[subnet] = {
# 192.150.186/23,
} &redef;
# Networks to preserve.  Every entry is handed to preserve_net() from
# bro_init().
const preserved_net: set[net] = {
# 192.150.186, 192.150.187,
} &redef;
# Log file that the handlers and functions in this script print to.
global anon_log = open_log_file("anon") &redef;
# Cache of anonymize_arg() results, keyed by the concatenation of the
# argument type and the argument itself.
global anonymized_args: table[string] of string;
# (original, mapped) address pairs that anonymization_mapping() has already
# written to anon_log.
global ip_anon_mapping: set[addr, addr];
# At startup, register every configured network with preserve_net().
event bro_init()
	{
	for ( preserved in preserved_net )
		{
		preserve_net(preserved);
		}
	}
# Anonymize address "a" seen in the context of connection "id".
#
# The address class passed to anonymize_addr() depends on which endpoint
# of the connection "a" equals: ORIG_ADDR for the originator, RESP_ADDR
# for the responder, OTHER_ADDR for anything else.  The originator check
# is made first, so it wins when both endpoints are the same address.
function anonymize_address(a: addr, id: conn_id): addr
	{
	if ( a == id$orig_h )
		return anonymize_addr(a, ORIG_ADDR);

	if ( a == id$resp_h )
		return anonymize_addr(a, RESP_ADDR);

	return anonymize_addr(a, OTHER_ADDR);
	}
# Record an original -> anonymized address mapping in anon_log.
# Each distinct (orig, mapped) pair is logged only once.
event anonymization_mapping(orig: addr, mapped: addr)
	{
	# Already logged: nothing to do.
	if ( [orig, mapped] in ip_anon_mapping )
		return;

	add ip_anon_mapping[orig, mapped];
	print anon_log, fmt("%s -> %s", orig, mapped);
	}
# Log that string "from" was anonymized to "to" using hash seed "seed".
function string_anonymized(from: string, to: string, seed: count)
	{
	local entry = fmt("\"%s\" %d=> \"%s\"", from, seed, to);

	print anon_log, entry;
	}
# Not referenced anywhere else in this part of the script.
global num_string_id: count = 0 &redef;
# Maps an original string to its anonymized replacement.  Being &redef,
# the table can be pre-populated from outside this script.
global anonymized_strings: table[string] of record {
# The anonymized token that replaces the original string.
s: string;
# Usage counter: set to 1 when the entry is created by unique_string() and
# incremented by anonymize_string() on each later lookup.
c: count;
} &redef;
# Hopefully, the total number of strings to anonymize is much less than
# 36^unique_string_length.
# NOTE(review): the token body is a prefix of the md5_hmac() output; if that
# output is hexadecimal (as the commented-out pattern below suggests), the
# space is 16^unique_string_length rather than 36^ -- confirm.
const unique_string_length = 8 &redef;
# const anonymized_string_pattern = /U[0-9a-f]+U/;
# All anonymized tokens handed out so far; unique_string() consults it to
# detect collisions.
global unique_string_set: set[string];
# At startup, seed unique_string_set with the tokens of every entry that is
# already present in anonymized_strings, so that unique_string() will not
# hand out one of those tokens again.
event bro_init()
	{
	for ( original in anonymized_strings )
		{
		add unique_string_set[anonymized_strings[original]$s];
		}
	}
# Create and register a fresh anonymized token for string "s".
#
# The token is "U" + the first unique_string_length bytes of
# md5_hmac(seed, s) + "U".  If that token is already in use, the function
# retries with seed+1.  On success the mapping is stored in
# anonymized_strings (with a count of 1), the token is added to
# unique_string_set, and the mapping is logged via string_anonymized().
#
# Returns the newly assigned token.
function unique_string(s: string, seed: count): string
	{
	local digest = md5_hmac(seed, s);
	local token = cat("U", sub_bytes(digest, 1, unique_string_length), "U");

	if ( token !in unique_string_set )
		{
		anonymized_strings[s] = [$s = token, $c = 1];
		add unique_string_set[token];
		string_anonymized(s, token, seed);
		return token;
		}

	# Collision with an existing token: try the next seed.
	return unique_string(s, seed + 1);
	}
# Return the anonymized token for "from".
#
# A string seen for the first time gets a new token from unique_string()
# (starting at seed 0).  For a string that already has an entry in
# anonymized_strings, its usage counter is incremented and the existing
# token is returned.
function anonymize_string(from: string): string
	{
	if ( from !in anonymized_strings )
		return unique_string(from, 0);

	++anonymized_strings[from]$c;
	return anonymized_strings[from]$s;
	}
# Anonymize argument "arg" of kind "typ" (e.g. "path" or "host").
#
# The string that actually gets anonymized is the concatenation of typ and
# arg, so the same text used as two different kinds of argument receives
# two different tokens.  Results are cached in anonymized_args; the first
# time an argument is seen, the mapping is also written to anon_log.
#
# Returns "" for an empty argument, otherwise the anonymized token.
function anonymize_arg(typ: string, arg: string): string
	{
	if ( arg == "" )
		return "";	# an empty argument is safe

	local arg_seed = string_cat(typ, arg);

	if ( arg_seed in anonymized_args )
		{
		# Keep the usage counter in step with anonymize_string(): without
		# this, a cache hit here bypasses the increment and the
		# "appearance" counts reported at bro_done() never grow for
		# arguments anonymized through this function.
		if ( arg_seed in anonymized_strings )
			++anonymized_strings[arg_seed]$c;

		return anonymized_args[arg_seed];
		}

	local a = anonymize_string(arg_seed);
	anonymized_args[arg_seed] = a;

	print anon_log, fmt("anonymize_arg: (%s) {%s} -> %s ",
			typ, to_string_literal(arg), to_string_literal(a));

	return a;
	}
# Path to a file: starts with "/", does not contain "?", and ends with "."
# followed by an allowed suffix.  anonymize_path() compares the lower-cased
# path against this pattern.
const path_to_file_pat =
/\/[^?]+\.(html|ico|icon|pdf|ps|doc|ppt|htm|js|crl|swf|shtml|h|old|c|cc|java|class|src|cfm|gif|jpg|php|rdf|rss|asp|bmp|owl|phtml|jpeg|jsp|cgi|png|txt|xml|css|avi|tex|dvi)/
;
# Acceptable domain names: anonymize_host() leaves the final label of a host
# name in the clear when it matches this pattern.  The alternatives are all
# lower case.
const kosher_dom_pat =
/ar|au|biz|br|ca|cc|cl|cn|co|com|cx|cz|de|ec|es|edu|fi|fm|fr|gov|hn|il|is|it|jp|lv|mx|net|no|nz|org|pe|pl|ru|sk|tv|tw|uk|us|arpa/
;
# Simple filename pattern: letters, digits and "-" followed by "." and one of
# the same suffixes as path_to_file_pat.  Not referenced in this part of the
# script.
const simple_filename =
/[0-9\-A-Za-z]+\.(html|ico|icon|pdf|ps|doc|ppt|htm|js|crl|swf|shtml|h|old|c|cc|java|class|src|cfm|gif|jpg|php|rdf|rss|asp|bmp|owl|phtml|jpeg|jsp|cgi|png|txt|xml|css|avi|tex|dvi)/
;
# Anonymize a URL path while keeping a recognized file suffix readable.
#
# A path whose lower-cased form does not match path_to_file_pat is
# anonymized as a whole.  Otherwise the path is split on ".", everything
# before the last piece is re-joined and anonymized, and the last piece
# (the suffix) is appended in the clear.
#
# NOTE(review): the re-joined stem starts with an extra "." (each piece,
# including the first, is prefixed with the separator), so the string that
# gets hashed is e.g. "./dir/name" rather than "/dir/name".  An earlier
# reviewer (VP) already marked this loop as looking broken.  The behavior
# is kept as is because changing it would alter existing mappings.
function anonymize_path(path: string): string
	{
	if ( to_lower(path) != path_to_file_pat )
		return anonymize_arg("path", path);

	local pieces = split(path, /\./);
	local last = length(pieces);
	local stem = "";
	local idx = 1;

	# The loop variable is unused; idx walks the pieces in order.
	for ( p in pieces )
		{
		stem = fmt("%s.%s", stem, pieces[idx]);
		++idx;

		# Stop once idx points at the final piece (the suffix).
		if ( idx == last )
			break;
		}

	return fmt("%s.%s", anonymize_arg("path", stem), pieces[idx]);
	}
# Anonymize a host name label by label.
#
# The name is split on "."; every label except the last is anonymized
# individually and re-joined with ".".  The last label is kept in the clear
# if it matches kosher_dom_pat; otherwise the host is reported in anon_log
# as having a non-kosher domain and the last label is anonymized as well.
function anonymize_host(host: string): string
	{
	local labels = split(host, /\./);
	local n = length(labels);
	local prefix = "";
	local idx = 1;

	# The loop variable is unused; idx walks the labels in order and the
	# loop stops when it reaches the final label.
	for ( l in labels )
		{
		if ( idx == n )
			break;

		prefix = fmt("%s%s.", prefix, anonymize_arg("host", labels[idx]));
		++idx;
		}

	# Check the final label against the "kosher" list.
	local tld = labels[idx];

	if ( tld == kosher_dom_pat )
		return string_cat(prefix, tld);

	print anon_log, fmt("anonymize_host: non-kosher domain %s", host);

	return string_cat(prefix, anonymize_arg("host", tld));
	}
# At shutdown, write one line per anonymized string to anon_log: its usage
# count, the original string, and the token that replaced it.
event bro_done()
	{
	for ( original in anonymized_strings )
		print anon_log, fmt("appearance: %d: \"%s\" => \"%s\"",
				anonymized_strings[original]$c, original,
				anonymized_strings[original]$s);
	}