mirror of
https://github.com/zeek/zeek.git
synced 2025-10-06 00:28:21 +00:00
453 lines
9.8 KiB
C++
453 lines
9.8 KiB
C++
#include "zeek/Anon.h"
|
|
|
|
#include <stdlib.h>
|
|
#include <unistd.h>
|
|
#include <assert.h>
|
|
#include <sys/time.h>
|
|
|
|
#include "zeek/util.h"
|
|
#include "zeek/net_util.h"
|
|
#include "zeek/Val.h"
|
|
#include "zeek/NetVar.h"
|
|
#include "zeek/Reporter.h"
|
|
#include "zeek/Scope.h"
|
|
#include "zeek/ID.h"
|
|
#include "zeek/IPAddr.h"
|
|
#include "zeek/Event.h"
|
|
|
|
namespace zeek::detail {
|
|
|
|
AnonymizeIPAddr* ip_anonymizer[NUM_ADDR_ANONYMIZATION_METHODS] = {nullptr};
|
|
|
|
static uint32_t rand32()
|
|
{
|
|
return ((util::detail::random_number() & 0xffff) << 16) | (util::detail::random_number() & 0xffff);
|
|
}
|
|
|
|
// From tcpdpriv.
|
|
static int bi_ffs(uint32_t value)
|
|
{
|
|
int add = 0;
|
|
static uint8_t bvals[] = {
|
|
0, 4, 3, 3, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1
|
|
};
|
|
|
|
if ( (value & 0xFFFF0000) == 0 )
|
|
{
|
|
if ( value == 0 )
|
|
// Zero input ==> zero output.
|
|
return 0;
|
|
|
|
add += 16;
|
|
}
|
|
|
|
else
|
|
value >>= 16;
|
|
|
|
if ( (value & 0xFF00) == 0 )
|
|
add += 8;
|
|
else
|
|
value >>= 8;
|
|
|
|
if ( (value & 0xF0) == 0 )
|
|
add += 4;
|
|
else
|
|
value >>= 4;
|
|
|
|
return add + bvals[value & 0xf];
|
|
}
|
|
|
|
#define first_n_bit_mask(n) (~(0xFFFFFFFFU >> n))
|
|
|
|
ipaddr32_t AnonymizeIPAddr::Anonymize(ipaddr32_t addr)
|
|
{
|
|
std::map<ipaddr32_t, ipaddr32_t>::iterator p = mapping.find(addr);
|
|
if ( p != mapping.end() )
|
|
return p->second;
|
|
else
|
|
{
|
|
ipaddr32_t new_addr = anonymize(addr);
|
|
mapping[addr] = new_addr;
|
|
|
|
return new_addr;
|
|
}
|
|
}
|
|
|
|
// Keep the specified prefix unchanged.
|
|
bool AnonymizeIPAddr::PreservePrefix(ipaddr32_t /* input */, int /* num_bits */)
|
|
{
|
|
reporter->InternalError("prefix preserving is not supported for the anonymizer");
|
|
return false;
|
|
}
|
|
|
|
bool AnonymizeIPAddr::PreserveNet(ipaddr32_t input)
|
|
{
|
|
switch ( addr_to_class(ntohl(input)) ) {
|
|
case 'A':
|
|
return PreservePrefix(input, 8);
|
|
case 'B':
|
|
return PreservePrefix(input, 16);
|
|
case 'C':
|
|
return PreservePrefix(input, 24);
|
|
default:
|
|
return false;
|
|
}
|
|
}
|
|
|
|
ipaddr32_t AnonymizeIPAddr_Seq::anonymize(ipaddr32_t /* input */)
|
|
{
|
|
++seq;
|
|
return htonl(seq);
|
|
}
|
|
|
|
ipaddr32_t AnonymizeIPAddr_RandomMD5::anonymize(ipaddr32_t input)
|
|
{
|
|
uint8_t digest[16];
|
|
ipaddr32_t output = 0;
|
|
|
|
util::detail::hmac_md5(sizeof(input), (u_char*)(&input), digest);
|
|
|
|
for ( int i = 0; i < 4; ++i )
|
|
output = (output << 8) | digest[i];
|
|
|
|
return output;
|
|
}
|
|
|
|
|
|
// This code is from "On the Design and Performance of Prefix-Preserving
|
|
// IP Traffic Trace Anonymization", by Xu et al (IMW 2001)
|
|
//
|
|
// http://www.imconf.net/imw-2001/proceedings.html
|
|
|
|
ipaddr32_t AnonymizeIPAddr_PrefixMD5::anonymize(ipaddr32_t input)
|
|
{
|
|
uint8_t digest[16];
|
|
ipaddr32_t prefix_mask = 0xffffffff;
|
|
input = ntohl(input);
|
|
ipaddr32_t output = input;
|
|
|
|
for ( int i = 0; i < 32; ++i )
|
|
{
|
|
// PAD(x_0 ... x_{i-1}) = x_0 ... x_{i-1} 1 0 ... 0 .
|
|
prefix.len = htonl(i + 1);
|
|
prefix.prefix = htonl((input & ~(prefix_mask>>i)) | (1<<(31-i)));
|
|
|
|
// HK(PAD(x_0 ... x_{i-1})).
|
|
util::detail::hmac_md5(sizeof(prefix), (u_char*) &prefix, digest);
|
|
|
|
// f_{i-1} = LSB(HK(PAD(x_0 ... x_{i-1}))).
|
|
ipaddr32_t bit_mask = (digest[0] & 1) << (31-i);
|
|
|
|
// x_i' = x_i ^ f_{i-1}.
|
|
output ^= bit_mask;
|
|
}
|
|
|
|
return htonl(output);
|
|
}
|
|
|
|
AnonymizeIPAddr_A50::~AnonymizeIPAddr_A50()
|
|
{
|
|
for ( auto& b : blocks )
|
|
delete [] b;
|
|
}
|
|
|
|
void AnonymizeIPAddr_A50::init()
|
|
{
|
|
root = next_free_node = nullptr;
|
|
|
|
// Prepare special nodes for 0.0.0.0 and 255.255.255.255.
|
|
memset(&special_nodes[0], 0, sizeof(special_nodes));
|
|
special_nodes[0].input = special_nodes[0].output = 0;
|
|
special_nodes[1].input = special_nodes[1].output = 0xFFFFFFFF;
|
|
|
|
method = 0;
|
|
before_anonymization = 1;
|
|
new_mapping = 0;
|
|
}
|
|
|
|
bool AnonymizeIPAddr_A50::PreservePrefix(ipaddr32_t input, int num_bits)
|
|
{
|
|
DEBUG_MSG("%s/%d\n",
|
|
IPAddr(IPv4, &input, IPAddr::Network).AsString().c_str(),
|
|
num_bits);
|
|
|
|
if ( ! before_anonymization )
|
|
{
|
|
reporter->Error("prefix perservation specified after anonymization begun");
|
|
return false;
|
|
}
|
|
|
|
input = ntohl(input);
|
|
|
|
// Sanitize input.
|
|
input = input & first_n_bit_mask(num_bits);
|
|
|
|
Node* n = find_node(input);
|
|
|
|
// Preserve the first num_bits bits of addr.
|
|
if ( num_bits == 32 )
|
|
n->output = input;
|
|
|
|
else if ( num_bits > 0 )
|
|
{
|
|
assert((0xFFFFFFFFU >> 1) == 0x7FFFFFFFU);
|
|
uint32_t suffix_mask = (0xFFFFFFFFU >> num_bits);
|
|
uint32_t prefix_mask = ~suffix_mask;
|
|
n->output = (input & prefix_mask) | (rand32() & suffix_mask);
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
ipaddr32_t AnonymizeIPAddr_A50::anonymize(ipaddr32_t a)
|
|
{
|
|
before_anonymization = 0;
|
|
new_mapping = 0;
|
|
|
|
if ( Node* n = find_node(ntohl(a)) )
|
|
{
|
|
ipaddr32_t output = htonl(n->output);
|
|
return output;
|
|
}
|
|
else
|
|
return 0;
|
|
}
|
|
|
|
AnonymizeIPAddr_A50::Node* AnonymizeIPAddr_A50::new_node_block()
|
|
{
|
|
assert(! next_free_node);
|
|
|
|
int block_size = 1024;
|
|
Node* block = new Node[block_size];
|
|
if ( ! block )
|
|
reporter->InternalError("out of memory!");
|
|
|
|
blocks.push_back(block);
|
|
|
|
for ( int i = 1; i < block_size - 1; ++i )
|
|
block[i].child[0] = &block[i+1];
|
|
|
|
block[block_size - 1].child[0] = nullptr;
|
|
next_free_node = &block[1];
|
|
|
|
return &block[0];
|
|
}
|
|
|
|
inline AnonymizeIPAddr_A50::Node* AnonymizeIPAddr_A50::new_node()
|
|
{
|
|
new_mapping = 1;
|
|
|
|
if ( next_free_node )
|
|
{
|
|
Node* n = next_free_node;
|
|
next_free_node = n->child[0];
|
|
return n;
|
|
}
|
|
else
|
|
return new_node_block();
|
|
}
|
|
|
|
inline void AnonymizeIPAddr_A50::free_node(Node *n)
|
|
{
|
|
n->child[0] = next_free_node;
|
|
next_free_node = n;
|
|
}
|
|
|
|
ipaddr32_t AnonymizeIPAddr_A50::make_output(ipaddr32_t old_output, int swivel) const
|
|
{
|
|
// -A50 anonymization
|
|
if ( swivel == 32 )
|
|
return old_output ^ 1;
|
|
else
|
|
{
|
|
// Bits up to swivel are unchanged; bit swivel is flipped.
|
|
ipaddr32_t known_part =
|
|
((old_output >> (32 - swivel)) ^ 1) << (32 - swivel);
|
|
|
|
// Remainder of bits are random.
|
|
return known_part | ((rand32() & 0x7FFFFFFF) >> swivel);
|
|
}
|
|
}
|
|
|
|
AnonymizeIPAddr_A50::Node* AnonymizeIPAddr_A50::make_peer(ipaddr32_t a, Node* n)
|
|
{
|
|
if ( a == 0 || a == 0xFFFFFFFFU )
|
|
reporter->InternalError("0.0.0.0 and 255.255.255.255 should never get into the tree");
|
|
|
|
// Become a peer.
|
|
// Algorithm: create two nodes, the two peers. Leave orig node as
|
|
// the parent of the two new ones.
|
|
|
|
Node* down[2];
|
|
|
|
if ( ! (down[0] = new_node()) )
|
|
return nullptr;
|
|
|
|
if ( ! (down[1] = new_node()) )
|
|
{
|
|
free_node(down[0]);
|
|
return nullptr;
|
|
}
|
|
|
|
// swivel is first bit 'a' and 'old->input' differ.
|
|
int swivel = bi_ffs(a ^ n->input);
|
|
|
|
// bitvalue is the value of that bit of 'a'.
|
|
int bitvalue = (a >> (32 - swivel)) & 1;
|
|
|
|
down[bitvalue]->input = a;
|
|
down[bitvalue]->output = make_output(n->output, swivel);
|
|
down[bitvalue]->child[0] = down[bitvalue]->child[1] = nullptr;
|
|
|
|
*down[1 - bitvalue] = *n; // copy orig node down one level
|
|
|
|
n->input = down[1]->input; // NB: 1s to the right (0s to the left)
|
|
n->output = down[1]->output;
|
|
n->child[0] = down[0]; // point to children
|
|
n->child[1] = down[1];
|
|
|
|
return down[bitvalue];
|
|
}
|
|
|
|
AnonymizeIPAddr_A50::Node* AnonymizeIPAddr_A50::find_node(ipaddr32_t a)
|
|
{
|
|
// Watch out for special IP addresses, which never make it
|
|
// into the tree.
|
|
if ( a == 0 || a == 0xFFFFFFFFU )
|
|
return &special_nodes[a & 1];
|
|
|
|
if ( ! root )
|
|
{
|
|
root = new_node();
|
|
root->input = a;
|
|
root->output = rand32();
|
|
root->child[0] = root->child[1] = nullptr;
|
|
|
|
return root;
|
|
}
|
|
|
|
// Straight from tcpdpriv.
|
|
Node* n = root;
|
|
while ( n )
|
|
{
|
|
if ( n->input == a )
|
|
return n;
|
|
|
|
if ( ! n->child[0] )
|
|
n = make_peer(a, n);
|
|
|
|
else
|
|
{
|
|
// swivel is the first bit in which the two children
|
|
// differ.
|
|
int swivel =
|
|
bi_ffs(n->child[0]->input ^ n->child[1]->input);
|
|
|
|
if ( bi_ffs(a ^ n->input) < swivel )
|
|
// Input differs earlier.
|
|
n = make_peer(a, n);
|
|
|
|
else if ( a & (1 << (32 - swivel)) )
|
|
n = n->child[1];
|
|
|
|
else
|
|
n = n->child[0];
|
|
}
|
|
}
|
|
|
|
reporter->InternalError("out of memory!");
|
|
return nullptr;
|
|
}
|
|
|
|
static TableValPtr anon_preserve_orig_addr;
|
|
static TableValPtr anon_preserve_resp_addr;
|
|
static TableValPtr anon_preserve_other_addr;
|
|
|
|
void init_ip_addr_anonymizers()
|
|
{
|
|
ip_anonymizer[KEEP_ORIG_ADDR] = nullptr;
|
|
ip_anonymizer[SEQUENTIALLY_NUMBERED] = new AnonymizeIPAddr_Seq();
|
|
ip_anonymizer[RANDOM_MD5] = new AnonymizeIPAddr_RandomMD5();
|
|
ip_anonymizer[PREFIX_PRESERVING_A50] = new AnonymizeIPAddr_A50();
|
|
ip_anonymizer[PREFIX_PRESERVING_MD5] = new AnonymizeIPAddr_PrefixMD5();
|
|
|
|
auto id = global_scope()->Find("preserve_orig_addr");
|
|
|
|
if ( id )
|
|
anon_preserve_orig_addr = cast_intrusive<TableVal>(id->GetVal());
|
|
|
|
id = global_scope()->Find("preserve_resp_addr");
|
|
|
|
if ( id )
|
|
anon_preserve_resp_addr = cast_intrusive<TableVal>(id->GetVal());
|
|
|
|
id = global_scope()->Find("preserve_other_addr");
|
|
|
|
if ( id )
|
|
anon_preserve_other_addr = cast_intrusive<TableVal>(id->GetVal());
|
|
}
|
|
|
|
ipaddr32_t anonymize_ip(ipaddr32_t ip, enum ip_addr_anonymization_class_t cl)
|
|
{
|
|
TableVal* preserve_addr = nullptr;
|
|
auto addr = make_intrusive<AddrVal>(ip);
|
|
|
|
int method = -1;
|
|
|
|
switch ( cl ) {
|
|
case ORIG_ADDR: // client address
|
|
preserve_addr = anon_preserve_orig_addr.get();
|
|
method = orig_addr_anonymization;
|
|
break;
|
|
|
|
case RESP_ADDR: // server address
|
|
preserve_addr = anon_preserve_resp_addr.get();
|
|
method = resp_addr_anonymization;
|
|
break;
|
|
|
|
default:
|
|
preserve_addr = anon_preserve_other_addr.get();
|
|
method = other_addr_anonymization;
|
|
break;
|
|
}
|
|
|
|
ipaddr32_t new_ip = 0;
|
|
|
|
if ( preserve_addr && preserve_addr->FindOrDefault(addr) )
|
|
new_ip = ip;
|
|
|
|
else if ( method >= 0 && method < NUM_ADDR_ANONYMIZATION_METHODS )
|
|
{
|
|
if ( method == KEEP_ORIG_ADDR )
|
|
new_ip = ip;
|
|
|
|
else if ( ! ip_anonymizer[method] )
|
|
reporter->InternalError("IP anonymizer not initialized");
|
|
|
|
else
|
|
new_ip = ip_anonymizer[method]->Anonymize(ip);
|
|
}
|
|
|
|
else
|
|
reporter->InternalError("invalid IP anonymization method");
|
|
|
|
#ifdef LOG_ANONYMIZATION_MAPPING
|
|
log_anonymization_mapping(ip, new_ip);
|
|
#endif
|
|
return new_ip;
|
|
}
|
|
|
|
#ifdef LOG_ANONYMIZATION_MAPPING
|
|
|
|
void log_anonymization_mapping(ipaddr32_t input, ipaddr32_t output)
|
|
{
|
|
if ( anonymization_mapping )
|
|
event_mgr.Enqueue(anonymization_mapping,
|
|
make_intrusive<AddrVal>(input),
|
|
make_intrusive<AddrVal>(output)
|
|
);
|
|
}
|
|
|
|
#endif
|
|
|
|
} // namespace zeek::detail
|