zeek/src/Anon.cc

453 lines
9.8 KiB
C++

#include "zeek/Anon.h"
#include <stdlib.h>
#include <unistd.h>
#include <assert.h>
#include <sys/time.h>
#include "zeek/util.h"
#include "zeek/net_util.h"
#include "zeek/Val.h"
#include "zeek/NetVar.h"
#include "zeek/Reporter.h"
#include "zeek/Scope.h"
#include "zeek/ID.h"
#include "zeek/IPAddr.h"
#include "zeek/Event.h"
namespace zeek::detail {
// Anonymizer instances, indexed by anonymization method. All slots start
// out null; init_ip_addr_anonymizers() fills them in, leaving the
// KEEP_ORIG_ADDR slot null.
AnonymizeIPAddr* ip_anonymizer[NUM_ADDR_ANONYMIZATION_METHODS] = {nullptr};
// Builds a 32-bit value out of the low 16 bits of two separate
// util::detail::random_number() draws, one for each half of the result.
static uint32_t rand32()
	{
	const auto upper_half = util::detail::random_number() & 0xffff;
	const auto lower_half = util::detail::random_number() & 0xffff;

	return (upper_half << 16) | lower_half;
	}
// From tcpdpriv.
//
// Returns the position of the highest set bit in value, counted from the
// most significant end and starting at 1 (bit 31 -> 1, bit 0 -> 32).
// Returns 0 if no bit is set.
static int bi_ffs(uint32_t value)
	{
	// Position (1..4, counted from the top) of the highest set bit in a
	// 4-bit nibble; entry 0 is unused for non-zero input.
	static uint8_t nibble_pos[] = {
		0, 4, 3, 3, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1
	};

	if ( value == 0 )
		// Zero input ==> zero output.
		return 0;

	int skipped = 0;

	// Halve the search window three times (16, 8, then 4 bits): if the
	// upper half of the window is empty, account for the skipped bits,
	// otherwise continue with the upper half only.
	for ( int width = 16; width >= 4; width /= 2 )
		{
		const uint32_t upper = value >> width;

		if ( upper == 0 )
			skipped += width;
		else
			value = upper;
		}

	return skipped + nibble_pos[value & 0xf];
	}
// Mask selecting the n most significant bits of a 32-bit word, for n in
// [0, 32]. n == 32 is handled explicitly because shifting a 32-bit value
// by 32 positions is undefined behavior. Note that the argument is
// evaluated more than once.
#define first_n_bit_mask(n) ((n) >= 32 ? 0xFFFFFFFFU : ~(0xFFFFFFFFU >> (n)))
// Returns the anonymized form of addr. Results are remembered in `mapping`,
// so the method-specific anonymize() runs only the first time a given
// address is seen; later calls return the stored result.
ipaddr32_t AnonymizeIPAddr::Anonymize(ipaddr32_t addr)
	{
	const auto cached = mapping.find(addr);

	if ( cached != mapping.end() )
		return cached->second;

	// First sighting: compute and record the result.
	const ipaddr32_t fresh = anonymize(addr);
	mapping[addr] = fresh;

	return fresh;
	}
// Keep the specified prefix unchanged.
//
// The base-class version offers no prefix preservation: it reports an
// internal error through the reporter and ignores both arguments.
bool AnonymizeIPAddr::PreservePrefix(ipaddr32_t /* input */, int /* num_bits */)
	{
	reporter->InternalError("prefix preserving is not supported for the anonymizer");

	// Nothing was preserved.
	return false;
	}
// Preserves the classful network part of input (given in network byte
// order): 8 bits for class A, 16 for class B, 24 for class C. Any other
// class is not preserved and yields false. Otherwise the result of
// PreservePrefix() is returned.
bool AnonymizeIPAddr::PreserveNet(ipaddr32_t input)
	{
	int net_bits = 0;

	switch ( addr_to_class(ntohl(input)) ) {
	case 'A':
		net_bits = 8;
		break;

	case 'B':
		net_bits = 16;
		break;

	case 'C':
		net_bits = 24;
		break;

	default:
		return false;
	}

	return PreservePrefix(input, net_bits);
	}
// Sequential numbering: ignores the input address, advances the `seq`
// counter by one and returns the new counter value converted with htonl().
ipaddr32_t AnonymizeIPAddr_Seq::anonymize(ipaddr32_t /* input */)
	{
	// Bump the counter in its own statement so that htonl() only ever
	// sees a side-effect-free argument.
	seq += 1;

	return htonl(seq);
	}
// Feeds the raw bytes of input to util::detail::hmac_md5() and packs the
// first four digest bytes into the result, with digest[0] ending up as the
// most significant byte. No byte-order conversion is applied to either the
// input or the result.
ipaddr32_t AnonymizeIPAddr_RandomMD5::anonymize(ipaddr32_t input)
	{
	uint8_t digest[16];
	util::detail::hmac_md5(sizeof(input), reinterpret_cast<u_char*>(&input), digest);

	ipaddr32_t packed = 0;

	for ( const uint8_t* byte = digest; byte != digest + 4; ++byte )
		packed = (packed << 8) | *byte;

	return packed;
	}
// This code is from "On the Design and Performance of Prefix-Preserving
// IP Traffic Trace Anonymization", by Xu et al (IMW 2001)
//
// http://www.imconf.net/imw-2001/proceedings.html
//
// Takes and returns an address in network byte order. For each of the 32
// bit positions, the padded prefix of the input leading up to that bit is
// hashed via util::detail::hmac_md5(), and the lowest bit of the first
// digest byte decides whether that bit of the output is flipped.
ipaddr32_t AnonymizeIPAddr_PrefixMD5::anonymize(ipaddr32_t input)
	{
	uint8_t digest[16];
	const ipaddr32_t all_ones = 0xffffffff;
	const ipaddr32_t host_input = ntohl(input);
	ipaddr32_t output = host_input;

	for ( int i = 0; i < 32; ++i )
		{
		// The bit at position i, counted from the most significant end.
		const ipaddr32_t current_bit = ipaddr32_t(1) << (31 - i);

		// The i leading bits x_0 ... x_{i-1} of the input.
		const ipaddr32_t leading_bits = host_input & ~(all_ones >> i);

		// PAD(x_0 ... x_{i-1}) = x_0 ... x_{i-1} 1 0 ... 0 .
		prefix.len = htonl(i + 1);
		prefix.prefix = htonl(leading_bits | current_bit);

		// HK(PAD(x_0 ... x_{i-1})).
		util::detail::hmac_md5(sizeof(prefix), (u_char*) &prefix, digest);

		// f_{i-1} = LSB(HK(PAD(x_0 ... x_{i-1}))).
		// x_i' = x_i ^ f_{i-1}.
		if ( digest[0] & 1 )
			output ^= current_bit;
		}

	return htonl(output);
	}
// Releases every node block recorded in `blocks`. Nodes are allocated in
// arrays by new_node_block(), hence the array form of delete.
AnonymizeIPAddr_A50::~AnonymizeIPAddr_A50()
	{
	for ( auto* node_block : blocks )
		delete [] node_block;
	}
// Puts the anonymizer into its initial state: empty tree, empty free
// list, identity mappings for the two special addresses, and the flags
// set to "no anonymization has happened yet".
void AnonymizeIPAddr_A50::init()
	{
	root = nullptr;
	next_free_node = nullptr;

	// Prepare special nodes for 0.0.0.0 and 255.255.255.255. Both map
	// onto themselves.
	memset(special_nodes, 0, sizeof(special_nodes));

	special_nodes[0].input = 0;
	special_nodes[0].output = 0;

	special_nodes[1].input = 0xFFFFFFFF;
	special_nodes[1].output = 0xFFFFFFFF;

	method = 0;
	before_anonymization = 1;
	new_mapping = 0;
	}
// Arranges for the first num_bits bits of input (network byte order) to be
// kept unchanged by the anonymization.
//
// This is only permitted before the first address has been anonymized;
// afterwards an error is reported and false is returned. num_bits must lie
// in [0, 32]; anything else is reported as an error and rejected, because
// the mask computations below shift by num_bits and a shift count outside
// the word size is undefined behavior.
//
// Returns true if the request was accepted.
bool AnonymizeIPAddr_A50::PreservePrefix(ipaddr32_t input, int num_bits)
	{
	DEBUG_MSG("%s/%d\n",
	          IPAddr(IPv4, &input, IPAddr::Network).AsString().c_str(),
	          num_bits);

	if ( ! before_anonymization )
		{
		reporter->Error("prefix perservation specified after anonymization begun");
		return false;
		}

	if ( num_bits < 0 || num_bits > 32 )
		{
		reporter->Error("invalid prefix length %d for prefix preservation", num_bits);
		return false;
		}

	input = ntohl(input);

	// Bits below the prefix. A full 32-bit prefix leaves no suffix; it is
	// spelled out because 0xFFFFFFFFU >> 32 would be undefined.
	const uint32_t suffix_mask = num_bits == 32 ? 0U : (0xFFFFFFFFU >> num_bits);
	const uint32_t prefix_mask = ~suffix_mask;

	// Sanitize input: drop everything below the prefix.
	input &= prefix_mask;

	Node* n = find_node(input);

	// Preserve the first num_bits bits of addr.
	if ( num_bits == 32 )
		n->output = input;

	else if ( num_bits > 0 )
		// Keep the prefix, randomize the rest.
		n->output = (input & prefix_mask) | (rand32() & suffix_mask);

	// num_bits == 0 preserves nothing; the node's output is left alone.

	return true;
	}
// Anonymizes a (network byte order) by looking up its node via
// find_node() and returning that node's output in network byte order.
// Marks anonymization as started (clears before_anonymization) and resets
// new_mapping before the lookup. Returns 0 if no node is found.
ipaddr32_t AnonymizeIPAddr_A50::anonymize(ipaddr32_t a)
	{
	before_anonymization = 0;
	new_mapping = 0;

	Node* const hit = find_node(ntohl(a));

	if ( ! hit )
		return 0;

	return htonl(hit->output);
	}
// Allocates a new block of nodes and hands out its first node.
//
// Must only be called while the free list is empty. The block is recorded
// in `blocks` so the destructor can release it. The first node of the block
// is returned to the caller; all remaining nodes are chained together
// through child[0] and become the new free list.
AnonymizeIPAddr_A50::Node* AnonymizeIPAddr_A50::new_node_block()
	{
	assert(! next_free_node);

	constexpr int block_size = 1024;

	// A plain new-expression signals allocation failure by throwing
	// std::bad_alloc and never yields a null pointer, so there is nothing
	// to check for here.
	Node* block = new Node[block_size];
	blocks.push_back(block);

	// Thread nodes 1 .. block_size-1 into a singly linked free list.
	for ( int i = 1; i < block_size - 1; ++i )
		block[i].child[0] = &block[i + 1];

	block[block_size - 1].child[0] = nullptr;

	next_free_node = &block[1];

	return &block[0];
	}
// Hands out an unused node and sets new_mapping. The node is taken from
// the head of the free list if one is available; otherwise a fresh block
// is allocated through new_node_block(). The returned node's fields are
// not initialized here.
inline AnonymizeIPAddr_A50::Node* AnonymizeIPAddr_A50::new_node()
	{
	new_mapping = 1;

	if ( ! next_free_node )
		return new_node_block();

	// Pop the head of the free list (linked through child[0]).
	Node* const recycled = next_free_node;
	next_free_node = recycled->child[0];

	return recycled;
	}
// Returns n to the free list by pushing it onto the list's head; the list
// is linked through child[0].
inline void AnonymizeIPAddr_A50::free_node(Node *n)
	{
	Node* const previous_head = next_free_node;

	next_free_node = n;
	n->child[0] = previous_head;
	}
// Derives the output for a new node from a peer's output (old_output) and
// the position `swivel` (1-based, counted from the most significant bit)
// at which the two inputs first differ: the bits before the swivel are
// shared, the swivel bit is flipped, and the remainder is filled from
// rand32().
ipaddr32_t AnonymizeIPAddr_A50::make_output(ipaddr32_t old_output, int swivel) const
	{
	// -A50 anonymization
	if ( swivel == 32 )
		// Only the last bit differs, so there is nothing to randomize.
		return old_output ^ 1;

	// Number of bits following the swivel bit.
	const int tail_bits = 32 - swivel;

	// Bits up to swivel are unchanged; bit swivel is flipped.
	const ipaddr32_t known_part = ((old_output >> tail_bits) ^ 1) << tail_bits;

	// Remainder of bits are random.
	// NOTE(review): masking with 0x7FFFFFFF before shifting means the bit
	// directly below the swivel bit is always zero in the result --
	// confirm whether this is intended.
	const ipaddr32_t random_tail = (rand32() & 0x7FFFFFFF) >> swivel;

	return known_part | random_tail;
	}
// Inserts address a (host byte order) into the tree as a peer of node n.
//
// Two new nodes are created: one receives a together with an output
// derived from n's output, the other receives a copy of n's previous
// contents. n itself stays in place and becomes the parent of both.
// Returns the node holding a, or nullptr if a node could not be obtained.
// a must differ from n->input and must not be one of the two special
// addresses.
AnonymizeIPAddr_A50::Node* AnonymizeIPAddr_A50::make_peer(ipaddr32_t a, Node* n)
	{
	if ( a == 0 || a == 0xFFFFFFFFU )
		reporter->InternalError("0.0.0.0 and 255.255.255.255 should never get into the tree");

	// Become a peer.
	// Algorithm: create two nodes, the two peers. Leave orig node as
	// the parent of the two new ones.
	Node* const zero_side = new_node();

	if ( ! zero_side )
		return nullptr;

	Node* const one_side = new_node();

	if ( ! one_side )
		{
		free_node(zero_side);
		return nullptr;
		}

	Node* const peers[2] = {zero_side, one_side};

	// swivel is the first bit in which 'a' and 'n->input' differ.
	const int swivel = bi_ffs(a ^ n->input);

	// a_bit is the value of that bit of 'a'; it decides on which side
	// the new address lives.
	const int a_bit = (a >> (32 - swivel)) & 1;

	Node* const added = peers[a_bit];
	Node* const displaced = peers[1 - a_bit];

	added->input = a;
	added->output = make_output(n->output, swivel);
	added->child[1] = nullptr;
	added->child[0] = nullptr;

	// Copy the orig node down one level.
	*displaced = *n;

	// NB: 1s to the right (0s to the left).
	n->input = one_side->input;
	n->output = one_side->output;

	// Point to children.
	n->child[0] = zero_side;
	n->child[1] = one_side;

	return added;
	}
// Returns the node for address a (host byte order), creating it if it does
// not exist yet. 0.0.0.0 and 255.255.255.255 are served from special_nodes
// and never enter the tree. The first regular address becomes the root and
// gets a random output; later addresses are located by descending the tree
// and are inserted through make_peer() where they diverge from it.
AnonymizeIPAddr_A50::Node* AnonymizeIPAddr_A50::find_node(ipaddr32_t a)
	{
	// Watch out for special IP addresses, which never make it
	// into the tree. The lowest bit of a picks the matching slot.
	if ( a == 0 || a == 0xFFFFFFFFU )
		return &special_nodes[a & 1];

	if ( ! root )
		{
		root = new_node();
		root->input = a;
		root->output = rand32();
		root->child[0] = nullptr;
		root->child[1] = nullptr;
		return root;
		}

	// Straight from tcpdpriv.
	for ( Node* cur = root; cur; )
		{
		if ( cur->input == a )
			return cur;

		if ( ! cur->child[0] )
			{
			// No children to descend into: a becomes a peer here.
			cur = make_peer(a, cur);
			continue;
			}

		// child_swivel is the first bit in which the two children
		// differ.
		const int child_swivel =
			bi_ffs(cur->child[0]->input ^ cur->child[1]->input);

		if ( bi_ffs(a ^ cur->input) < child_swivel )
			// Input differs earlier.
			cur = make_peer(a, cur);
		else
			{
			// Follow the child whose swivel bit matches a's.
			const bool bit_set = (a & (1U << (32 - child_swivel))) != 0;
			cur = bit_set ? cur->child[1] : cur->child[0];
			}
		}

	// Only reached if make_peer() returned null.
	reporter->InternalError("out of memory!");
	return nullptr;
	}
// Tables of addresses exempt from anonymization, one per address class.
// init_ip_addr_anonymizers() binds them to the global identifiers
// "preserve_orig_addr", "preserve_resp_addr" and "preserve_other_addr";
// anonymize_ip() consults the table matching the class of the address.
static TableValPtr anon_preserve_orig_addr;
static TableValPtr anon_preserve_resp_addr;
static TableValPtr anon_preserve_other_addr;
void init_ip_addr_anonymizers()
{
ip_anonymizer[KEEP_ORIG_ADDR] = nullptr;
ip_anonymizer[SEQUENTIALLY_NUMBERED] = new AnonymizeIPAddr_Seq();
ip_anonymizer[RANDOM_MD5] = new AnonymizeIPAddr_RandomMD5();
ip_anonymizer[PREFIX_PRESERVING_A50] = new AnonymizeIPAddr_A50();
ip_anonymizer[PREFIX_PRESERVING_MD5] = new AnonymizeIPAddr_PrefixMD5();
auto id = global_scope()->Find("preserve_orig_addr");
if ( id )
anon_preserve_orig_addr = cast_intrusive<TableVal>(id->GetVal());
id = global_scope()->Find("preserve_resp_addr");
if ( id )
anon_preserve_resp_addr = cast_intrusive<TableVal>(id->GetVal());
id = global_scope()->Find("preserve_other_addr");
if ( id )
anon_preserve_other_addr = cast_intrusive<TableVal>(id->GetVal());
}
// Anonymizes ip according to the address class cl.
//
// The class selects both the anonymization method and the table of
// addresses to preserve. An address for which that table yields a value
// is returned unchanged, as is any address when the method is
// KEEP_ORIG_ADDR. Otherwise the anonymizer registered for the method does
// the work. A method outside the valid range or a missing anonymizer is
// reported as an internal error.
ipaddr32_t anonymize_ip(ipaddr32_t ip, enum ip_addr_anonymization_class_t cl)
	{
	TableVal* exempt_table = nullptr;
	auto addr = make_intrusive<AddrVal>(ip);
	int method = -1;

	switch ( cl ) {
	case ORIG_ADDR: // client address
		method = orig_addr_anonymization;
		exempt_table = anon_preserve_orig_addr.get();
		break;

	case RESP_ADDR: // server address
		method = resp_addr_anonymization;
		exempt_table = anon_preserve_resp_addr.get();
		break;

	default:
		method = other_addr_anonymization;
		exempt_table = anon_preserve_other_addr.get();
		break;
	}

	ipaddr32_t new_ip = 0;

	const bool exempt = exempt_table && exempt_table->FindOrDefault(addr);

	if ( exempt )
		new_ip = ip;

	else if ( method < 0 || method >= NUM_ADDR_ANONYMIZATION_METHODS )
		reporter->InternalError("invalid IP anonymization method");

	else if ( method == KEEP_ORIG_ADDR )
		new_ip = ip;

	else if ( AnonymizeIPAddr* anonymizer = ip_anonymizer[method] )
		new_ip = anonymizer->Anonymize(ip);

	else
		reporter->InternalError("IP anonymizer not initialized");

#ifdef LOG_ANONYMIZATION_MAPPING
	log_anonymization_mapping(ip, new_ip);
#endif

	return new_ip;
	}
#ifdef LOG_ANONYMIZATION_MAPPING
// Enqueues an anonymization_mapping event carrying the original and the
// anonymized address. Does nothing if anonymization_mapping evaluates to
// false.
void log_anonymization_mapping(ipaddr32_t input, ipaddr32_t output)
	{
	if ( ! anonymization_mapping )
		return;

	event_mgr.Enqueue(anonymization_mapping, make_intrusive<AddrVal>(input),
	                  make_intrusive<AddrVal>(output));
	}
#endif
} // namespace zeek::detail