zeek/src/Anon.cc
Robin Sommer da0ea67453 Fixing btests.
- When Bro is given a PRNG seed, it now uses its own internal random
  number generator that produces consistent results across systems.
  Note that this internal generator isn't very good, so it should only
  be used for testing purposes.

- The BTest configuration now sets the environment variables TZ=UTC
  and LANG=C to ensure consistent results.

- Fixing doc markup in logging.bro.

- Updating baselines.
2011-04-21 19:31:44 -07:00

423 lines
8.7 KiB
C++

// $Id: Anon.cc 7075 2010-09-13 02:39:38Z vern $
#include <stdlib.h>
#include <unistd.h>
#include <assert.h>
#include <sys/time.h>
#include "util.h"
#include "net_util.h"
#include "md5.h"
#include "Anon.h"
#include "Val.h"
#include "NetVar.h"
// Table of anonymizer instances, indexed by anonymization method.
// Every slot starts out null; init_ip_addr_anonymizers() fills in one
// instance per method and leaves the KEEP_ORIG_ADDR slot null.
AnonymizeIPAddr* ip_anonymizer[NUM_ADDR_ANONYMIZATION_METHODS] = {0};
// Returns a 32-bit random value assembled from the low 16 bits of two
// successive bro_random() draws.
//
// The draws are taken in separate statements: as operands of a single
// '|' expression their evaluation order is unspecified, so a seeded run
// could produce different values depending on the compiler.  The
// arithmetic is done on unsigned 32-bit values so that shifting the
// high half into place cannot overflow a signed type on platforms
// where bro_random()'s return type is only 32 bits wide.
static uint32 rand32()
	{
	const uint32 high = static_cast<uint32>(bro_random()) & 0xffff;
	const uint32 low = static_cast<uint32>(bro_random()) & 0xffff;

	return (high << 16) | low;
	}
// From tcpdpriv.
// Returns the position of the most significant set bit of 'value',
// counted from 1 at the high-order end (bit 31 -> 1, bit 0 -> 32).
// A zero input yields 0.
int bi_ffs(uint32 value)
	{
	// Zero input ==> zero output.
	if ( value == 0 )
		return 0;

	// Position of the leading set bit within a 4-bit nibble.
	static const uint8 nibble_pos[16] = {
		0, 4, 3, 3, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1
	};

	// Binary search: at each step either discard the lower half (the
	// leading bit is in the upper half) or account for an all-zero
	// upper half by advancing the position.
	int skipped = 0;

	for ( int width = 16; width >= 4; width /= 2 )
		{
		if ( value >> width )
			value >>= width;
		else
			skipped += width;
		}

	return skipped + nibble_pos[value & 0xf];
	}
// Mask with the top 'n' bits of a 32-bit word set (n == 0 gives 0).
// n >= 32 is handled explicitly because shifting a 32-bit value by its
// full width is undefined; the argument is parenthesized so that
// expressions can be passed safely.  Note that 'n' is evaluated twice.
#define first_n_bit_mask(n) \
	((n) >= 32 ? 0xFFFFFFFFU : ~(0xFFFFFFFFU >> (n)))
// Returns the anonymized form of 'addr'.  Results are remembered in
// 'mapping', so anonymize() is invoked only the first time a given
// address is seen and later calls return the stored value.
ipaddr32_t AnonymizeIPAddr::Anonymize(ipaddr32_t addr)
	{
	map<ipaddr32_t, ipaddr32_t>::iterator cached = mapping.find(addr);

	if ( cached != mapping.end() )
		return cached->second;

	// First sighting: compute the mapping and remember it.
	const ipaddr32_t anonymized = anonymize(addr);
	mapping[addr] = anonymized;

	return anonymized;
	}
// Requests preservation of the classful network part of 'input':
// 8 bits for class A, 16 for class B, 24 for class C.  Returns the
// result of PreservePrefix(), or 0 for any other address class.
int AnonymizeIPAddr::PreserveNet(ipaddr32_t input)
	{
	int net_bits = 0;

	switch ( addr_to_class(ntohl(input)) ) {
	case 'A':
		net_bits = 8;
		break;

	case 'B':
		net_bits = 16;
		break;

	case 'C':
		net_bits = 24;
		break;

	default:
		// No classful network part to preserve.
		return 0;
	}

	return PreservePrefix(input, net_bits);
	}
// Hands out the current value of the 'seq' counter, converted with
// htonl(), and then advances the counter.  The input address itself
// is ignored.
ipaddr32_t AnonymizeIPAddr_Seq::anonymize(ipaddr32_t /* input */)
	{
	const ipaddr32_t assigned = htonl(seq);
	++seq;

	return assigned;
	}
// Maps 'input' to the first four bytes of hmac_md5() computed over the
// raw bytes of the address, with digest[0] ending up as the most
// significant byte of the returned value.
ipaddr32_t AnonymizeIPAddr_RandomMD5::anonymize(ipaddr32_t input)
	{
	uint8 digest[16];

	hmac_md5(sizeof(input), (u_char*)(&input), digest);

	const ipaddr32_t b0 = digest[0];
	const ipaddr32_t b1 = digest[1];
	const ipaddr32_t b2 = digest[2];
	const ipaddr32_t b3 = digest[3];

	return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
	}
// This code is from "On the Design and Performance of Prefix-Preserving
// IP Traffic Trace Anonymization", by Xu et al (IMW 2001)
//
// http://www.imconf.net/imw-2001/proceedings.html
// Prefix-preserving anonymization: each output bit i is the input bit
// XORed with the low bit of hmac_md5() computed over the padded i-bit
// prefix of the input.
//
// 'input' and the return value pass through ntohl()/htonl(); the bit
// manipulation in between is done on the converted value.  All shifts
// use unsigned operands so that the top bit (i == 0) is formed without
// shifting a signed int into its sign bit.
ipaddr32_t AnonymizeIPAddr_PrefixMD5::anonymize(ipaddr32_t input)
	{
	uint8 digest[16];
	const ipaddr32_t prefix_mask = 0xffffffff;

	input = ntohl(input);
	ipaddr32_t output = input;

	for ( int i = 0; i < 32; ++i )
		{
		// The single bit at position i, counted from the high end.
		const ipaddr32_t bit_i = 1U << (31 - i);

		// PAD(x_0 ... x_{i-1}) = x_0 ... x_{i-1} 1 0 ... 0 .
		prefix.len = htonl(i + 1);
		prefix.prefix = htonl((input & ~(prefix_mask >> i)) | bit_i);

		// HK(PAD(x_0 ... x_{i-1})).
		hmac_md5(sizeof(prefix), (u_char*) &prefix, digest);

		// f_{i-1} = LSB(HK(PAD(x_0 ... x_{i-1}))).
		// x_i' = x_i ^ f_{i-1}.
		if ( digest[0] & 1 )
			output ^= bit_i;
		}

	return htonl(output);
	}
// Releases every node block recorded in 'blocks' (each was allocated
// with new[] by new_node_block()) and empties the list.
AnonymizeIPAddr_A50::~AnonymizeIPAddr_A50()
	{
	const size_t num_blocks = blocks.size();

	for ( size_t idx = 0; idx != num_blocks; ++idx )
		delete [] blocks[idx];

	blocks.clear();
	}
// Resets the anonymizer to its initial state: no tree, no free nodes,
// and prefix-preservation requests still permitted.
void AnonymizeIPAddr_A50::init()
	{
	next_free_node = 0;
	root = 0;

	// Prepare special nodes for 0.0.0.0 and 255.255.255.255; each of
	// them maps to itself.
	memset(&special_nodes[0], 0, sizeof(special_nodes));

	special_nodes[0].output = 0;
	special_nodes[0].input = 0;

	special_nodes[1].output = 0xFFFFFFFF;
	special_nodes[1].input = 0xFFFFFFFF;

	before_anonymization = 1;
	}
// Arranges for the first 'num_bits' bits of 'input' to survive
// anonymization unchanged.
//
// Must be called before the first address is anonymized.  Returns 1 on
// success; reports a run-time error and returns 0 if anonymization has
// already begun or if 'num_bits' is outside 0..32.
int AnonymizeIPAddr_A50::PreservePrefix(ipaddr32_t input, int num_bits)
	{
	DEBUG_MSG("%s/%d\n", dotted_addr(input), num_bits);

	if ( ! before_anonymization )
		{
		run_time("prefix perservation specified after anonymization begun");
		return 0;
		}

	// A shift count outside 0..31 on a 32-bit value is undefined, so
	// reject lengths that cannot describe a prefix.
	if ( num_bits < 0 || num_bits > 32 )
		{
		run_time("prefix preservation length must be between 0 and 32 bits");
		return 0;
		}

	input = ntohl(input);

	// Build the masks without ever shifting by the full word width:
	// a 32-bit prefix simply keeps every bit.
	uint32 prefix_mask = 0xFFFFFFFFU;
	if ( num_bits < 32 )
		prefix_mask = first_n_bit_mask(num_bits);

	const uint32 suffix_mask = ~prefix_mask;

	// Sanitize input.
	input = input & prefix_mask;

	Node* n = find_node(input);

	// Preserve the first num_bits bits of addr.
	if ( num_bits == 32 )
		n->output = input;

	else if ( num_bits > 0 )
		// Keep the prefix, randomize the remaining bits.
		n->output = (input & prefix_mask) | (rand32() & suffix_mask);

	return 1;
	}
// Looks up (creating it if needed) the tree node for 'a' and returns
// that node's output passed through htonl(), or 0 if find_node()
// returns no node.  From the first call on, PreservePrefix() requests
// are refused.
ipaddr32_t AnonymizeIPAddr_A50::anonymize(ipaddr32_t a)
	{
	before_anonymization = 0;
	new_mapping = 0;

	Node* node = find_node(ntohl(a));

	if ( ! node )
		return 0;

	return htonl(node->output);
	}
// Allocates a fresh block of 1024 nodes and records it in 'blocks'.
// The first node of the block is returned to the caller; the others
// are chained through child[0] and become the new free list.  May only
// be called while the free list is empty.
AnonymizeIPAddr_A50::Node* AnonymizeIPAddr_A50::new_node_block()
	{
	assert(! next_free_node);

	const int block_size = 1024;

	Node* block = new Node[block_size];
	if ( ! block )
		internal_error("out of memory!");

	blocks.push_back(block);

	// Link nodes 1 .. block_size-1 into a null-terminated list.
	Node* const last = block + (block_size - 1);

	for ( Node* cur = block + 1; cur != last; ++cur )
		cur->child[0] = cur + 1;

	last->child[0] = 0;

	next_free_node = block + 1;

	return block;
	}
// Hands out an unused node: the head of the free list if there is one,
// otherwise the first node of a newly allocated block.  Sets the
// 'new_mapping' flag as a side effect.
inline AnonymizeIPAddr_A50::Node* AnonymizeIPAddr_A50::new_node()
	{
	new_mapping = 1;

	if ( ! next_free_node )
		return new_node_block();

	// Pop the head of the free list (linked through child[0]).
	Node* const recycled = next_free_node;
	next_free_node = recycled->child[0];

	return recycled;
	}
// Returns 'n' to the free list by pushing it onto the front; the list
// is linked through child[0].
inline void AnonymizeIPAddr_A50::free_node(Node *n)
	{
	Node* const previous_head = next_free_node;

	next_free_node = n;
	n->child[0] = previous_head;
	}
// -A50 anonymization: derives the output for a new address from the
// output of an existing one.  Bits ahead of position 'swivel' (counted
// from 1 at the high end) are copied from 'old_output', bit 'swivel'
// is flipped, and the bits below it are filled from rand32().
ipaddr32_t AnonymizeIPAddr_A50::make_output(ipaddr32_t old_output, int swivel) const
	{
	// Only the last bit differs: flip it, nothing to randomize.
	if ( swivel == 32 )
		return old_output ^ 1;

	const int tail_bits = 32 - swivel;

	// Bits up to swivel are unchanged; bit swivel is flipped.
	const ipaddr32_t known_part =
		((old_output >> tail_bits) ^ 1) << tail_bits;

	// Remainder of bits are random.
	const ipaddr32_t random_part = (rand32() & 0x7FFFFFFF) >> swivel;

	return known_part | random_part;
	}
// Inserts address 'a' into the tree next to node 'n'.
//
// Two new nodes are allocated.  One becomes the leaf for 'a'; the other
// receives a copy of 'n' (including its children).  'n' itself stays in
// place and becomes the parent of the two, taking its input/output from
// the child on the 1-side.  Returns the new leaf for 'a', or 0 if a node
// could not be obtained.
//
// NOTE(review): the shift below relies on 'a' differing from n->input
// (so that swivel is at least 1); find_node() checks this before
// calling.
AnonymizeIPAddr_A50::Node* AnonymizeIPAddr_A50::make_peer(ipaddr32_t a, Node* n)
{
if ( a == 0 || a == 0xFFFFFFFFU )
internal_error("0.0.0.0 and 255.255.255.255 should never get into the tree");
// Become a peer.
// Algorithm: create two nodes, the two peers. Leave orig node as
// the parent of the two new ones.
Node* down[2];
if ( ! (down[0] = new_node()) )
return 0;
if ( ! (down[1] = new_node()) )
{
// Give the first node back so it is not lost.
free_node(down[0]);
return 0;
}
// swivel is the first bit (counted from 1 at the high end) in which
// 'a' and 'n->input' differ.
int swivel = bi_ffs(a ^ n->input);
// bitvalue is the value of that bit of 'a'; it selects which of the
// two new nodes becomes the leaf for 'a'.
int bitvalue = (a >> (32 - swivel)) & 1;
down[bitvalue]->input = a;
// The new output shares n's output ahead of the swivel bit and has
// the swivel bit flipped (see make_output()).
down[bitvalue]->output = make_output(n->output, swivel);
down[bitvalue]->child[0] = down[bitvalue]->child[1] = 0;
// This copy must happen before 'n' is overwritten below.
*down[1 - bitvalue] = *n; // copy orig node down one level
n->input = down[1]->input; // NB: 1s to the right (0s to the left)
n->output = down[1]->output;
n->child[0] = down[0]; // point to children
n->child[1] = down[1];
return down[bitvalue];
}
// Returns the tree node whose input is 'a', creating it if it does not
// exist yet.
//
// 0.0.0.0 and 255.255.255.255 are served from 'special_nodes' and never
// enter the tree.  The first other address becomes the root with a
// random output.  After that the tree is descended by comparing bit
// positions (counted from 1 at the high end, as returned by bi_ffs())
// until either a node for 'a' is found or make_peer() inserts one.
AnonymizeIPAddr_A50::Node* AnonymizeIPAddr_A50::find_node(ipaddr32_t a)
	{
	// Watch out for special IP addresses, which never make it
	// into the tree.
	if ( a == 0 || a == 0xFFFFFFFFU )
		return &special_nodes[a & 1];

	if ( ! root )
		{
		root = new_node();
		root->input = a;
		root->output = rand32();
		root->child[0] = root->child[1] = 0;

		return root;
		}

	// Straight from tcpdpriv.
	Node* n = root;
	while ( n )
		{
		if ( n->input == a )
			return n;

		if ( ! n->child[0] )
			// Leaf with a different input: split it.  The next
			// iteration returns the freshly made node for 'a'.
			n = make_peer(a, n);

		else
			{
			// swivel is the first bit in which the two children
			// differ.
			int swivel =
				bi_ffs(n->child[0]->input ^ n->child[1]->input);

			if ( bi_ffs(a ^ n->input) < swivel )
				// Input differs earlier.
				n = make_peer(a, n);

			// Unsigned literal: for swivel == 1 the tested bit is
			// the top bit, which a signed int cannot be shifted
			// into without overflow.
			else if ( a & (1U << (32 - swivel)) )
				n = n->child[1];

			else
				n = n->child[0];
			}
		}

	// Only reached if make_peer() failed to obtain a node.
	internal_error("out of memory!");
	return 0;
	}
void init_ip_addr_anonymizers()
{
ip_anonymizer[KEEP_ORIG_ADDR] = 0;
ip_anonymizer[SEQUENTIALLY_NUMBERED] = new AnonymizeIPAddr_Seq();
ip_anonymizer[RANDOM_MD5] = new AnonymizeIPAddr_RandomMD5();
ip_anonymizer[PREFIX_PRESERVING_A50] = new AnonymizeIPAddr_A50();
ip_anonymizer[PREFIX_PRESERVING_MD5] = new AnonymizeIPAddr_PrefixMD5();
}
// Anonymizes 'ip' according to the policy configured for its class
// 'cl' (originator, responder, or anything else).
//
// An address found in the class's preserve table is returned as is.
// Otherwise the class's configured method decides: KEEP_ORIG_ADDR
// returns the address unchanged, any other valid method delegates to
// the matching entry of ip_anonymizer.  An out-of-range method or a
// missing anonymizer is reported through internal_error().
ipaddr32_t anonymize_ip(ipaddr32_t ip, enum ip_addr_anonymization_class_t cl)
	{
	TableVal* preserve_addr = 0;
	AddrVal addr(ip);

	int method = -1;

	// Pick the preserve table and method for this address class.
	switch ( cl ) {
	case ORIG_ADDR: // client address
		preserve_addr = preserve_orig_addr;
		method = orig_addr_anonymization;
		break;

	case RESP_ADDR: // server address
		preserve_addr = preserve_resp_addr;
		method = resp_addr_anonymization;
		break;

	default:
		preserve_addr = preserve_other_addr;
		method = other_addr_anonymization;
		break;
	}

	ipaddr32_t new_ip = 0;

	if ( preserve_addr && preserve_addr->Lookup(&addr) )
		// Explicitly exempted from anonymization.
		new_ip = ip;

	else if ( method < 0 || method >= NUM_ADDR_ANONYMIZATION_METHODS )
		internal_error("invalid IP anonymization method");

	else if ( method == KEEP_ORIG_ADDR )
		new_ip = ip;

	else if ( ip_anonymizer[method] )
		new_ip = ip_anonymizer[method]->Anonymize(ip);

	else
		internal_error("IP anonymizer not initialized");

#ifdef LOG_ANONYMIZATION_MAPPING
	log_anonymization_mapping(ip, new_ip);
#endif

	return new_ip;
	}
#ifdef LOG_ANONYMIZATION_MAPPING
#include "NetVar.h"
#include "Event.h"
// Queues an anonymization_mapping event carrying the original and the
// anonymized address.  Does nothing when anonymization_mapping is
// unset.
void log_anonymization_mapping(ipaddr32_t input, ipaddr32_t output)
	{
	if ( ! anonymization_mapping )
		return;

	val_list* args = new val_list;

	args->append(new AddrVal(input));
	args->append(new AddrVal(output));

	mgr.QueueEvent(anonymization_mapping, args);
	}
#endif