#include "zeek/Anon.h" #include #include #include #include #include "zeek/util.h" #include "zeek/net_util.h" #include "zeek/Val.h" #include "zeek/NetVar.h" #include "zeek/Reporter.h" #include "zeek/Scope.h" #include "zeek/ID.h" #include "zeek/IPAddr.h" #include "zeek/Event.h" namespace zeek::detail { AnonymizeIPAddr* ip_anonymizer[NUM_ADDR_ANONYMIZATION_METHODS] = {nullptr}; static uint32_t rand32() { return ((util::detail::random_number() & 0xffff) << 16) | (util::detail::random_number() & 0xffff); } // From tcpdpriv. static int bi_ffs(uint32_t value) { int add = 0; static uint8_t bvals[] = { 0, 4, 3, 3, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1 }; if ( (value & 0xFFFF0000) == 0 ) { if ( value == 0 ) // Zero input ==> zero output. return 0; add += 16; } else value >>= 16; if ( (value & 0xFF00) == 0 ) add += 8; else value >>= 8; if ( (value & 0xF0) == 0 ) add += 4; else value >>= 4; return add + bvals[value & 0xf]; } #define first_n_bit_mask(n) (~(0xFFFFFFFFU >> n)) ipaddr32_t AnonymizeIPAddr::Anonymize(ipaddr32_t addr) { std::map::iterator p = mapping.find(addr); if ( p != mapping.end() ) return p->second; else { ipaddr32_t new_addr = anonymize(addr); mapping[addr] = new_addr; return new_addr; } } // Keep the specified prefix unchanged. bool AnonymizeIPAddr::PreservePrefix(ipaddr32_t /* input */, int /* num_bits */) { reporter->InternalError("prefix preserving is not supported for the anonymizer"); return false; } bool AnonymizeIPAddr::PreserveNet(ipaddr32_t input) { switch ( addr_to_class(ntohl(input)) ) { case 'A': return PreservePrefix(input, 8); case 'B': return PreservePrefix(input, 16); case 'C': return PreservePrefix(input, 24); default: return false; } } ipaddr32_t AnonymizeIPAddr_Seq::anonymize(ipaddr32_t /* input */) { ++seq; return htonl(seq); } ipaddr32_t AnonymizeIPAddr_RandomMD5::anonymize(ipaddr32_t input) { uint8_t digest[16]; ipaddr32_t output = 0; util::detail::hmac_md5(sizeof(input), (u_char*)(&input), digest); for ( int i = 0; i < 4; ++i ) output = (output << 8) | digest[i]; return output; } // This code is from "On the Design and Performance of Prefix-Preserving // IP Traffic Trace Anonymization", by Xu et al (IMW 2001) // // http://www.imconf.net/imw-2001/proceedings.html ipaddr32_t AnonymizeIPAddr_PrefixMD5::anonymize(ipaddr32_t input) { uint8_t digest[16]; ipaddr32_t prefix_mask = 0xffffffff; input = ntohl(input); ipaddr32_t output = input; for ( int i = 0; i < 32; ++i ) { // PAD(x_0 ... x_{i-1}) = x_0 ... x_{i-1} 1 0 ... 0 . prefix.len = htonl(i + 1); prefix.prefix = htonl((input & ~(prefix_mask>>i)) | (1<<(31-i))); // HK(PAD(x_0 ... x_{i-1})). util::detail::hmac_md5(sizeof(prefix), (u_char*) &prefix, digest); // f_{i-1} = LSB(HK(PAD(x_0 ... x_{i-1}))). ipaddr32_t bit_mask = (digest[0] & 1) << (31-i); // x_i' = x_i ^ f_{i-1}. output ^= bit_mask; } return htonl(output); } AnonymizeIPAddr_A50::~AnonymizeIPAddr_A50() { for ( auto& b : blocks ) delete [] b; } void AnonymizeIPAddr_A50::init() { root = next_free_node = nullptr; // Prepare special nodes for 0.0.0.0 and 255.255.255.255. memset(&special_nodes[0], 0, sizeof(special_nodes)); special_nodes[0].input = special_nodes[0].output = 0; special_nodes[1].input = special_nodes[1].output = 0xFFFFFFFF; method = 0; before_anonymization = 1; new_mapping = 0; } bool AnonymizeIPAddr_A50::PreservePrefix(ipaddr32_t input, int num_bits) { DEBUG_MSG("%s/%d\n", IPAddr(IPv4, &input, IPAddr::Network).AsString().c_str(), num_bits); if ( ! before_anonymization ) { reporter->Error("prefix perservation specified after anonymization begun"); return false; } input = ntohl(input); // Sanitize input. input = input & first_n_bit_mask(num_bits); Node* n = find_node(input); // Preserve the first num_bits bits of addr. if ( num_bits == 32 ) n->output = input; else if ( num_bits > 0 ) { assert((0xFFFFFFFFU >> 1) == 0x7FFFFFFFU); uint32_t suffix_mask = (0xFFFFFFFFU >> num_bits); uint32_t prefix_mask = ~suffix_mask; n->output = (input & prefix_mask) | (rand32() & suffix_mask); } return true; } ipaddr32_t AnonymizeIPAddr_A50::anonymize(ipaddr32_t a) { before_anonymization = 0; new_mapping = 0; if ( Node* n = find_node(ntohl(a)) ) { ipaddr32_t output = htonl(n->output); return output; } else return 0; } AnonymizeIPAddr_A50::Node* AnonymizeIPAddr_A50::new_node_block() { assert(! next_free_node); int block_size = 1024; Node* block = new Node[block_size]; if ( ! block ) reporter->InternalError("out of memory!"); blocks.push_back(block); for ( int i = 1; i < block_size - 1; ++i ) block[i].child[0] = &block[i+1]; block[block_size - 1].child[0] = nullptr; next_free_node = &block[1]; return &block[0]; } inline AnonymizeIPAddr_A50::Node* AnonymizeIPAddr_A50::new_node() { new_mapping = 1; if ( next_free_node ) { Node* n = next_free_node; next_free_node = n->child[0]; return n; } else return new_node_block(); } inline void AnonymizeIPAddr_A50::free_node(Node *n) { n->child[0] = next_free_node; next_free_node = n; } ipaddr32_t AnonymizeIPAddr_A50::make_output(ipaddr32_t old_output, int swivel) const { // -A50 anonymization if ( swivel == 32 ) return old_output ^ 1; else { // Bits up to swivel are unchanged; bit swivel is flipped. ipaddr32_t known_part = ((old_output >> (32 - swivel)) ^ 1) << (32 - swivel); // Remainder of bits are random. return known_part | ((rand32() & 0x7FFFFFFF) >> swivel); } } AnonymizeIPAddr_A50::Node* AnonymizeIPAddr_A50::make_peer(ipaddr32_t a, Node* n) { if ( a == 0 || a == 0xFFFFFFFFU ) reporter->InternalError("0.0.0.0 and 255.255.255.255 should never get into the tree"); // Become a peer. // Algorithm: create two nodes, the two peers. Leave orig node as // the parent of the two new ones. Node* down[2]; if ( ! (down[0] = new_node()) ) return nullptr; if ( ! (down[1] = new_node()) ) { free_node(down[0]); return nullptr; } // swivel is first bit 'a' and 'old->input' differ. int swivel = bi_ffs(a ^ n->input); // bitvalue is the value of that bit of 'a'. int bitvalue = (a >> (32 - swivel)) & 1; down[bitvalue]->input = a; down[bitvalue]->output = make_output(n->output, swivel); down[bitvalue]->child[0] = down[bitvalue]->child[1] = nullptr; *down[1 - bitvalue] = *n; // copy orig node down one level n->input = down[1]->input; // NB: 1s to the right (0s to the left) n->output = down[1]->output; n->child[0] = down[0]; // point to children n->child[1] = down[1]; return down[bitvalue]; } AnonymizeIPAddr_A50::Node* AnonymizeIPAddr_A50::find_node(ipaddr32_t a) { // Watch out for special IP addresses, which never make it // into the tree. if ( a == 0 || a == 0xFFFFFFFFU ) return &special_nodes[a & 1]; if ( ! root ) { root = new_node(); root->input = a; root->output = rand32(); root->child[0] = root->child[1] = nullptr; return root; } // Straight from tcpdpriv. Node* n = root; while ( n ) { if ( n->input == a ) return n; if ( ! n->child[0] ) n = make_peer(a, n); else { // swivel is the first bit in which the two children // differ. int swivel = bi_ffs(n->child[0]->input ^ n->child[1]->input); if ( bi_ffs(a ^ n->input) < swivel ) // Input differs earlier. n = make_peer(a, n); else if ( a & (1 << (32 - swivel)) ) n = n->child[1]; else n = n->child[0]; } } reporter->InternalError("out of memory!"); return nullptr; } static TableValPtr anon_preserve_orig_addr; static TableValPtr anon_preserve_resp_addr; static TableValPtr anon_preserve_other_addr; void init_ip_addr_anonymizers() { ip_anonymizer[KEEP_ORIG_ADDR] = nullptr; ip_anonymizer[SEQUENTIALLY_NUMBERED] = new AnonymizeIPAddr_Seq(); ip_anonymizer[RANDOM_MD5] = new AnonymizeIPAddr_RandomMD5(); ip_anonymizer[PREFIX_PRESERVING_A50] = new AnonymizeIPAddr_A50(); ip_anonymizer[PREFIX_PRESERVING_MD5] = new AnonymizeIPAddr_PrefixMD5(); auto id = global_scope()->Find("preserve_orig_addr"); if ( id ) anon_preserve_orig_addr = cast_intrusive(id->GetVal()); id = global_scope()->Find("preserve_resp_addr"); if ( id ) anon_preserve_resp_addr = cast_intrusive(id->GetVal()); id = global_scope()->Find("preserve_other_addr"); if ( id ) anon_preserve_other_addr = cast_intrusive(id->GetVal()); } ipaddr32_t anonymize_ip(ipaddr32_t ip, enum ip_addr_anonymization_class_t cl) { TableVal* preserve_addr = nullptr; auto addr = make_intrusive(ip); int method = -1; switch ( cl ) { case ORIG_ADDR: // client address preserve_addr = anon_preserve_orig_addr.get(); method = orig_addr_anonymization; break; case RESP_ADDR: // server address preserve_addr = anon_preserve_resp_addr.get(); method = resp_addr_anonymization; break; default: preserve_addr = anon_preserve_other_addr.get(); method = other_addr_anonymization; break; } ipaddr32_t new_ip = 0; if ( preserve_addr && preserve_addr->FindOrDefault(addr) ) new_ip = ip; else if ( method >= 0 && method < NUM_ADDR_ANONYMIZATION_METHODS ) { if ( method == KEEP_ORIG_ADDR ) new_ip = ip; else if ( ! ip_anonymizer[method] ) reporter->InternalError("IP anonymizer not initialized"); else new_ip = ip_anonymizer[method]->Anonymize(ip); } else reporter->InternalError("invalid IP anonymization method"); #ifdef LOG_ANONYMIZATION_MAPPING log_anonymization_mapping(ip, new_ip); #endif return new_ip; } #ifdef LOG_ANONYMIZATION_MAPPING void log_anonymization_mapping(ipaddr32_t input, ipaddr32_t output) { if ( anonymization_mapping ) event_mgr.Enqueue(anonymization_mapping, make_intrusive(input), make_intrusive(output) ); } #endif } // namespace zeek::detail