Merge remote-tracking branch 'origin/topic/jsiwek/uid'

Thanks!

* origin/topic/jsiwek/uid:
  Add bits_per_uid unit test, addressing BIT-1016.
  UID optimizations addressing BIT-1016.

BIT-1016 #merged
This commit is contained in:
Robin Sommer 2013-08-28 18:51:06 -07:00
commit 6373d817a5
14 changed files with 141 additions and 43 deletions

10
CHANGES
View file

@ -1,4 +1,14 @@
2.1-1144 | 2013-08-28 18:51:06 -0700
* Add bits_per_uid unit test. Addresses BIT-1016. (Jon Siwek)
* UID optimizations. Addresses BIT-1016. (Jon Siwek)
* Added a $unique_max field to Reducers for the SumStats::UNIQUE
calculation, and using the new option in scan.bro and the FTP
bruteforce detection. (Seth Hall)
2.1-1137 | 2013-08-27 13:26:44 -0700 2.1-1137 | 2013-08-27 13:26:44 -0700
* Add BiF hexstr_to_bytestring() that does exactly the opposite of * Add BiF hexstr_to_bytestring() that does exactly the opposite of

View file

@ -1 +1 @@
2.1-1137 2.1-1144

View file

@ -3082,6 +3082,7 @@ const global_hash_seed: string = "" &redef;
## Number of bits in UIDs that are generated to identify connections and ## Number of bits in UIDs that are generated to identify connections and
## files. The larger the value, the more confidence in UID uniqueness. ## files. The larger the value, the more confidence in UID uniqueness.
## The maximum is currently 128 bits.
const bits_per_uid: count = 96 &redef; const bits_per_uid: count = 96 &redef;
# Load BiFs defined by plugins. # Load BiFs defined by plugins.

View file

@ -378,7 +378,7 @@ RecordVal* Connection::BuildConnVal()
conn_val->Assign(8, new StringVal("")); // history conn_val->Assign(8, new StringVal("")); // history
if ( ! uid ) if ( ! uid )
uid = Bro::UID(bits_per_uid); uid.Set(bits_per_uid);
conn_val->Assign(9, new StringVal(uid.Base62("C").c_str())); conn_val->Assign(9, new StringVal(uid.Base62("C").c_str()));

View file

@ -11,7 +11,7 @@ EncapsulatingConn::EncapsulatingConn(Connection* c, BifEnum::Tunnel::Type t)
{ {
if ( ! uid ) if ( ! uid )
{ {
uid = Bro::UID(bits_per_uid); uid.Set(bits_per_uid);
c->SetUID(uid); c->SetUID(uid);
} }
} }

View file

@ -7,39 +7,30 @@
using namespace Bro; using namespace Bro;
using namespace std; using namespace std;
void UID::Set(bro_uint_t bits, const std::vector<uint64>& v) void UID::Set(bro_uint_t bits, const uint64* v, size_t n)
{ {
uid.clear(); initialized = true;
for ( size_t i = 0; i < BRO_UID_LEN; ++i )
uid[i] = 0;
if ( bits > BRO_UID_LEN * 64 )
bits = BRO_UID_LEN * 64;
div_t res = div(bits, 64); div_t res = div(bits, 64);
size_t size = res.rem ? res.quot + 1 : res.quot; size_t size = res.rem ? res.quot + 1 : res.quot;
for ( size_t i = 0; i < size; ++i ) for ( size_t i = 0; i < size; ++i )
uid.push_back(i < v.size() ? v[i] : calculate_unique_id()); uid[i] = v && i < n ? v[i] : calculate_unique_id();
if ( res.rem ) if ( res.rem )
uid[0] >>= 64 - res.rem; uid[0] >>= 64 - res.rem;
} }
string UID::Base62(const std::string& prefix) const
{
char tmp[64]; // technically, this should dynamically scale based on size
string rval(prefix);
for ( size_t i = 0; i < uid.size(); ++i )
rval.append(uitoa_n(uid[i], tmp, sizeof(tmp), 62));
return rval;
}
bool Bro::operator==(const UID& u1, const UID& u2) bool Bro::operator==(const UID& u1, const UID& u2)
{ {
if ( u1.uid.size() != u2.uid.size() ) for ( size_t i = 0; i < BRO_UID_LEN; ++i )
return false;
for ( size_t i = 0; i < u1.uid.size(); ++i )
if ( u1.uid[i] != u2.uid[i] ) if ( u1.uid[i] != u2.uid[i] )
return false; return false;
return true; return true;
} }

View file

@ -4,10 +4,12 @@
#define BRO_UID_H #define BRO_UID_H
#include <string> #include <string>
#include <vector>
#include "Reporter.h"
#include "util.h" #include "util.h"
#define BRO_UID_LEN 2
namespace Bro { namespace Bro {
/** /**
@ -18,53 +20,54 @@ class UID {
public: public:
/** /**
* Default ctor. The UID is uninitialized and in string format is * Default ctor. The UID is uninitialized.
* represented by an empty string.
*/ */
UID() {} UID() : initialized(false) {}
/** /**
* Construct a UID of a given bit-length, optionally from given values. * Construct a UID of a given bit-length, optionally from given values.
* @see UID::Set * @see UID::Set
*/ */
UID(bro_uint_t bits, const std::vector<uint64>& v = std::vector<uint64>()) UID(bro_uint_t bits, const uint64* v = 0, size_t n = 0)
{ Set(bits, v); } { Set(bits, v, n); }
/** /**
* Copy constructor. * Copy constructor.
*/ */
UID(const UID& other) { uid = other.uid; } UID(const UID& other);
/** /**
* Initialize a UID of a given bit-length, optionally from given values. * Initialize a UID of a given bit-length, optionally from given values.
* @param bits The desired length in bits of the UID. * @param bits The desired length in bits of the UID, up to a max of
* @param v A vector of values with which to initialize the UID. * BRO_UID_LEN * 64.
* If empty or doesn't contain enough values to satisfy \a bits, * @param v A pointer to an array of values with which to initialize the
* then values are automatically generated using * UID. If empty or doesn't contain enough values to satisfy
* \a bits, then values are automatically generated using
* calculate_unique_id(). If \a bits isn't evenly divisible by * calculate_unique_id(). If \a bits isn't evenly divisible by
* 64, then a value is truncated to fit in the desired bit-length. * 64, then a value is truncated to fit in the desired bit-length.
* @param n number of 64-bit elements in array pointed to by \a v.
*/ */
void Set(bro_uint_t bits, void Set(bro_uint_t bits, const uint64* v = 0, size_t n = 0);
const std::vector<uint64>& v = std::vector<uint64>());
/** /**
* Returns a base62 (characters 0-9, A-Z, a-z) representation of the UID. * Returns a base62 (characters 0-9, A-Z, a-z) representation of the UID.
* @param prefix An optional string prefix. * @param prefix An optional string prefix.
* @return a base62 string representing the UID. * @return a base62 string representing the UID.
*/ */
std::string Base62(const std::string& prefix = "") const; std::string Base62(std::string prefix = "") const;
/** /**
* @return false if the UID instance was created via the default ctor * @return false if the UID instance was created via the default ctor
* and not yet initialized w/ Set(). * and not yet initialized w/ Set().
* TODO: this would be better as an "explicit" conversion operator (C++11) * TODO: this would be better as an "explicit" conversion operator (C++11)
*/ */
operator bool() const { return ( ! uid.empty() ); } operator bool() const
{ return initialized; }
/** /**
* Assignment operator. * Assignment operator.
*/ */
UID& operator=(const UID& other) { uid = other.uid; return *this; } UID& operator=(const UID& other);
/** /**
* UID equality operator. * UID equality operator.
@ -78,11 +81,41 @@ public:
{ return ! ( u1 == u2 ); } { return ! ( u1 == u2 ); }
private: private:
std::vector<uint64> uid; uint64 uid[BRO_UID_LEN];
bool initialized; // Since technically uid == 0 is a legit UID
}; };
bool operator==(const UID& u1, const UID& u2); bool operator==(const UID& u1, const UID& u2);
/**
 * Copy constructor: takes over the initialization flag of \a other and
 * duplicates all BRO_UID_LEN 64-bit words of its value.
 */
inline UID::UID(const UID& other)
	: initialized(other.initialized)
	{
	size_t word = 0;

	while ( word < BRO_UID_LEN )
		{
		uid[word] = other.uid[word];
		++word;
		}
	}
/**
 * Assignment operator: overwrites this UID's initialization flag and all
 * BRO_UID_LEN 64-bit words with those of \a other.
 * @return a reference to this object.
 */
inline UID& UID::operator=(const UID& other)
	{
	initialized = other.initialized;

	size_t word = 0;

	while ( word < BRO_UID_LEN )
		{
		uid[word] = other.uid[word];
		++word;
		}

	return *this;
	}
/**
 * Renders the UID as text: each of the BRO_UID_LEN 64-bit words is converted
 * with uitoa_n() in base 62 and appended, in index order, to \a prefix.
 * Reports an internal error through the reporter if the UID was never
 * initialized.
 * @param prefix string the converted words are appended to.
 * @return \a prefix followed by the converted words.
 */
inline std::string UID::Base62(std::string prefix) const
	{
	if ( ! initialized )
		reporter->InternalError("use of uninitialized UID");

	// Scratch space handed to uitoa_n() for one word at a time.
	char digits[64];

	size_t word = 0;

	while ( word < BRO_UID_LEN )
		{
		prefix.append(uitoa_n(uid[word], digits, sizeof(digits), 62));
		++word;
		}

	return prefix;
	}
} // namespace Bro } // namespace Bro
#endif #endif

View file

@ -64,10 +64,7 @@ string Manager::HashHandle(const string& handle) const
MD5(reinterpret_cast<const u_char*>(msg.data()), msg.size(), MD5(reinterpret_cast<const u_char*>(msg.data()), msg.size(),
reinterpret_cast<u_char*>(hash)); reinterpret_cast<u_char*>(hash));
vector<uint64> v; return Bro::UID(bits_per_uid, hash, 2).Base62("F");
v.push_back(hash[0]);
v.push_back(hash[1]);
return Bro::UID(bits_per_uid, v).Base62("F");
} }
void Manager::SetHandle(const string& handle) void Manager::SetHandle(const string& handle)

View file

@ -0,0 +1,9 @@
CUWkUyAuUGXfarKYeMETxOg
Ck6kgXLOoSKlnQcgTWjvg4c
Cj4u32Pc5bifTEfuqmmG4bh
Fj3nTWNjezo6G6xBmyo58Tf
F4VAnSiNGSQhKEoCPd4zuQd
CFrJExwHcSal5OKnoww6xl4
C3PKsZ2Uye21VW0XPVINV8a
FaJg8mtdsS86cWjSe4spPPl
FvBr89nD30GgGAp3wgtm6qf

View file

@ -0,0 +1,9 @@
CUWkUyAuUGXfarKYeMETxOg
Ck6kgXLOoSKlnQcgTWjvg4c
Cj4u32Pc5bifTEfuqmmG4bh
Fj3nTWNjezo6G6xBmyo58Tf
F4VAnSiNGSQhKEoCPd4zuQd
CFrJExwHcSal5OKnoww6xl4
C3PKsZ2Uye21VW0XPVINV8a
FaJg8mtdsS86cWjSe4spPPl
FvBr89nD30GgGAp3wgtm6qf

View file

@ -0,0 +1,9 @@
CXWv6p30
CCyvnA30
CjhGID40
F75yAm10
FmGk6O30
CdfHBz20
CCvvfg30
Fuh3fj10
Ftwuyy30

View file

@ -0,0 +1,9 @@
CUWkUyAuUGXf0
CarKYeMETxOg0
Ck6kgXLOoSKl0
Fj3nTWNjezo60
F4VAnSiNGSQh0
CnQcgTWjvg4c0
Cj4u32Pc5bif0
FaJg8mtdsS860
FvBr89nD30Gg0

View file

@ -0,0 +1,9 @@
CXWv6p3arKYeMETxOg
CjhGID4nQcgTWjvg4c
CCvvfg3TEfuqmmG4bh
F75yAm1G6xBmyo58Tf
FmGk6O3KEoCPd4zuQd
CsRx2w45OKnoww6xl4
CRJuHdVW0XPVINV8a
Fuh3fj1cWjSe4spPPl
Ftwuyy3GAp3wgtm6qf

View file

@ -0,0 +1,21 @@
# @TEST-EXEC: bro -r $TRACES/ftp/ipv4.trace %INPUT bits_per_uid=32 >32
# @TEST-EXEC: btest-diff 32
# @TEST-EXEC: bro -r $TRACES/ftp/ipv4.trace %INPUT bits_per_uid=64 >64
# @TEST-EXEC: btest-diff 64
# @TEST-EXEC: bro -r $TRACES/ftp/ipv4.trace %INPUT bits_per_uid=96 >96
# @TEST-EXEC: btest-diff 96
# @TEST-EXEC: bro -r $TRACES/ftp/ipv4.trace %INPUT bits_per_uid=128 >128
# @TEST-EXEC: btest-diff 128
# @TEST-EXEC: bro -r $TRACES/ftp/ipv4.trace %INPUT bits_per_uid=256 >256
# @TEST-EXEC: btest-diff 256
# @TEST-EXEC: cmp 128 256
# Print the UID of each new connection; the test directives above redirect
# this output to a per-bits_per_uid file that is checked with btest-diff.
event new_connection(c: connection)
{
print c$uid;
}
# Print the id of each new file; it lands in the same redirected output
# file as the connection UIDs and is checked with btest-diff.
event file_new(f: fa_file)
{
print f$id;
}