diff --git a/CHANGES b/CHANGES index 74da6ab39e..ca1b919f38 100644 --- a/CHANGES +++ b/CHANGES @@ -1,4 +1,14 @@ +2.1-1144 | 2013-08-28 18:51:06 -0700 + + * Add bits_per_uid unit test. Addresses BIT-1016. (Jon Siwek) + + * UID optimizations. Addresses BIT-1016. (Jon Siwek) + + * Added a $unique_max field to Reducers for the SumStats::UNIQUE + calculation, and using the new option in scan.bro and the FTP + bruteforce detection. (Seth Hall) + 2.1-1137 | 2013-08-27 13:26:44 -0700 * Add BiF hexstr_to_bytestring() that does exactly the opposite of diff --git a/VERSION b/VERSION index 1b6b465552..9e9c4dd863 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.1-1137 +2.1-1144 diff --git a/scripts/base/init-bare.bro b/scripts/base/init-bare.bro index d22aa067d8..59f13bf2eb 100644 --- a/scripts/base/init-bare.bro +++ b/scripts/base/init-bare.bro @@ -3082,6 +3082,7 @@ const global_hash_seed: string = "" &redef; ## Number of bits in UIDs that are generated to identify connections and ## files. The larger the value, the more confidence in UID uniqueness. +## The maximum is currently 128 bits. const bits_per_uid: count = 96 &redef; # Load BiFs defined by plugins. diff --git a/src/Conn.cc b/src/Conn.cc index e653211583..77c854c126 100644 --- a/src/Conn.cc +++ b/src/Conn.cc @@ -378,7 +378,7 @@ RecordVal* Connection::BuildConnVal() conn_val->Assign(8, new StringVal("")); // history if ( ! uid ) - uid = Bro::UID(bits_per_uid); + uid.Set(bits_per_uid); conn_val->Assign(9, new StringVal(uid.Base62("C").c_str())); diff --git a/src/TunnelEncapsulation.cc b/src/TunnelEncapsulation.cc index 6330236dc2..cb4b1eaabe 100644 --- a/src/TunnelEncapsulation.cc +++ b/src/TunnelEncapsulation.cc @@ -11,7 +11,7 @@ EncapsulatingConn::EncapsulatingConn(Connection* c, BifEnum::Tunnel::Type t) { if ( ! uid ) { - uid = Bro::UID(bits_per_uid); + uid.Set(bits_per_uid); c->SetUID(uid); } } diff --git a/src/UID.cc b/src/UID.cc index b7e5d0617c..28675c0376 100644 --- a/src/UID.cc +++ b/src/UID.cc @@ -7,39 +7,30 @@ using namespace Bro; using namespace std; -void UID::Set(bro_uint_t bits, const std::vector& v) +void UID::Set(bro_uint_t bits, const uint64* v, size_t n) { - uid.clear(); + initialized = true; + + for ( size_t i = 0; i < BRO_UID_LEN; ++i ) + uid[i] = 0; + + if ( bits > BRO_UID_LEN * 64 ) + bits = BRO_UID_LEN * 64; div_t res = div(bits, 64); size_t size = res.rem ? res.quot + 1 : res.quot; for ( size_t i = 0; i < size; ++i ) - uid.push_back(i < v.size() ? v[i] : calculate_unique_id()); + uid[i] = v && i < n ? v[i] : calculate_unique_id(); if ( res.rem ) uid[0] >>= 64 - res.rem; } -string UID::Base62(const std::string& prefix) const - { - char tmp[64]; // technically, this should dynamically scale based on size - string rval(prefix); - - for ( size_t i = 0; i < uid.size(); ++i ) - rval.append(uitoa_n(uid[i], tmp, sizeof(tmp), 62)); - - return rval; - } - bool Bro::operator==(const UID& u1, const UID& u2) { - if ( u1.uid.size() != u2.uid.size() ) - return false; - - for ( size_t i = 0; i < u1.uid.size(); ++i ) + for ( size_t i = 0; i < BRO_UID_LEN; ++i ) if ( u1.uid[i] != u2.uid[i] ) return false; - return true; } diff --git a/src/UID.h b/src/UID.h index a9a77dcc90..9ccf0ae0e8 100644 --- a/src/UID.h +++ b/src/UID.h @@ -4,10 +4,12 @@ #define BRO_UID_H #include -#include +#include "Reporter.h" #include "util.h" +#define BRO_UID_LEN 2 + namespace Bro { /** @@ -18,53 +20,54 @@ class UID { public: /** - * Default ctor. The UID is uninitialized and in string format is - * represented by an empty string. + * Default ctor. The UID is uninitialized. */ - UID() {} + UID() : initialized(false) {} /** * Construct a UID of a given bit-length, optionally from given values. * @see UID::Set */ - UID(bro_uint_t bits, const std::vector& v = std::vector()) - { Set(bits, v); } + UID(bro_uint_t bits, const uint64* v = 0, size_t n = 0) + { Set(bits, v, n); } /** * Copy constructor. */ - UID(const UID& other) { uid = other.uid; } + UID(const UID& other); /** * Inititialize a UID of a given bit-length, optionally from given values. - * @param bits The desired length in bits of the UID. - * @param v A vector of values with which to initialize the UID. - * If empty or doesn't contain enough values to satisfy \a bits, - * then values are automatically generated using + * @param bits The desired length in bits of the UID, up to a max of + * BRO_UID_LEN * 64. + * @param v A pointer to an array of values with which to initialize the + * UID. If empty or doesn't contain enough values to satisfy + * \a bits, then values are automatically generated using * calculate_unique_id(). If \a bits isn't evenly divisible by * 64, then a value is truncated to bit in desired bit-length. + * @param n number of 64-bit elements in array pointed to by \a v. */ - void Set(bro_uint_t bits, - const std::vector& v = std::vector()); + void Set(bro_uint_t bits, const uint64* v = 0, size_t n = 0); /** * Returns a base62 (characters 0-9, A-Z, a-z) representation of the UID. * @param prefix An optional string prefix. * @return a base62 string representing the UID. */ - std::string Base62(const std::string& prefix = "") const; + std::string Base62(std::string prefix = "") const; /** * @return false if the UID instance was created via the default ctor * and not yet initialized w/ Set(). * TODO: this would be better as an "explicit" conversion operator (C++11) */ - operator bool() const { return ( ! uid.empty() ); } + operator bool() const + { return initialized; } /** * Assignment operator. */ - UID& operator=(const UID& other) { uid = other.uid; return *this; } + UID& operator=(const UID& other); /** * UID equality operator. @@ -78,11 +81,41 @@ public: { return ! ( u1 == u2 ); } private: - std::vector uid; + uint64 uid[BRO_UID_LEN]; + bool initialized; // Since technically uid == 0 is a legit UID }; bool operator==(const UID& u1, const UID& u2); +inline UID::UID(const UID& other) + { + for ( size_t i = 0; i < BRO_UID_LEN; ++i ) + uid[i] = other.uid[i]; + + initialized = other.initialized; + } + +inline UID& UID::operator=(const UID& other) + { + for ( size_t i = 0; i < BRO_UID_LEN; ++i ) + uid[i] = other.uid[i]; + + initialized = other.initialized; + return *this; + } + +inline std::string UID::Base62(std::string prefix) const + { + if ( ! initialized ) + reporter->InternalError("use of uninitialized UID"); + + char tmp[64]; // technically, this should dynamically scale w/ BRO_UID_LEN + for ( size_t i = 0; i < BRO_UID_LEN; ++i ) + prefix.append(uitoa_n(uid[i], tmp, sizeof(tmp), 62)); + + return prefix; + } + } // namespace Bro #endif diff --git a/src/file_analysis/Manager.cc b/src/file_analysis/Manager.cc index 77441f8264..8dfb220381 100644 --- a/src/file_analysis/Manager.cc +++ b/src/file_analysis/Manager.cc @@ -64,10 +64,7 @@ string Manager::HashHandle(const string& handle) const MD5(reinterpret_cast(msg.data()), msg.size(), reinterpret_cast(hash)); - vector v; - v.push_back(hash[0]); - v.push_back(hash[1]); - return Bro::UID(bits_per_uid, v).Base62("F"); + return Bro::UID(bits_per_uid, hash, 2).Base62("F"); } void Manager::SetHandle(const string& handle) diff --git a/testing/btest/Baseline/core.bits_per_uid/128 b/testing/btest/Baseline/core.bits_per_uid/128 new file mode 100644 index 0000000000..95ef343262 --- /dev/null +++ b/testing/btest/Baseline/core.bits_per_uid/128 @@ -0,0 +1,9 @@ +CUWkUyAuUGXfarKYeMETxOg +Ck6kgXLOoSKlnQcgTWjvg4c +Cj4u32Pc5bifTEfuqmmG4bh +Fj3nTWNjezo6G6xBmyo58Tf +F4VAnSiNGSQhKEoCPd4zuQd +CFrJExwHcSal5OKnoww6xl4 +C3PKsZ2Uye21VW0XPVINV8a +FaJg8mtdsS86cWjSe4spPPl +FvBr89nD30GgGAp3wgtm6qf diff --git a/testing/btest/Baseline/core.bits_per_uid/256 b/testing/btest/Baseline/core.bits_per_uid/256 new file mode 100644 index 0000000000..95ef343262 --- /dev/null +++ b/testing/btest/Baseline/core.bits_per_uid/256 @@ -0,0 +1,9 @@ +CUWkUyAuUGXfarKYeMETxOg +Ck6kgXLOoSKlnQcgTWjvg4c +Cj4u32Pc5bifTEfuqmmG4bh +Fj3nTWNjezo6G6xBmyo58Tf +F4VAnSiNGSQhKEoCPd4zuQd +CFrJExwHcSal5OKnoww6xl4 +C3PKsZ2Uye21VW0XPVINV8a +FaJg8mtdsS86cWjSe4spPPl +FvBr89nD30GgGAp3wgtm6qf diff --git a/testing/btest/Baseline/core.bits_per_uid/32 b/testing/btest/Baseline/core.bits_per_uid/32 new file mode 100644 index 0000000000..a20d05dbd5 --- /dev/null +++ b/testing/btest/Baseline/core.bits_per_uid/32 @@ -0,0 +1,9 @@ +CXWv6p30 +CCyvnA30 +CjhGID40 +F75yAm10 +FmGk6O30 +CdfHBz20 +CCvvfg30 +Fuh3fj10 +Ftwuyy30 diff --git a/testing/btest/Baseline/core.bits_per_uid/64 b/testing/btest/Baseline/core.bits_per_uid/64 new file mode 100644 index 0000000000..b34eb4879d --- /dev/null +++ b/testing/btest/Baseline/core.bits_per_uid/64 @@ -0,0 +1,9 @@ +CUWkUyAuUGXf0 +CarKYeMETxOg0 +Ck6kgXLOoSKl0 +Fj3nTWNjezo60 +F4VAnSiNGSQh0 +CnQcgTWjvg4c0 +Cj4u32Pc5bif0 +FaJg8mtdsS860 +FvBr89nD30Gg0 diff --git a/testing/btest/Baseline/core.bits_per_uid/96 b/testing/btest/Baseline/core.bits_per_uid/96 new file mode 100644 index 0000000000..3ba0f50e04 --- /dev/null +++ b/testing/btest/Baseline/core.bits_per_uid/96 @@ -0,0 +1,9 @@ +CXWv6p3arKYeMETxOg +CjhGID4nQcgTWjvg4c +CCvvfg3TEfuqmmG4bh +F75yAm1G6xBmyo58Tf +FmGk6O3KEoCPd4zuQd +CsRx2w45OKnoww6xl4 +CRJuHdVW0XPVINV8a +Fuh3fj1cWjSe4spPPl +Ftwuyy3GAp3wgtm6qf diff --git a/testing/btest/core/bits_per_uid.bro b/testing/btest/core/bits_per_uid.bro new file mode 100644 index 0000000000..6e997907de --- /dev/null +++ b/testing/btest/core/bits_per_uid.bro @@ -0,0 +1,21 @@ +# @TEST-EXEC: bro -r $TRACES/ftp/ipv4.trace %INPUT bits_per_uid=32 >32 +# @TEST-EXEC: btest-diff 32 +# @TEST-EXEC: bro -r $TRACES/ftp/ipv4.trace %INPUT bits_per_uid=64 >64 +# @TEST-EXEC: btest-diff 64 +# @TEST-EXEC: bro -r $TRACES/ftp/ipv4.trace %INPUT bits_per_uid=96 >96 +# @TEST-EXEC: btest-diff 96 +# @TEST-EXEC: bro -r $TRACES/ftp/ipv4.trace %INPUT bits_per_uid=128 >128 +# @TEST-EXEC: btest-diff 128 +# @TEST-EXEC: bro -r $TRACES/ftp/ipv4.trace %INPUT bits_per_uid=256 >256 +# @TEST-EXEC: btest-diff 256 +# @TEST-EXEC: cmp 128 256 + +event new_connection(c: connection) + { + print c$uid; + } + +event file_new(f: fa_file) + { + print f$id; + }