From dbca5be43c36b7dcd22a3c9c992bd5bfb6e2bddd Mon Sep 17 00:00:00 2001
From: Robin Sommer
Date: Thu, 6 Jan 2011 17:16:10 -0800
Subject: [PATCH] Applying Seth's patch from #265 adding entropy BiFs.

---
 Reviewer notes (this section is not part of the commit message):

 * src/RandTest.cc and src/RandTest.h each contained their full text
   twice, which redefines the class, rt_log2() and every member
   function. Only one copy is kept; hunk sizes and the diffstat are
   updated to match.
 * RandTest.h gains an include guard and a final newline; rt_log2() is
   now "static inline" so translation units that do not call it stay
   warning-free.
 * The operand-less "#include" lines and the argument-less
   "map entropy_states" in the added code are spelled out from their
   usage (log10 -> math.h, RandTest -> RandTest.h, key *s / value
   new RandTest() -> map<BroString, RandTest*>).
 * RandTest::end() no longer divides by zero for empty input or for
   fewer than RT_MONTEN bytes, and no longer overwrites the running
   sums, so it may be called more than once.
 * The dead store "oc <<= 1" in RandTest::add() is removed.
 * find_entropy uses a local RandTest instead of new/delete;
   entropy_test_add/finish do a single map lookup.
 * The "index" lines of the three files whose post-image changed are
   dropped because their blob ids would be stale.
 * TODO confirm against the tree: the context line
   "static map md5_states;" in the first bro.bif hunk, the author
   address on the From: line, and all leading whitespace were
   reproduced from a copy that had lost angle-bracket text and line
   structure.

 policy/bro.init |   8 ++
 src/NetVar.cc   |   4 +
 src/NetVar.h    |   2 +
 src/RandTest.cc | 150 ++++++++++++++++++++++++++++++++++++++++++++++++
 src/RandTest.h  |  41 +++++++++++++
 src/bro.bif     |  95 ++++++++++++++++++++++++++++--
 6 files changed, 296 insertions(+), 4 deletions(-)
 create mode 100644 src/RandTest.cc
 create mode 100644 src/RandTest.h

diff --git a/policy/bro.init b/policy/bro.init
index 1ba8f59b4d..f9742798c4 100644
--- a/policy/bro.init
+++ b/policy/bro.init
@@ -264,6 +264,14 @@ type geo_location: record {
 	longitude: double;
 };
 
+type entropy_test_result: record {
+	entropy: double;
+	chi_square: double;
+	mean: double;
+	monte_carlo_pi: double;
+	serial_correlation: double;
+};
+
 # Prototypes of Bro built-in functions.
 @load strings.bif.bro
 @load bro.bif.bro
diff --git a/src/NetVar.cc b/src/NetVar.cc
index 2c817fdc17..0af742ef3e 100644
--- a/src/NetVar.cc
+++ b/src/NetVar.cc
@@ -126,6 +126,8 @@ TableType* smb_negotiate;
 
 RecordType* geo_location;
 
+RecordType* entropy_test_result;
+
 TableType* dhcp_router_list;
 RecordType* dhcp_msg;
 
@@ -460,6 +462,8 @@ void init_net_var()
 
 	geo_location = internal_type("geo_location")->AsRecordType();
 
+	entropy_test_result = internal_type("entropy_test_result")->AsRecordType();
+
 	dhcp_router_list = internal_type("dhcp_router_list")->AsTableType();
 	dhcp_msg = internal_type("dhcp_msg")->AsRecordType();
 
diff --git a/src/NetVar.h b/src/NetVar.h
index 904bccdb77..7461ec8be0 100644
--- a/src/NetVar.h
+++ b/src/NetVar.h
@@ -133,6 +133,8 @@ extern TableType* smb_negotiate;
 
 extern RecordType* geo_location;
 
+extern RecordType* entropy_test_result;
+
 extern TableType* dhcp_router_list;
 extern RecordType* dhcp_msg;
 
diff --git a/src/RandTest.cc b/src/RandTest.cc
new file mode 100644
--- /dev/null
+++ b/src/RandTest.cc
@@ -0,0 +1,150 @@
+/*
+
+	Apply various randomness tests to a stream of bytes
+
+		by John Walker  --  September 1996
+		http://www.fourmilab.ch/
+
+	Modified for Bro by Seth Hall - July 2010
+*/
+
+#include "RandTest.h"
+
+/* Start with empty bins and zeroed accumulators for every test. */
+RandTest::RandTest()
+	{
+	totalc = 0;
+	mp = 0;
+	sccfirst = 1;
+	inmont = mcount = 0;
+	cexp = montex = montey = montepi = sccu0 = scclast = scct1 = scct2 = scct3 = 0.0;
+
+	for (int i = 0; i < 256; i++)
+		{
+		ccount[i] = 0;
+		}
+	}
+
+/* Feed bufl bytes starting at buf into the running statistics.
+   May be called repeatedly; a non-positive bufl adds nothing. */
+void RandTest::add(void *buf, int bufl)
+	{
+	unsigned char *bp = (unsigned char*)buf;
+	int oc;
+
+	while (bufl-- > 0)
+		{
+		oc = *bp++;
+		ccount[oc]++;	/* Update counter for this bin */
+		totalc++;
+
+		/* Update inside / outside circle counts for Monte Carlo
+		   computation of PI */
+		monte[mp++] = oc;	/* Save character for Monte Carlo */
+		if (mp >= RT_MONTEN)	/* Calculate every RT_MONTEN character */
+			{
+			mp = 0;
+			mcount++;
+			montex = 0;
+			montey = 0;
+			for (int mj=0; mj < RT_MONTEN/2; mj++)
+				{
+				montex = (montex * 256.0) + monte[mj];
+				montey = (montey * 256.0) + monte[(RT_MONTEN / 2) + mj];
+				}
+			if (montex*montex + montey*montey <= RT_INCIRC)
+				{
+				inmont++;
+				}
+			}
+
+		/* Update calculation of serial correlation coefficient */
+		if (sccfirst)
+			{
+			sccfirst = 0;
+			scclast = 0;
+			sccu0 = oc;
+			}
+		else
+			{
+			scct1 = scct1 + scclast * oc;
+			}
+
+		scct2 = scct2 + oc;
+		scct3 = scct3 + (oc * oc);
+		scclast = oc;
+		}
+	}
+
+/* Compute the final statistics for everything passed to add() and
+   store them through the five output pointers.  If no bytes were
+   added, every result is 0.0 except *r_scc, which gets the same
+   -100000 marker used whenever the coefficient is undefined. */
+void RandTest::end(double *r_ent, double *r_chisq,
+                   double *r_mean, double *r_montepicalc, double *r_scc)
+	{
+	int i;
+	double ent, chisq, scc, datasum;
+	ent = 0.0; chisq = 0.0; scc = 0.0; datasum = 0.0;
+	double prob[256];	/* Probabilities per bin for entropy */
+
+	/* With no data every ratio below would be 0/0 (NaN). */
+	if (totalc == 0)
+		{
+		*r_ent = 0.0;
+		*r_chisq = 0.0;
+		*r_mean = 0.0;
+		*r_montepicalc = 0.0;
+		*r_scc = -100000;
+		return;
+		}
+
+	/* Complete calculation of serial correlation coefficient.  Work on
+	   copies so the running sums stay intact if end() is called again. */
+	double t1 = scct1 + scclast * sccu0;
+	double t2 = scct2 * scct2;
+	scc = totalc * scct3 - t2;
+	if (scc == 0.0)
+		scc = -100000;
+	else
+		scc = (totalc * t1 - t2) / scc;
+
+	/* Scan bins and calculate probability for each bin and
+	   Chi-Square distribution.  The probability will be reused
+	   in the entropy calculation below.  While we're at it,
+	   we sum all of the data, which is used to compute the
+	   mean. */
+	cexp = totalc / 256.0;	/* Expected count per bin */
+	for (i = 0; i < 256; i++)
+		{
+		double a = ccount[i] - cexp;
+
+		prob[i] = ((double) ccount[i]) / totalc;
+		chisq += (a * a) / cexp;
+		datasum += ((double) i) * ccount[i];
+		}
+
+	/* Calculate entropy */
+	for (i = 0; i < 256; i++)
+		{
+		if (prob[i] > 0.0)
+			{
+			ent += prob[i] * rt_log2(1 / prob[i]);
+			}
+		}
+
+	/* Calculate Monte Carlo value for PI from percentage of hits
+	   within the circle.  Fewer than RT_MONTEN bytes yield no
+	   sample, so report 0 rather than dividing by zero. */
+	if (mcount > 0)
+		montepi = 4.0 * (((double) inmont) / mcount);
+	else
+		montepi = 0.0;
+
+	/* Return results through arguments */
+	*r_ent = ent;
+	*r_chisq = chisq;
+	*r_mean = datasum / totalc;
+	*r_montepicalc = montepi;
+	*r_scc = scc;
+	}
diff --git a/src/RandTest.h b/src/RandTest.h
new file mode 100644
--- /dev/null
+++ b/src/RandTest.h
@@ -0,0 +1,41 @@
+#ifndef randtest_h
+#define randtest_h
+
+#include <math.h>
+
+#define log2of10 3.32192809488736234787
+/* RT_LOG2 -- Calculate log to the base 2 */
+static inline double rt_log2(double x)
+{
+	return log2of10 * log10(x);
+}
+
+#define RT_MONTEN 6	/* Bytes used as Monte Carlo
+			   co-ordinates.  This should be no more
+			   bits than the mantissa of your "double"
+			   floating point type. */
+
+// RT_INCIRC = pow(pow(256.0, (double) (RT_MONTEN / 2)) - 1, 2.0);
+#define RT_INCIRC 281474943156225.0
+
+/* Accumulates byte statistics via add() and reports entropy, chi-square,
+   mean, Monte Carlo pi and serial correlation via end(). */
+class RandTest {
+	public:
+		RandTest();
+		void add(void *buf, int bufl);
+		void end(double *r_ent, double *r_chisq, double *r_mean,
+		         double *r_montepicalc, double *r_scc);
+
+	private:
+		long ccount[256];	/* Bins to count occurrences of values */
+		long totalc;		/* Total bytes counted */
+		int mp;
+		int sccfirst;
+		unsigned int monte[RT_MONTEN];
+		long inmont, mcount;
+		double cexp, montex, montey, montepi,
+		       sccu0, scclast, scct1, scct2, scct3;
+	};
+
+#endif
diff --git a/src/bro.bif b/src/bro.bif
--- a/src/bro.bif
+++ b/src/bro.bif
@@ -1725,7 +1725,7 @@ function md5_hmac%(...%): string
 %%{
 static map md5_states;
 
-BroString* convert_md5_index_to_string(Val* index)
+BroString* convert_index_to_string(Val* index)
 	{
 	ODesc d;
 	index->Describe(&d);
@@ -1735,7 +1735,7 @@ BroString* convert_md5_index_to_string(Val* index)
 
 function md5_hash_init%(index: any%): bool
 	%{
-	BroString* s = convert_md5_index_to_string(index);
+	BroString* s = convert_index_to_string(index);
 	int status = 0;
 
 	if ( md5_states.count(*s) < 1 )
@@ -1752,7 +1752,7 @@ function md5_hash_init%(index: any%): bool
 
 function md5_hash_update%(index: any, data: string%): bool
 	%{
-	BroString* s = convert_md5_index_to_string(index);
+	BroString* s = convert_index_to_string(index);
 	int status = 0;
 
 	if ( md5_states.count(*s) > 0 )
@@ -1767,7 +1767,7 @@ function md5_hash_update%(index: any, data: string%): bool
 
 function md5_hash_finish%(index: any%): string
 	%{
-	BroString* s = convert_md5_index_to_string(index);
+	BroString* s = convert_index_to_string(index);
 	StringVal* printable_digest;
 
 	if ( md5_states.count(*s) > 0 )
@@ -3196,3 +3196,90 @@ function disable_event_group%(group: string%) : any
 	event_registry->EnableGroup(group->CheckString(), false);
 	return 0;
 	%}
+
+
+%%{
+#include "RandTest.h"
+
+// In-progress entropy tests, keyed by the string form of the script-level index.
+static map<BroString, RandTest*> entropy_states;
+%%}
+
+function find_entropy%(data: string%): entropy_test_result
+	%{
+	// One-shot test over a single string; no state is kept.
+	double montepi, scc, ent, mean, chisq;
+	montepi = scc = ent = mean = chisq = 0.0;
+	RecordVal* ent_result = new RecordVal(entropy_test_result);
+	RandTest rt;
+
+	rt.add((char*) data->Bytes(), data->Len());
+	rt.end(&ent, &chisq, &mean, &montepi, &scc);
+
+	ent_result->Assign(0, new Val(ent, TYPE_DOUBLE));
+	ent_result->Assign(1, new Val(chisq, TYPE_DOUBLE));
+	ent_result->Assign(2, new Val(mean, TYPE_DOUBLE));
+	ent_result->Assign(3, new Val(montepi, TYPE_DOUBLE));
+	ent_result->Assign(4, new Val(scc, TYPE_DOUBLE));
+	return ent_result;
+	%}
+
+function entropy_test_init%(index: any%): bool
+	%{
+	// Returns true if a new test was started, false if index is already in use.
+	BroString* s = convert_index_to_string(index);
+	int status = 0;
+
+	if ( entropy_states.count(*s) < 1 )
+		{
+		entropy_states[*s] = new RandTest();
+		status = 1;
+		}
+
+	delete s;
+	return new Val(status, TYPE_BOOL);
+	%}
+
+function entropy_test_add%(index: any, data: string%): bool
+	%{
+	// Returns false if no test was started for index.
+	BroString* s = convert_index_to_string(index);
+	int status = 0;
+
+	map<BroString, RandTest*>::iterator it = entropy_states.find(*s);
+	if ( it != entropy_states.end() )
+		{
+		it->second->add((char*) data->Bytes(), data->Len());
+		status = 1;
+		}
+
+	delete s;
+	return new Val(status, TYPE_BOOL);
+	%}
+
+function entropy_test_finish%(index: any%): entropy_test_result
+	%{
+	// Reports and discards the test for index; an unknown index yields all zeros.
+	BroString* s = convert_index_to_string(index);
+	double montepi, scc, ent, mean, chisq;
+	montepi = scc = ent = mean = chisq = 0.0;
+	RecordVal* ent_result = new RecordVal(entropy_test_result);
+
+	map<BroString, RandTest*>::iterator it = entropy_states.find(*s);
+	if ( it != entropy_states.end() )
+		{
+		RandTest *rt = it->second;
+		rt->end(&ent, &chisq, &mean, &montepi, &scc);
+		entropy_states.erase(it);
+		delete rt;
+		}
+
+	ent_result->Assign(0, new Val(ent, TYPE_DOUBLE));
+	ent_result->Assign(1, new Val(chisq, TYPE_DOUBLE));
+	ent_result->Assign(2, new Val(mean, TYPE_DOUBLE));
+	ent_result->Assign(3, new Val(montepi, TYPE_DOUBLE));
+	ent_result->Assign(4, new Val(scc, TYPE_DOUBLE));
+
+	delete s;
+	return ent_result;
+	%}