Merge remote branch 'origin/topic/robin/entropy'

* origin/topic/robin/entropy:
  A few smaller tweaks.
  Applying Seth's patch from #265 adding entropy BiFs.
This commit is contained in:
Robin Sommer 2011-01-19 17:35:40 -08:00
commit fa7654ec75
9 changed files with 306 additions and 8 deletions

37
CHANGES
View file

@ -1,3 +1,40 @@
1.6-dev.21 Wed Jan 19 17:36:02 PST 2011
* Added 4 new BiFs and a new record type for testing the entropy
of strings. (Seth Hall)
find_entropy(data: string): entropy_test_result
This is a one shot function that accepts a string
and returns the result of the entropy calculations.
entropy_test_init(index: any): bool
This and the next two functions are for calculating
entropy piece-wise. It only needs an index which can
be any type of variable. It needs to be something
that uniquely identifies the data stream that is
currently having its entropy calculated.
entropy_test_add(index: any, data: string): bool
This function is used to add data into the entropy
calculation. It takes the index used in the function
above and the data that you are adding and returns
true if everything seemed to work, false otherwise.
entropy_test_finish(index: any): entropy_test_result
Calling this function indicates that all of the
desired data has been inserted into the
entropy_test_add function and the entropy should be
calculated. This function *must* be called in order
to clean up an internal state tracking variable. If
this is never called on an index, it will result in
a memory leak.
The entropy_test_result values have several measures of the
entropy, but a good one to work with is the "entropy" attribute.
It's a double and as the value approaches 8.0 it can be
considered more and more random. For example, a value of 7.832
would be quite random but a value of 4.671 is not very random.
1.6-dev.20 Wed Jan 19 17:30:11 PST 2011
* BRO_DNS_FAKE is now listed in the --help output. (Seth Hall)

View file

@ -1 +1 @@
1.6-dev.20
1.6-dev.21

View file

@ -264,6 +264,14 @@ type geo_location: record {
longitude: double;
};
# Results of the randomness tests run by the entropy built-in functions
# (find_entropy and entropy_test_finish). Those functions fill the fields
# by position, so the order of the fields must not be changed.
type entropy_test_result: record {
# Entropy of the data; the closer the value gets to 8.0, the more
# random the data can be considered.
entropy: double;
# Chi-square statistic of the byte-value counts, measured against an
# equal expected count for each of the 256 possible byte values.
chi_square: double;
# Arithmetic mean of the byte values.
mean: double;
# Monte Carlo estimate of pi computed from the data.
monte_carlo_pi: double;
# Serial correlation coefficient between consecutive bytes.
serial_correlation: double;
};
# Prototypes of Bro built-in functions.
@load strings.bif.bro
@load bro.bif.bro

View file

@ -320,6 +320,7 @@ set(bro_SRCS
PrefixTable.cc
PriorityQueue.cc
Queue.cc
RandTest.cc
RE.cc
RPC.cc
Reassem.cc

View file

@ -126,6 +126,8 @@ TableType* smb_negotiate;
RecordType* geo_location;
RecordType* entropy_test_result;
TableType* dhcp_router_list;
RecordType* dhcp_msg;
@ -460,6 +462,8 @@ void init_net_var()
geo_location = internal_type("geo_location")->AsRecordType();
entropy_test_result = internal_type("entropy_test_result")->AsRecordType();
dhcp_router_list = internal_type("dhcp_router_list")->AsTableType();
dhcp_msg = internal_type("dhcp_msg")->AsRecordType();

View file

@ -133,6 +133,8 @@ extern TableType* smb_negotiate;
extern RecordType* geo_location;
extern RecordType* entropy_test_result;
extern TableType* dhcp_router_list;
extern RecordType* dhcp_msg;

132
src/RandTest.cc Normal file
View file

@ -0,0 +1,132 @@
/*
Apply various randomness tests to a stream of bytes
by John Walker -- September 1996
http://www.fourmilab.ch/random
This software is in the public domain. Permission to use, copy, modify,
and distribute this software and its documentation for any purpose and
without fee is hereby granted, without any conditions or restrictions.
This software is provided as is without express or implied warranty.
Modified for Bro by Seth Hall - July 2010
*/
#include <RandTest.h>
/* Create a tester with all counters and running sums cleared, ready to
   receive data through add(). The "first byte" flag for the serial
   correlation starts out set. */
RandTest::RandTest()
	: totalc(0), mp(0), sccfirst(1), inmont(0), mcount(0),
	  cexp(0.0), montex(0.0), montey(0.0), montepi(0.0),
	  sccu0(0.0), scclast(0.0), scct1(0.0), scct2(0.0), scct3(0.0)
	{
	/* Empty the histogram of byte values. The Monte Carlo byte buffer
	   is left alone: add() fills every slot before reading any. */
	int bin = 256;
	while (bin-- > 0)
		ccount[bin] = 0;
	}
/* Feed a chunk of data into the running statistics.

   buf  -- start of the data; it is read as unsigned bytes.
   bufl -- number of bytes to consume; nothing happens if it is <= 0.

   May be called any number of times; the state carries over between
   calls so the data can be supplied piece-wise. */
void RandTest::add(void *buf, int bufl)
	{
	unsigned char *bp = (unsigned char*)buf;

	while (bufl-- > 0)
		{
		const int oc = *bp++;

		ccount[oc]++;		/* Update counter for this bin */
		totalc++;

		/* Update inside / outside circle counts for Monte Carlo
		   computation of PI */
		monte[mp++] = oc;	/* Save character for Monte Carlo */
		if (mp >= RT_MONTEN)	/* Calculate every RT_MONTEN character */
			{
			mp = 0;
			mcount++;

			/* The first half of the saved bytes forms the x
			   co-ordinate and the second half the y co-ordinate,
			   each read as a base-256 number. */
			montex = 0;
			montey = 0;
			for (int mj = 0; mj < RT_MONTEN / 2; mj++)
				{
				montex = (montex * 256.0) + monte[mj];
				montey = (montey * 256.0) + monte[(RT_MONTEN / 2) + mj];
				}

			if (montex * montex + montey * montey <= RT_INCIRC)
				inmont++;
			}

		/* Update calculation of serial correlation coefficient */
		if (sccfirst)
			{
			/* Remember the very first byte; end() pairs it with
			   the last byte seen to close the sequence. */
			sccfirst = 0;
			sccu0 = oc;
			}
		else
			scct1 = scct1 + scclast * oc;

		scct2 = scct2 + oc;
		scct3 = scct3 + (oc * oc);
		scclast = oc;
		}
	}
void RandTest::end(double *r_ent, double *r_chisq,
double *r_mean, double *r_montepicalc, double *r_scc)
{
int i;
double ent, chisq, scc, datasum;
ent = 0.0; chisq = 0.0; scc = 0.0; datasum = 0.0;
double prob[256]; /* Probabilities per bin for entropy */
/* Complete calculation of serial correlation coefficient */
scct1 = scct1 + scclast * sccu0;
scct2 = scct2 * scct2;
scc = totalc * scct3 - scct2;
if (scc == 0.0)
scc = -100000;
else
scc = (totalc * scct1 - scct2) / scc;
/* Scan bins and calculate probability for each bin and
Chi-Square distribution. The probability will be reused
in the entropy calculation below. While we're at it,
we sum of all the data which will be used to compute the
mean. */
cexp = totalc / 256.0; /* Expected count per bin */
for (i = 0; i < 256; i++)
{
double a = ccount[i] - cexp;
prob[i] = ((double) ccount[i]) / totalc;
chisq += (a * a) / cexp;
datasum += ((double) i) * ccount[i];
}
/* Calculate entropy */
for (i = 0; i < 256; i++)
{
if (prob[i] > 0.0)
{
ent += prob[i] * rt_log2(1 / prob[i]);
}
}
/* Calculate Monte Carlo value for PI from percentage of hits
within the circle */
montepi = 4.0 * (((double) inmont) / mcount);
/* Return results through arguments */
*r_ent = ent;
*r_chisq = chisq;
*r_mean = datasum / totalc;
*r_montepicalc = montepi;
*r_scc = scc;
}

34
src/RandTest.h Normal file
View file

@ -0,0 +1,34 @@
#ifndef randtest_h
#define randtest_h

#include <math.h>

#define log2of10 3.32192809488736234787

/* RT_LOG2 -- Calculate log to the base 2.
   Computed as log10(x) scaled by log2(10). Declared inline so that
   files including this header without calling it do not get an
   unused-function warning. */
static inline double rt_log2(double x)
	{
	return log2of10 * log10(x);
	}

#define RT_MONTEN 6		/* Bytes used as Monte Carlo
				   co-ordinates. This should be no more
				   bits than the mantissa of your "double"
				   floating point type. */

// RT_INCIRC = pow(pow(256.0, (double) (RT_MONTEN / 2)) - 1, 2.0);
#define RT_INCIRC 281474943156225.0

/* Incremental randomness tests over a stream of bytes: feed data with
   add() as often as needed, then collect the results with end(). */
class RandTest {
public:
	RandTest();

	/* Consume bufl bytes starting at buf. */
	void add(void *buf, int bufl);

	/* Finish the calculations and store entropy, Chi-Square, mean,
	   Monte Carlo PI estimate and serial correlation coefficient
	   through the given pointers. */
	void end(double *r_ent, double *r_chisq, double *r_mean,
	         double *r_montepicalc, double *r_scc);

private:
	long ccount[256];	/* Bins to count occurrences of values */
	long totalc;		/* Total bytes counted */
	int mp;			/* Next free slot in monte[] */
	int sccfirst;		/* Non-zero until the first byte is seen */
	unsigned int monte[RT_MONTEN];	/* Bytes saved for the next
					   Monte Carlo point */
	long inmont;		/* Monte Carlo points inside the circle */
	long mcount;		/* Monte Carlo points tried */
	double cexp;		/* Expected count per bin */
	double montex;		/* Monte Carlo x co-ordinate */
	double montey;		/* Monte Carlo y co-ordinate */
	double montepi;		/* Monte Carlo estimate of PI */
	double sccu0;		/* First byte of the stream */
	double scclast;		/* Previous byte of the stream */
	double scct1;		/* Sum of products of consecutive bytes */
	double scct2;		/* Sum of bytes */
	double scct3;		/* Sum of squared bytes */
};

#endif

View file

@ -1725,7 +1725,7 @@ function md5_hmac%(...%): string
%%{
static map<BroString, md5_state_s> md5_states;
BroString* convert_md5_index_to_string(Val* index)
BroString* convert_index_to_string(Val* index)
{
ODesc d;
index->Describe(&d);
@ -1735,7 +1735,7 @@ BroString* convert_md5_index_to_string(Val* index)
function md5_hash_init%(index: any%): bool
%{
BroString* s = convert_md5_index_to_string(index);
BroString* s = convert_index_to_string(index);
int status = 0;
if ( md5_states.count(*s) < 1 )
@ -1752,7 +1752,7 @@ function md5_hash_init%(index: any%): bool
function md5_hash_update%(index: any, data: string%): bool
%{
BroString* s = convert_md5_index_to_string(index);
BroString* s = convert_index_to_string(index);
int status = 0;
if ( md5_states.count(*s) > 0 )
@ -1767,7 +1767,7 @@ function md5_hash_update%(index: any, data: string%): bool
function md5_hash_finish%(index: any%): string
%{
BroString* s = convert_md5_index_to_string(index);
BroString* s = convert_index_to_string(index);
StringVal* printable_digest;
if ( md5_states.count(*s) > 0 )
@ -3206,3 +3206,83 @@ function disable_event_group%(group: string%) : any
event_registry->EnableGroup(group->CheckString(), false);
return 0;
%}
%%{
#include <RandTest.h>
// State of the piece-wise entropy calculations that are in progress,
// keyed by the string form of the script-level index. Entries are
// created by entropy_test_init() and removed (and their RandTest
// deleted) by entropy_test_finish().
static map<BroString, RandTest*> entropy_states;
%%}
function find_entropy%(data: string%): entropy_test_result
	%{
	// One-shot entropy test: runs all of "data" through a fresh
	// RandTest and returns the results as an entropy_test_result
	// record (entropy, chi_square, mean, monte_carlo_pi,
	// serial_correlation, in that order).
	double montepi, scc, ent, mean, chisq;
	montepi = scc = ent = mean = chisq = 0.0;

	// The tester only lives for the duration of this call, so keep it
	// in automatic storage rather than pairing new with delete.
	RandTest rt;
	rt.add((char*) data->Bytes(), data->Len());
	rt.end(&ent, &chisq, &mean, &montepi, &scc);

	RecordVal* ent_result = new RecordVal(entropy_test_result);
	ent_result->Assign(0, new Val(ent, TYPE_DOUBLE));
	ent_result->Assign(1, new Val(chisq, TYPE_DOUBLE));
	ent_result->Assign(2, new Val(mean, TYPE_DOUBLE));
	ent_result->Assign(3, new Val(montepi, TYPE_DOUBLE));
	ent_result->Assign(4, new Val(scc, TYPE_DOUBLE));
	return ent_result;
	%}
function entropy_test_init%(index: any%): bool
	%{
	// Starts a piece-wise entropy calculation for "index". Returns
	// true if a new state was created, false if one already exists
	// for that index.
	BroString* key = convert_index_to_string(index);
	int created = 0;

	if ( entropy_states.find(*key) == entropy_states.end() )
		{
		entropy_states[*key] = new RandTest();
		created = 1;
		}

	delete key;
	return new Val(created, TYPE_BOOL);
	%}
function entropy_test_add%(index: any, data: string%): bool
	%{
	// Feeds "data" into the calculation previously started for
	// "index". Returns true if the data was added, false if no
	// calculation is in progress for that index.
	BroString* key = convert_index_to_string(index);
	map<BroString, RandTest*>::iterator state = entropy_states.find(*key);
	int added = 0;

	if ( state != entropy_states.end() )
		{
		state->second->add((char*) data->Bytes(), data->Len());
		added = 1;
		}

	delete key;
	return new Val(added, TYPE_BOOL);
	%}
function entropy_test_finish%(index: any%): entropy_test_result
	%{
	// Completes the calculation started for "index", releases its
	// state and returns the results. If no calculation is in progress
	// for that index, every field of the returned record is 0.0.
	double ent = 0.0;
	double chisq = 0.0;
	double mean = 0.0;
	double montepi = 0.0;
	double scc = 0.0;

	BroString* key = convert_index_to_string(index);
	RecordVal* result = new RecordVal(entropy_test_result);

	map<BroString, RandTest*>::iterator state = entropy_states.find(*key);
	if ( state != entropy_states.end() )
		{
		RandTest* tester = state->second;
		tester->end(&ent, &chisq, &mean, &montepi, &scc);

		// Drop the table entry first, then free the tester itself.
		entropy_states.erase(state);
		delete tester;
		}

	result->Assign(0, new Val(ent, TYPE_DOUBLE));
	result->Assign(1, new Val(chisq, TYPE_DOUBLE));
	result->Assign(2, new Val(mean, TYPE_DOUBLE));
	result->Assign(3, new Val(montepi, TYPE_DOUBLE));
	result->Assign(4, new Val(scc, TYPE_DOUBLE));

	delete key;
	return result;
	%}