Applying Seth's patch from #265 adding entropy BiFs.

This commit is contained in:
Robin Sommer 2011-01-06 17:16:10 -08:00
parent 5d41794034
commit dbca5be43c
6 changed files with 423 additions and 4 deletions

View file

@ -264,6 +264,14 @@ type geo_location: record {
longitude: double; longitude: double;
}; };
# Results of the randomness tests computed over a sequence of bytes.
# The C++ side looks this type up by name ("entropy_test_result") and
# fills the fields by position (0 through 4), so the field order below
# must not change.
type entropy_test_result: record {
# Entropy of the byte-value distribution, computed with a base-2 logarithm.
entropy: double;
# Chi-square statistic of the per-byte-value counts against a uniform
# expectation (total bytes / 256 per value).
chi_square: double;
# Arithmetic mean of the byte values.
mean: double;
# Monte Carlo estimate of pi derived from the data.
monte_carlo_pi: double;
# Serial correlation coefficient; -100000 when it cannot be computed
# because its denominator is zero.
serial_correlation: double;
};
# Prototypes of Bro built-in functions. # Prototypes of Bro built-in functions.
@load strings.bif.bro @load strings.bif.bro
@load bro.bif.bro @load bro.bif.bro

View file

@ -126,6 +126,8 @@ TableType* smb_negotiate;
RecordType* geo_location; RecordType* geo_location;
RecordType* entropy_test_result;
TableType* dhcp_router_list; TableType* dhcp_router_list;
RecordType* dhcp_msg; RecordType* dhcp_msg;
@ -460,6 +462,8 @@ void init_net_var()
geo_location = internal_type("geo_location")->AsRecordType(); geo_location = internal_type("geo_location")->AsRecordType();
entropy_test_result = internal_type("entropy_test_result")->AsRecordType();
dhcp_router_list = internal_type("dhcp_router_list")->AsTableType(); dhcp_router_list = internal_type("dhcp_router_list")->AsTableType();
dhcp_msg = internal_type("dhcp_msg")->AsRecordType(); dhcp_msg = internal_type("dhcp_msg")->AsRecordType();

View file

@ -133,6 +133,8 @@ extern TableType* smb_negotiate;
extern RecordType* geo_location; extern RecordType* geo_location;
extern RecordType* entropy_test_result;
extern TableType* dhcp_router_list; extern TableType* dhcp_router_list;
extern RecordType* dhcp_msg; extern RecordType* dhcp_msg;

256
src/RandTest.cc Normal file
View file

@ -0,0 +1,256 @@
/*
Apply various randomness tests to a stream of bytes
by John Walker -- September 1996
http://www.fourmilab.ch/
Modified for Bro by Seth Hall - July 2010
*/
#include <RandTest.h>
/*
 * Start with empty statistics: no bytes counted, no Monte Carlo samples
 * taken, all serial-correlation accumulators at zero, and sccfirst set
 * so that add() can recognise the very first byte it receives.
 */
RandTest::RandTest()
	: totalc(0), mp(0), sccfirst(1), inmont(0), mcount(0),
	  cexp(0.0), montex(0.0), montey(0.0), montepi(0.0),
	  sccu0(0.0), scclast(0.0), scct1(0.0), scct2(0.0), scct3(0.0)
	{
	/* Clear every histogram bin. monte[] is deliberately left alone:
	   add() fills all of its slots before it ever reads them. */
	for ( long* bin = ccount; bin != ccount + 256; ++bin )
		*bin = 0;
	}
/*
 * Feed bufl bytes starting at buf into the running statistics.
 *
 * For every byte this updates the per-value histogram and the total
 * byte count, collects bytes for the Monte Carlo estimate of pi, and
 * advances the serial correlation accumulators. It may be called any
 * number of times before end(); a null buffer or a non-positive length
 * is ignored.
 */
void RandTest::add(void *buf, int bufl)
	{
	/* Nothing to accumulate for a missing or empty buffer. */
	if ( ! buf || bufl <= 0 )
		return;

	const unsigned char* bp = static_cast<const unsigned char*>(buf);
	const unsigned char* const buf_end = bp + bufl;

	for ( ; bp != buf_end; ++bp )
		{
		const int oc = *bp;

		ccount[oc]++;	/* Update counter for this bin */
		totalc++;

		/* Update inside / outside circle counts for Monte Carlo
		   computation of PI */
		monte[mp++] = oc;	/* Save character for Monte Carlo */

		if ( mp >= RT_MONTEN )	/* Calculate every RT_MONTEN character */
			{
			mp = 0;
			mcount++;
			montex = 0;
			montey = 0;

			/* The first half of the saved bytes forms the x
			   co-ordinate, the second half the y co-ordinate,
			   each read as a base-256 number. */
			for ( int mj = 0; mj < RT_MONTEN / 2; mj++ )
				{
				montex = (montex * 256.0) + monte[mj];
				montey = (montey * 256.0) + monte[(RT_MONTEN / 2) + mj];
				}

			if ( montex * montex + montey * montey <= RT_INCIRC )
				inmont++;
			}

		/* Update calculation of serial correlation coefficient */
		if ( sccfirst )
			{
			/* Remember the first byte; end() pairs it with the
			   last byte to close the sequence into a ring. */
			sccfirst = 0;
			scclast = 0;
			sccu0 = oc;
			}
		else
			scct1 = scct1 + scclast * oc;

		scct2 = scct2 + oc;
		scct3 = scct3 + (oc * oc);
		scclast = oc;
		}
	}
void RandTest::end(double *r_ent, double *r_chisq,
double *r_mean, double *r_montepicalc, double *r_scc)
{
int i;
double ent, chisq, scc, datasum;
ent = 0.0; chisq = 0.0; scc = 0.0; datasum = 0.0;
double prob[256]; /* Probabilities per bin for entropy */
/* Complete calculation of serial correlation coefficient */
scct1 = scct1 + scclast * sccu0;
scct2 = scct2 * scct2;
scc = totalc * scct3 - scct2;
if (scc == 0.0)
scc = -100000;
else
scc = (totalc * scct1 - scct2) / scc;
/* Scan bins and calculate probability for each bin and
Chi-Square distribution. The probability will be reused
in the entropy calculation below. While we're at it,
we sum of all the data which will be used to compute the
mean. */
cexp = totalc / 256.0; /* Expected count per bin */
for (i = 0; i < 256; i++)
{
double a = ccount[i] - cexp;
prob[i] = ((double) ccount[i]) / totalc;
chisq += (a * a) / cexp;
datasum += ((double) i) * ccount[i];
}
/* Calculate entropy */
for (i = 0; i < 256; i++)
{
if (prob[i] > 0.0)
{
ent += prob[i] * rt_log2(1 / prob[i]);
}
}
/* Calculate Monte Carlo value for PI from percentage of hits
within the circle */
montepi = 4.0 * (((double) inmont) / mcount);
/* Return results through arguments */
*r_ent = ent;
*r_chisq = chisq;
*r_mean = datasum / totalc;
*r_montepicalc = montepi;
*r_scc = scc;
}
/*
Apply various randomness tests to a stream of bytes
by John Walker -- September 1996
http://www.fourmilab.ch/
Modified for Bro by Seth Hall - July 2010
*/
#include <RandTest.h>
/*
 * Start with empty statistics: no bytes counted, no Monte Carlo samples
 * taken, all serial-correlation accumulators at zero, and sccfirst set
 * so that add() can recognise the very first byte it receives.
 */
RandTest::RandTest()
	: totalc(0), mp(0), sccfirst(1), inmont(0), mcount(0),
	  cexp(0.0), montex(0.0), montey(0.0), montepi(0.0),
	  sccu0(0.0), scclast(0.0), scct1(0.0), scct2(0.0), scct3(0.0)
	{
	/* Clear every histogram bin. monte[] is deliberately left alone:
	   add() fills all of its slots before it ever reads them. */
	for ( long* bin = ccount; bin != ccount + 256; ++bin )
		*bin = 0;
	}
/*
 * Feed bufl bytes starting at buf into the running statistics.
 *
 * For every byte this updates the per-value histogram and the total
 * byte count, collects bytes for the Monte Carlo estimate of pi, and
 * advances the serial correlation accumulators. It may be called any
 * number of times before end(); a null buffer or a non-positive length
 * is ignored.
 */
void RandTest::add(void *buf, int bufl)
	{
	/* Nothing to accumulate for a missing or empty buffer. */
	if ( ! buf || bufl <= 0 )
		return;

	const unsigned char* bp = static_cast<const unsigned char*>(buf);
	const unsigned char* const buf_end = bp + bufl;

	for ( ; bp != buf_end; ++bp )
		{
		const int oc = *bp;

		ccount[oc]++;	/* Update counter for this bin */
		totalc++;

		/* Update inside / outside circle counts for Monte Carlo
		   computation of PI */
		monte[mp++] = oc;	/* Save character for Monte Carlo */

		if ( mp >= RT_MONTEN )	/* Calculate every RT_MONTEN character */
			{
			mp = 0;
			mcount++;
			montex = 0;
			montey = 0;

			/* The first half of the saved bytes forms the x
			   co-ordinate, the second half the y co-ordinate,
			   each read as a base-256 number. */
			for ( int mj = 0; mj < RT_MONTEN / 2; mj++ )
				{
				montex = (montex * 256.0) + monte[mj];
				montey = (montey * 256.0) + monte[(RT_MONTEN / 2) + mj];
				}

			if ( montex * montex + montey * montey <= RT_INCIRC )
				inmont++;
			}

		/* Update calculation of serial correlation coefficient */
		if ( sccfirst )
			{
			/* Remember the first byte; end() pairs it with the
			   last byte to close the sequence into a ring. */
			sccfirst = 0;
			scclast = 0;
			sccu0 = oc;
			}
		else
			scct1 = scct1 + scclast * oc;

		scct2 = scct2 + oc;
		scct3 = scct3 + (oc * oc);
		scclast = oc;
		}
	}
void RandTest::end(double *r_ent, double *r_chisq,
double *r_mean, double *r_montepicalc, double *r_scc)
{
int i;
double ent, chisq, scc, datasum;
ent = 0.0; chisq = 0.0; scc = 0.0; datasum = 0.0;
double prob[256]; /* Probabilities per bin for entropy */
/* Complete calculation of serial correlation coefficient */
scct1 = scct1 + scclast * sccu0;
scct2 = scct2 * scct2;
scc = totalc * scct3 - scct2;
if (scc == 0.0)
scc = -100000;
else
scc = (totalc * scct1 - scct2) / scc;
/* Scan bins and calculate probability for each bin and
Chi-Square distribution. The probability will be reused
in the entropy calculation below. While we're at it,
we sum of all the data which will be used to compute the
mean. */
cexp = totalc / 256.0; /* Expected count per bin */
for (i = 0; i < 256; i++)
{
double a = ccount[i] - cexp;
prob[i] = ((double) ccount[i]) / totalc;
chisq += (a * a) / cexp;
datasum += ((double) i) * ccount[i];
}
/* Calculate entropy */
for (i = 0; i < 256; i++)
{
if (prob[i] > 0.0)
{
ent += prob[i] * rt_log2(1 / prob[i]);
}
}
/* Calculate Monte Carlo value for PI from percentage of hits
within the circle */
montepi = 4.0 * (((double) inmont) / mcount);
/* Return results through arguments */
*r_ent = ent;
*r_chisq = chisq;
*r_mean = datasum / totalc;
*r_montepicalc = montepi;
*r_scc = scc;
}

68
src/RandTest.h Normal file
View file

@ -0,0 +1,68 @@
#include <math.h>
/* Base-2 logarithm of 10: the factor that converts a base-10 logarithm
   into a base-2 logarithm. */
#define log2of10 3.32192809488736234787

/*
 * RT_LOG2 -- Calculate log to the base 2
 *
 * Computed as log2(10) * log10(x), so it needs nothing beyond the
 * log10() provided by <math.h>.
 */
static double rt_log2(double x)
	{
	const double decimal_log = log10(x);
	return log2of10 * decimal_log;
	}
/* RT_MONTEN -- number of input bytes consumed per Monte Carlo sample.
   RandTest::add() reads the first RT_MONTEN / 2 saved bytes as one
   base-256 co-ordinate and the remaining RT_MONTEN / 2 bytes as the
   other. */
#define RT_MONTEN 6 /* Bytes used as Monte Carlo
co-ordinates. This should be no more
bits than the mantissa of your "double"
floating point type. */
/* RT_INCIRC -- a sample counts as inside the circle when the sum of its
   squared co-ordinates is at most this value. It is the expression
   below evaluated for RT_MONTEN == 6, i.e. (256^3 - 1)^2, and has to be
   recomputed by hand whenever RT_MONTEN changes. */
// RT_INCIRC = pow(pow(256.0, (double) (RT_MONTEN / 2)) - 1, 2.0);
#define RT_INCIRC 281474943156225.0
/*
 * Accumulates randomness statistics over a stream of bytes: entropy,
 * chi-square, arithmetic mean, a Monte Carlo estimate of pi and the
 * serial correlation coefficient.
 *
 * Usage: construct, call add() once per chunk of data, then call end()
 * to obtain the results.
 */
class RandTest {
public:
	RandTest();

	/* Feed bufl bytes starting at buf into the statistics. */
	void add(void *buf, int bufl);

	/* Finish the computation and store the results through the
	   five output pointers. */
	void end(double *r_ent, double *r_chisq, double *r_mean,
	         double *r_montepicalc, double *r_scc);

private:
	long ccount[256];	/* Bins to count occurrences of values */
	long totalc;		/* Total bytes counted */

	int mp;			/* Next free slot in monte[] */
	int sccfirst;		/* Non-zero until the first byte was added */
	unsigned int monte[RT_MONTEN];	/* Bytes saved for the next Monte Carlo sample */

	long inmont;		/* Monte Carlo samples that fell inside the circle */
	long mcount;		/* Monte Carlo samples taken so far */

	double cexp;		/* Expected count per bin (set by end()) */
	double montex;		/* x co-ordinate of the latest Monte Carlo sample */
	double montey;		/* y co-ordinate of the latest Monte Carlo sample */
	double montepi;		/* Monte Carlo value for pi (set by end()) */

	double sccu0;		/* First byte added */
	double scclast;		/* Most recently added byte */
	double scct1;		/* Sum of products of consecutive bytes */
	double scct2;		/* Sum of the bytes */
	double scct3;		/* Sum of the squared bytes */
};
#include <math.h>
/* Base-2 logarithm of 10: the factor that converts a base-10 logarithm
   into a base-2 logarithm. */
#define log2of10 3.32192809488736234787

/*
 * RT_LOG2 -- Calculate log to the base 2
 *
 * Computed as log2(10) * log10(x), so it needs nothing beyond the
 * log10() provided by <math.h>.
 */
static double rt_log2(double x)
	{
	const double decimal_log = log10(x);
	return log2of10 * decimal_log;
	}
/* RT_MONTEN -- number of input bytes consumed per Monte Carlo sample.
   RandTest::add() reads the first RT_MONTEN / 2 saved bytes as one
   base-256 co-ordinate and the remaining RT_MONTEN / 2 bytes as the
   other. */
#define RT_MONTEN 6 /* Bytes used as Monte Carlo
co-ordinates. This should be no more
bits than the mantissa of your "double"
floating point type. */
/* RT_INCIRC -- a sample counts as inside the circle when the sum of its
   squared co-ordinates is at most this value. It is the expression
   below evaluated for RT_MONTEN == 6, i.e. (256^3 - 1)^2, and has to be
   recomputed by hand whenever RT_MONTEN changes. */
// RT_INCIRC = pow(pow(256.0, (double) (RT_MONTEN / 2)) - 1, 2.0);
#define RT_INCIRC 281474943156225.0
/*
 * Accumulates randomness statistics over a stream of bytes: entropy,
 * chi-square, arithmetic mean, a Monte Carlo estimate of pi and the
 * serial correlation coefficient.
 *
 * Usage: construct, call add() once per chunk of data, then call end()
 * to obtain the results.
 */
class RandTest {
public:
	RandTest();

	/* Feed bufl bytes starting at buf into the statistics. */
	void add(void *buf, int bufl);

	/* Finish the computation and store the results through the
	   five output pointers. */
	void end(double *r_ent, double *r_chisq, double *r_mean,
	         double *r_montepicalc, double *r_scc);

private:
	long ccount[256];	/* Bins to count occurrences of values */
	long totalc;		/* Total bytes counted */

	int mp;			/* Next free slot in monte[] */
	int sccfirst;		/* Non-zero until the first byte was added */
	unsigned int monte[RT_MONTEN];	/* Bytes saved for the next Monte Carlo sample */

	long inmont;		/* Monte Carlo samples that fell inside the circle */
	long mcount;		/* Monte Carlo samples taken so far */

	double cexp;		/* Expected count per bin (set by end()) */
	double montex;		/* x co-ordinate of the latest Monte Carlo sample */
	double montey;		/* y co-ordinate of the latest Monte Carlo sample */
	double montepi;		/* Monte Carlo value for pi (set by end()) */

	double sccu0;		/* First byte added */
	double scclast;		/* Most recently added byte */
	double scct1;		/* Sum of products of consecutive bytes */
	double scct2;		/* Sum of the bytes */
	double scct3;		/* Sum of the squared bytes */
};

View file

@ -1725,7 +1725,7 @@ function md5_hmac%(...%): string
%%{ %%{
static map<BroString, md5_state_s> md5_states; static map<BroString, md5_state_s> md5_states;
BroString* convert_md5_index_to_string(Val* index) BroString* convert_index_to_string(Val* index)
{ {
ODesc d; ODesc d;
index->Describe(&d); index->Describe(&d);
@ -1735,7 +1735,7 @@ BroString* convert_md5_index_to_string(Val* index)
function md5_hash_init%(index: any%): bool function md5_hash_init%(index: any%): bool
%{ %{
BroString* s = convert_md5_index_to_string(index); BroString* s = convert_index_to_string(index);
int status = 0; int status = 0;
if ( md5_states.count(*s) < 1 ) if ( md5_states.count(*s) < 1 )
@ -1752,7 +1752,7 @@ function md5_hash_init%(index: any%): bool
function md5_hash_update%(index: any, data: string%): bool function md5_hash_update%(index: any, data: string%): bool
%{ %{
BroString* s = convert_md5_index_to_string(index); BroString* s = convert_index_to_string(index);
int status = 0; int status = 0;
if ( md5_states.count(*s) > 0 ) if ( md5_states.count(*s) > 0 )
@ -1767,7 +1767,7 @@ function md5_hash_update%(index: any, data: string%): bool
function md5_hash_finish%(index: any%): string function md5_hash_finish%(index: any%): string
%{ %{
BroString* s = convert_md5_index_to_string(index); BroString* s = convert_index_to_string(index);
StringVal* printable_digest; StringVal* printable_digest;
if ( md5_states.count(*s) > 0 ) if ( md5_states.count(*s) > 0 )
@ -3196,3 +3196,84 @@ function disable_event_group%(group: string%) : any
event_registry->EnableGroup(group->CheckString(), false); event_registry->EnableGroup(group->CheckString(), false);
return 0; return 0;
%} %}
%%{
#include <RandTest.h>
// In-progress incremental entropy tests, keyed by the string that
// convert_index_to_string() produces for the script-level index.
// entropy_test_init creates an entry, entropy_test_add feeds data into
// it, and entropy_test_finish removes the entry and frees the RandTest
// object it points to.
static map<BroString, RandTest*> entropy_states;
%%}
function find_entropy%(data: string%): entropy_test_result
	%{
	// One-shot variant of the entropy tests: runs all randomness
	// tests over the bytes of 'data' and returns the five results in
	// an entropy_test_result record (fields assigned by position).
	double montepi, scc, ent, mean, chisq;
	montepi = scc = ent = mean = chisq = 0.0;

	// The tester is only needed within this call, so it lives on the
	// stack instead of being allocated and deleted by hand.
	RandTest rt;
	rt.add((char*) data->Bytes(), data->Len());
	rt.end(&ent, &chisq, &mean, &montepi, &scc);

	RecordVal* ent_result = new RecordVal(entropy_test_result);
	ent_result->Assign(0, new Val(ent, TYPE_DOUBLE));
	ent_result->Assign(1, new Val(chisq, TYPE_DOUBLE));
	ent_result->Assign(2, new Val(mean, TYPE_DOUBLE));
	ent_result->Assign(3, new Val(montepi, TYPE_DOUBLE));
	ent_result->Assign(4, new Val(scc, TYPE_DOUBLE));

	return ent_result;
	%}
function entropy_test_init%(index: any%): bool
	%{
	// Starts an incremental entropy test under 'index'. Returns true
	// if a new test was registered and false if a test with that
	// index is already in progress (which is then left untouched).
	BroString* key = convert_index_to_string(index);
	int created = 0;

	if ( entropy_states.find(*key) == entropy_states.end() )
		{
		entropy_states[*key] = new RandTest();
		created = 1;
		}

	delete key;
	return new Val(created, TYPE_BOOL);
	%}
function entropy_test_add%(index: any, data: string%): bool
	%{
	// Feeds the bytes of 'data' into the incremental entropy test
	// registered under 'index'. Returns true if such a test exists
	// and false otherwise, in which case the data is dropped.
	BroString* key = convert_index_to_string(index);
	int fed = 0;

	map<BroString, RandTest*>::iterator pos = entropy_states.find(*key);
	if ( pos != entropy_states.end() )
		{
		pos->second->add((char*) data->Bytes(), data->Len());
		fed = 1;
		}

	delete key;
	return new Val(fed, TYPE_BOOL);
	%}
function entropy_test_finish%(index: any%): entropy_test_result
	%{
	// Completes the incremental entropy test registered under 'index',
	// removes it from the table and frees it. The results are returned
	// in an entropy_test_result record (fields assigned by position);
	// if no test is registered under 'index', every field is 0.
	BroString* key = convert_index_to_string(index);

	double ent = 0.0;
	double chisq = 0.0;
	double mean = 0.0;
	double montepi = 0.0;
	double scc = 0.0;

	RecordVal* ent_result = new RecordVal(entropy_test_result);

	map<BroString, RandTest*>::iterator pos = entropy_states.find(*key);
	if ( pos != entropy_states.end() )
		{
		RandTest* rt = pos->second;
		rt->end(&ent, &chisq, &mean, &montepi, &scc);
		entropy_states.erase(pos);
		delete rt;
		}

	delete key;

	ent_result->Assign(0, new Val(ent, TYPE_DOUBLE));
	ent_result->Assign(1, new Val(chisq, TYPE_DOUBLE));
	ent_result->Assign(2, new Val(mean, TYPE_DOUBLE));
	ent_result->Assign(3, new Val(montepi, TYPE_DOUBLE));
	ent_result->Assign(4, new Val(scc, TYPE_DOUBLE));

	return ent_result;
	%}