From fab47cc7498464a8826a7a91f0b97d0963b32064 Mon Sep 17 00:00:00 2001 From: Seth Hall Date: Wed, 6 Nov 2013 13:52:29 -0500 Subject: [PATCH] Added a document for the SumStats framework. --- doc/frameworks/index.rst | 1 + doc/frameworks/sumstats-countconns.bro | 36 +++++++++ doc/frameworks/sumstats-toy-scan.bro | 45 +++++++++++ doc/frameworks/sumstats.rst | 102 +++++++++++++++++++++++++ testing/btest/Traces/nmap-vsn.trace | Bin 0 -> 33196 bytes 5 files changed, 184 insertions(+) create mode 100644 doc/frameworks/sumstats-countconns.bro create mode 100644 doc/frameworks/sumstats-toy-scan.bro create mode 100644 doc/frameworks/sumstats.rst create mode 100644 testing/btest/Traces/nmap-vsn.trace diff --git a/doc/frameworks/index.rst b/doc/frameworks/index.rst index d5b771b15e..f8c681d795 100644 --- a/doc/frameworks/index.rst +++ b/doc/frameworks/index.rst @@ -13,4 +13,5 @@ Frameworks logging notice signatures + sumstats diff --git a/doc/frameworks/sumstats-countconns.bro b/doc/frameworks/sumstats-countconns.bro new file mode 100644 index 0000000000..a10be54376 --- /dev/null +++ b/doc/frameworks/sumstats-countconns.bro @@ -0,0 +1,36 @@ +@load base/frameworks/sumstats + +event connection_established(c: connection) + { + # Make an observation! + # This observation is global so the key is empty. + # Each established connection counts as one so the observation is always 1. + SumStats::observe("conn established", + SumStats::Key(), + SumStats::Observation($num=1)); + } + +event bro_init() + { + # Create the reducer. + # The reducer attaches to the "conn established" observation stream + # and uses the summing calculation on the observations. + local r1 = SumStats::Reducer($stream="conn established", + $apply=set(SumStats::SUM)); + + # Create the final sumstat. + # We give it an arbitrary name and make it collect data every minute. + # The reducer is then attached and a $epoch_result callback is given + # to finally do something with the data collected. 
+ SumStats::create([$name = "counting connections", + $epoch = 1min, + $reducers = set(r1), + $epoch_result(ts: time, key: SumStats::Key, result: SumStats::Result) = + { + # This is the body of the callback that is called when a single + # result has been collected. We are just printing the total number + # of connections that were seen. The $sum field is provided as a + # double type value so we need to use %f as the format specifier. + print fmt("Number of connections established: %.0f", result["conn established"]$sum); + }]); + } \ No newline at end of file diff --git a/doc/frameworks/sumstats-toy-scan.bro b/doc/frameworks/sumstats-toy-scan.bro new file mode 100644 index 0000000000..c435fb8997 --- /dev/null +++ b/doc/frameworks/sumstats-toy-scan.bro @@ -0,0 +1,45 @@ +@load base/frameworks/sumstats + +# We use the connection_attempt event to limit our observations to those +# which were attempted and not successful. +event connection_attempt(c: connection) + { + # Make an observation! + # This observation is about the host attempting the connection. + # Each attempted connection counts as one so the observation is always 1. + SumStats::observe("conn attempted", + SumStats::Key($host=c$id$orig_h), + SumStats::Observation($num=1)); + } + +event bro_init() + { + # Create the reducer. + # The reducer attaches to the "conn attempted" observation stream + # and uses the summing calculation on the observations. Keep + # in mind that there will be one result per key (connection originator). + local r1 = SumStats::Reducer($stream="conn attempted", + $apply=set(SumStats::SUM)); + + # Create the final sumstat. + # This is slightly different from the last example since we're providing + # a callback to calculate a value to check against the threshold with + # $threshold_val. The actual threshold itself is provided with $threshold. + # Another callback is provided with $threshold_crossed for when a key crosses the threshold. + SumStats::create([$name = "finding scanners", + $epoch = 5min, + $reducers = set(r1), + # Provide a threshold. 
+ $threshold = 5.0, + # Provide a callback to calculate a value from the result + # to check against the threshold field. + $threshold_val(key: SumStats::Key, result: SumStats::Result) = + { + return result["conn attempted"]$sum; + }, + # Provide a callback for when a key crosses the threshold. + $threshold_crossed(key: SumStats::Key, result: SumStats::Result) = + { + print fmt("%s attempted %.0f or more connections", key$host, result["conn attempted"]$sum); + }]); + } \ No newline at end of file diff --git a/doc/frameworks/sumstats.rst b/doc/frameworks/sumstats.rst new file mode 100644 index 0000000000..e06ceaf2c8 --- /dev/null +++ b/doc/frameworks/sumstats.rst @@ -0,0 +1,102 @@ +================== +Summary Statistics +================== + +.. rst-class:: opening + + Measuring aspects of network traffic is an extremely common task in Bro. + Bro provides data structures which make this very easy as well in + simplistic cases such as size limited trace file processing. In + real-world deployments though, there are difficulties that arise from + clusterization (many processes sniffing traffic) and unbounded data sets + (traffic never stops). The Summary Statistics (otherwise referred to as + SumStats) framework aims to define a mechanism for consuming unbounded + data sets and making them measurable in practice on large clustered and + non-clustered Bro deployments. + +.. contents:: + +Overview +======== + +The Sumstat processing flow is broken into three pieces. Observations, where +some aspect of an event is observed and fed into the Sumstats framework. +Reducers, where observations are collected and measured, typically by taking +some sort of summary statistic measurement like average or variance (among +others). Sumstats, where reducers have an epoch (time interval) that their +measurements are performed over along with callbacks for monitoring thresholds +or viewing the collected and measured data. 
+ +Terminology +=========== + + Observation + + A single point of data. Observations have a few components of their + own. They are part of an arbitrarily named observation stream, they + have a key that is something the observation is about, and the actual + observation itself. + + Reducer + + Calculations are applied to an observation stream here to reduce the + full unbounded set of observations down to a smaller representation. + Results are collected within each reducer per-key so care must be + taken to keep the total number of keys tracked down to a reasonable + level. + + Sumstat + + The final definition of a Sumstat where one or more reducers are + collected over an interval, also known as an epoch. Thresholding can + be applied here along with a callback in the event that a threshold is + crossed. Additionally, a callback can be provided to access each + result (per-key) at the end of each epoch. + +Examples +======== + +These examples may seem very simple to an experienced Bro script developer and +they're intended to look that way. Keep in mind that these scripts will work +on small single process Bro instances as well as large many-worker clusters. +The complications from dealing with flow based load balancing can be ignored +by developers writing scripts that use Sumstats due to its built-in cluster +transparency. + +Printing the number of connections +---------------------------------- + +Sumstats provides a simple way of approaching the problem of trying to count +the number of connections over a given time interval. Here is a script with +inline documentation that does this with the Sumstats framework: + +.. btest-include:: ${DOC_ROOT}/frameworks/sumstats-countconns.bro + +When run on a sample PCAP file from the Bro test suite, the following output +is created: + +.. 
btest:: sumstats-countconns + + @TEST-EXEC: btest-rst-cmd bro -r ${TRACES}/workshop_2011_browse.trace ${DOC_ROOT}/frameworks/sumstats-countconns.bro + + +Toy Scan detection +------------------ + +Taking the previous example even further, we can implement a simple detection +to demonstrate the thresholding functionality. This example is a toy to +demonstate how thresholding works in Sumstats and is not meant to be a real- +world functional example, that is left to the scan.bro script that is included +with Bro. + +.. btest-include:: ${DOC_ROOT}/frameworks/sumstats-toy-scan.bro + +Let's see if there any hosts that crossed the threshold in a PCAP file +containing a host running nmap: + +.. btest:: sumstats-toy-scan + + @TEST-EXEC: btest-rst-cmd bro -r ${TRACES}/nmap-vsn.trace ${DOC_ROOT}/frameworks/sumstats-toy-scan.bro + +It seems the host running nmap was detected! + diff --git a/testing/btest/Traces/nmap-vsn.trace b/testing/btest/Traces/nmap-vsn.trace new file mode 100644 index 0000000000000000000000000000000000000000..b276ed3d2f19d5a8e941a38eda8f09d4081d58b0 GIT binary patch literal 33196 zcmbW=3w+J>AII_EnQfLf47poa*CX!1RRxwJsi&%tcZd2q^ikRD6 zc9Ba_$t6@SyHxHIl`#I_*Eu`ene{)~`}gQP9?oMuKcDaS_x<|$e$V-SzdM-nQY?ldh3R_*wpLrdr|6GT!;&+4;3VU&-pY@Psg=eLPTep0#g(WVx zJ#1dK)a91_@yL1^=6|+Ud)>PAU|-Af^7dJ0D^$p4v-uVA>0(XfXWm}U=jVsnMk`XFm7(ii!tA`2r2DAEr( zS&?@kmn*UeGAOpN)xxwdFK3%Odz+)5)fiz~KiqdzCVkA&7wo2|nalLGU+twVE7^y4 zT*NBBKxn4`WLHHNMeYo`tKdDk8nWfRf~Ly^Un2|1&XEfo1Ga-bq#L(Wv> z>&SM?g?dIKUsdD?-IibE#+Eqy3hJsv=T&&1V$lmJ$3f}v^M%v;9xes|* zk>4YI*9$rKBbO`kAo8pt4*Sk^d<27_!iP|R}pzok(H2z#tAutkn0p#1(~MEs>p!xLTQ7Mn-p0c`MV(mv64+q~XcI_JFJ%<9d&euFP%%AVI>A+IUhW#;v}I`NK+Smh@Pcc%%mqaq(b zzNW~g$i<3mhWt*E&5<`18HNmbN4U2wkda2_^t{o-{W_d-?|Gq_^t_ZmvU^_3)GW(B z@~xXIXj6Kg{b$cTZ?sabR+MXnd$}szJh>*ty(W|ymV>MhG2akN<(SGigh z_gt<6O1Z)**A2IFna{Z5?48}qH7Jd8O}(C7t~O~l%if|=&pEDgHSXZKT-7HF?bz1Q zj*5H``KFOMJ)qLfy@%4>Ym{<|8iiiVu2H)*Z_7R^bNrvK8tuBnonGP|J@9DJYHQr1 
z`y~_oV%hBI52g87_9L~DhP$HIkMMlI_A9k+Pp!L85$^OO$T5oSh+L(}PRN6bd=%;P zu29d;$Ph(#K|ZF)uE=Ob+L4PC`51DCA|sGz6&Z;vF;!@lZpbEz?2dd=kv)*375O-F znId~4D@?Ol9nE8am-C5XIi4Plk%#%3fsx1h6Xv<`5YGE`Hxsdm3C1g5#2TOPA>t!7Z+pf$BHBxhEt{U@93Pd85&A@L z^0!pvlgQ^4`80B}BA-F7R^+qD9~IdLc|(!UA*;S8v`Sy(BZ};Yd|8ptBWEkJKXQ{I zUqGHzB7Aoh^(#1LC8o&4n{^R@Zax5}dk>ikwiX4wjSL6g_ z*n->{Q1)2EF%x#KlJRP0MjO6YR+@jmTRO)YsaeBTKi|{LgIBIDQ6+Q2l$<$=85wwc zF*+y}u(3H~4x;xm(R0Qdx6iN}MQ&ttu%t=$=)g6@UTHG)V!YA6PVRVvs6kjaYt6q%vO&ydB7 z3-$aQ*;tWZAp0nCHF92{P};S~WJRt+W+*Zqd3On+v>T9h6}b`FO_7_CBNdr|T&Tz` z$Q_E@iY!w~xVMSOmWoV5_Elsua-t$rkZTmV19?J`Um*+KBi!4a$eN1Wh3u@z-N;uJ zxd$1m$ZwEa6}cCgrpRxRg-Z*q@*T3SB7ZsIMV>@%QDiFeq$1OhzU72_n}MvU$X}6mMgE3-U6H>dkCYecc@}9^ z5ac;zup-YRJ1X)5a)=@?B4;Wx6S+;1myqd-`~&G%QD~LR$hwNWf{ak)U&v@h{*7Fw z$bXREDDoQe4@KTUmaZhUN*3}RMV3H*uE>(e{faDw%u?h%$fRn*9l8%0^1gS$KR0QF z?5oJe$oQE;&R)n2MNUMPm?h+#glwV6DabLig`CrnYZV!TJg&(1kVQTaN;@4HF-MTI zkrNd80dl<}=OK?Naz3)kheAESL~e-{fLt6W$kWKL6nO@@Wr>ip z3^GlTWs!xK3OUOo>nO4U@-ao;i;PxeWn}CpLOtsu_pSH7ZLOtqeVKEuWj9||JUn;F ze5&8Z`7MN5TV^h^x@<0ME!Sr9j*D0U<$VkOFU|YNl@$cJ1{qRCknbXUC~^ieUD>zk zg&eNzXDozVph$1zw~F*Z{;fz~WJP5sWMO0{MfxF!D)KJmhl(tMJf-a03_!l3?At7g zY^ChJtcFZhWDVpwMb<=?R`yWVLWU`_HnN{0Ly%3BosjjAeH2+AIZ2TXkR6rXmro#v zD)K30HD&kZ%g7Ci9EzN;>`IJ6?o{Nf$V^4PhAge@gnS*@UXjtrmlQbyIaiS*k(V0? z`!**d0~-l)D)Jd+-{u13aYe=<%PKn|KSs7vVwaDDqF_5Jg@^PFLi0Pl_y!{9BRbkUf;$t*4N+l~tn^ zkxwYH60)SSf-?virpPMDfr_k(oTm7(=E!b}3`4%D$QHFE_{x*c|)TN1n)8v+p|D(y>RI6BCFotw{|>`j{1$u5#tyAAaQ${cTy-~RBEhp15*&Vb--igxKIEW3Bt ztXZxa?F{i;qxY0@wWnNb6!{49uadcKWNLI<-?^Sjjjm_q^qn)dghQM z+o`B|`c5OWF7*HRfTS3rH?imRfOEElM+ZN<+ZDas0MF55+@j}ijizUbzKxSMovm@v zmTCwIlbmh_ddo6SA%%A4NtevNJMDkzJ6|n%1t!1SO{(d0df? 
zA-$B9sS(J-%8HXns4C1=&cELy(Utau{;3B8MXYV z==od&o>hPDhqj)=17ohAVOwa*!ftBWEb`1LPJ(&Ox44 zxN>ZT3DJ^BtYJ68y_s?6VoVjdxtc z`gVz@Sc~n%x@VaymMtOu9MXK>vxiu7EAWnsSj(4sik04&SU)7?#>!}lH1qlpVs#q9 zJ1%1R$L0RsKkN1r!onXP&kR29>FgPN%Zxde{qUX{o6(%={q&K1Gx%73=KA#EK@+{e ztJum=uBgR=EQ^d)WO-zYA}b&-DDqxp*pl4)|1T%XHMdM#UZIa=pD33xXV^yHXwwD> zoG5qh*PQ$BcB0(YFU^T^j6QRy)$Ukm{ zAm2rPs>m70(kq3WyOGTmxd+)>k>4OkD{?P#i6Xy6ex=Cokf#;-12W)Kp;Znb>nidv zGG>*K^GD=bMgD|5q{yF<9lsDt`wMcgB99|u6nO#}ugH_gql!#LW+^fa8MIosw;9Ov zHG=#V>G!1|e?!((cMUiKaGZlFjxkZuZkiRJMJko2e&?*;@!HT?y?4Za@ zWbN9*Z%Vy{j8fzu$c-UF&dbQ%bp&|@d0LTwAp`0OIsZmBROCO%#}#=E8Lh}0$oY!Q zLe|?8V0ANd5?R9cwbcW0H9Do2kMB+w{wul%+mfN}V<#>2RS4WO*d2)ol zcqlM2H^+bIT#HY7Ok#e!lBq~S-Xwz$1SYv9xpU{5_p8Sw=5LQlvN3OxzCQ#eyC=D0 v=bC-aW0IbZB=LEZ4E{ba#VyI5JJ*5>9+UKPBw3d?$)N87cep3<*tz}(qO1kB literal 0 HcmV?d00001