Merge remote-tracking branch 'origin/master' into topic/documentation

Conflicts:
	doc/index.rst
	doc/scripts/builtins.rst
	testing/btest/btest.cfg
Robin Sommer 2013-08-12 14:03:49 -07:00
commit aec77c7cfe
1022 changed files with 284455 additions and 176377 deletions

.gitmodules

@ -16,3 +16,6 @@
[submodule "cmake"]
path = cmake
url = git://git.bro-ids.org/cmake
[submodule "magic"]
path = magic
url = git://git.bro.org/bromagic

CHANGES

File diff suppressed because it is too large


@ -17,12 +17,17 @@ set(BRO_SCRIPT_SOURCE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/scripts)
get_filename_component(BRO_SCRIPT_INSTALL_PATH ${BRO_SCRIPT_INSTALL_PATH}
ABSOLUTE)
set(BRO_MAGIC_INSTALL_PATH ${BRO_ROOT_DIR}/share/bro/magic)
set(BRO_MAGIC_SOURCE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/magic/database)
configure_file(bro-path-dev.in ${CMAKE_CURRENT_BINARY_DIR}/bro-path-dev)
file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/bro-path-dev.sh
"export BROPATH=`${CMAKE_CURRENT_BINARY_DIR}/bro-path-dev`\n"
"export BROMAGIC=\"${BRO_MAGIC_SOURCE_PATH}\"\n"
"export PATH=\"${CMAKE_CURRENT_BINARY_DIR}/src\":$PATH\n")
file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/bro-path-dev.csh
"setenv BROPATH `${CMAKE_CURRENT_BINARY_DIR}/bro-path-dev`\n"
"setenv BROMAGIC \"${BRO_MAGIC_SOURCE_PATH}\"\n"
"setenv PATH \"${CMAKE_CURRENT_BINARY_DIR}/src\":$PATH\n")
file(STRINGS "${CMAKE_CURRENT_SOURCE_DIR}/VERSION" VERSION LIMIT_COUNT 1)
@ -69,6 +74,12 @@ if (MISSING_PREREQS)
message(FATAL_ERROR "Configuration aborted due to missing prerequisites")
endif ()
set(libmagic_req 5.04)
if ( LibMagic_VERSION VERSION_LESS ${libmagic_req} )
message(FATAL_ERROR "libmagic of at least version ${libmagic_req} required "
"(found ${LibMagic_VERSION})")
endif ()
include_directories(BEFORE
${PCAP_INCLUDE_DIR}
${OpenSSL_INCLUDE_DIR}
@ -190,6 +201,10 @@ CheckOptionalBuildSources(aux/broctl Broctl INSTALL_BROCTL)
CheckOptionalBuildSources(aux/bro-aux Bro-Aux INSTALL_AUX_TOOLS)
CheckOptionalBuildSources(aux/broccoli Broccoli INSTALL_BROCCOLI)
install(DIRECTORY ./magic/database/
DESTINATION ${BRO_MAGIC_INSTALL_PATH}
)
########################################################################
## Packaging Setup


@ -1,144 +0,0 @@
# DO NOT EDIT
# This file is auto-generated from the genDocSourcesList.sh script.
#
# This is a list of Bro script sources for which to generate reST documentation.
# It will be included inline in the CMakeLists.txt found in the same directory
# in order to create Makefile targets that define how to generate reST from
# a given Bro script.
#
# Note: any path prefix of the script (2nd argument of rest_target macro)
# will be used to derive what path under scripts/ the generated documentation
# will be placed.
set(psd ${PROJECT_SOURCE_DIR}/scripts)
rest_target(${CMAKE_CURRENT_SOURCE_DIR} example.bro internal)
rest_target(${psd} base/init-default.bro internal)
rest_target(${psd} base/init-bare.bro internal)
rest_target(${CMAKE_BINARY_DIR}/src base/bro.bif.bro)
rest_target(${CMAKE_BINARY_DIR}/src base/const.bif.bro)
rest_target(${CMAKE_BINARY_DIR}/src base/event.bif.bro)
rest_target(${CMAKE_BINARY_DIR}/src base/logging.bif.bro)
rest_target(${CMAKE_BINARY_DIR}/src base/reporter.bif.bro)
rest_target(${CMAKE_BINARY_DIR}/src base/strings.bif.bro)
rest_target(${CMAKE_BINARY_DIR}/src base/types.bif.bro)
rest_target(${psd} base/frameworks/cluster/main.bro)
rest_target(${psd} base/frameworks/cluster/nodes/manager.bro)
rest_target(${psd} base/frameworks/cluster/nodes/proxy.bro)
rest_target(${psd} base/frameworks/cluster/nodes/worker.bro)
rest_target(${psd} base/frameworks/cluster/setup-connections.bro)
rest_target(${psd} base/frameworks/communication/main.bro)
rest_target(${psd} base/frameworks/control/main.bro)
rest_target(${psd} base/frameworks/dpd/main.bro)
rest_target(${psd} base/frameworks/intel/main.bro)
rest_target(${psd} base/frameworks/logging/main.bro)
rest_target(${psd} base/frameworks/logging/postprocessors/scp.bro)
rest_target(${psd} base/frameworks/logging/postprocessors/sftp.bro)
rest_target(${psd} base/frameworks/logging/writers/ascii.bro)
rest_target(${psd} base/frameworks/logging/writers/dataseries.bro)
rest_target(${psd} base/frameworks/metrics/cluster.bro)
rest_target(${psd} base/frameworks/metrics/main.bro)
rest_target(${psd} base/frameworks/metrics/non-cluster.bro)
rest_target(${psd} base/frameworks/notice/actions/add-geodata.bro)
rest_target(${psd} base/frameworks/notice/actions/drop.bro)
rest_target(${psd} base/frameworks/notice/actions/email_admin.bro)
rest_target(${psd} base/frameworks/notice/actions/page.bro)
rest_target(${psd} base/frameworks/notice/actions/pp-alarms.bro)
rest_target(${psd} base/frameworks/notice/cluster.bro)
rest_target(${psd} base/frameworks/notice/extend-email/hostnames.bro)
rest_target(${psd} base/frameworks/notice/main.bro)
rest_target(${psd} base/frameworks/notice/weird.bro)
rest_target(${psd} base/frameworks/packet-filter/main.bro)
rest_target(${psd} base/frameworks/packet-filter/netstats.bro)
rest_target(${psd} base/frameworks/reporter/main.bro)
rest_target(${psd} base/frameworks/signatures/main.bro)
rest_target(${psd} base/frameworks/software/main.bro)
rest_target(${psd} base/protocols/conn/contents.bro)
rest_target(${psd} base/protocols/conn/inactivity.bro)
rest_target(${psd} base/protocols/conn/main.bro)
rest_target(${psd} base/protocols/dns/consts.bro)
rest_target(${psd} base/protocols/dns/main.bro)
rest_target(${psd} base/protocols/ftp/file-extract.bro)
rest_target(${psd} base/protocols/ftp/main.bro)
rest_target(${psd} base/protocols/ftp/utils-commands.bro)
rest_target(${psd} base/protocols/http/file-extract.bro)
rest_target(${psd} base/protocols/http/file-hash.bro)
rest_target(${psd} base/protocols/http/file-ident.bro)
rest_target(${psd} base/protocols/http/main.bro)
rest_target(${psd} base/protocols/http/utils.bro)
rest_target(${psd} base/protocols/irc/dcc-send.bro)
rest_target(${psd} base/protocols/irc/main.bro)
rest_target(${psd} base/protocols/smtp/entities-excerpt.bro)
rest_target(${psd} base/protocols/smtp/entities.bro)
rest_target(${psd} base/protocols/smtp/main.bro)
rest_target(${psd} base/protocols/ssh/main.bro)
rest_target(${psd} base/protocols/ssl/consts.bro)
rest_target(${psd} base/protocols/ssl/main.bro)
rest_target(${psd} base/protocols/ssl/mozilla-ca-list.bro)
rest_target(${psd} base/protocols/syslog/consts.bro)
rest_target(${psd} base/protocols/syslog/main.bro)
rest_target(${psd} base/utils/addrs.bro)
rest_target(${psd} base/utils/conn-ids.bro)
rest_target(${psd} base/utils/directions-and-hosts.bro)
rest_target(${psd} base/utils/files.bro)
rest_target(${psd} base/utils/numbers.bro)
rest_target(${psd} base/utils/paths.bro)
rest_target(${psd} base/utils/patterns.bro)
rest_target(${psd} base/utils/site.bro)
rest_target(${psd} base/utils/strings.bro)
rest_target(${psd} base/utils/thresholds.bro)
rest_target(${psd} policy/frameworks/communication/listen.bro)
rest_target(${psd} policy/frameworks/control/controllee.bro)
rest_target(${psd} policy/frameworks/control/controller.bro)
rest_target(${psd} policy/frameworks/dpd/detect-protocols.bro)
rest_target(${psd} policy/frameworks/dpd/packet-segment-logging.bro)
rest_target(${psd} policy/frameworks/metrics/conn-example.bro)
rest_target(${psd} policy/frameworks/metrics/http-example.bro)
rest_target(${psd} policy/frameworks/metrics/ssl-example.bro)
rest_target(${psd} policy/frameworks/software/version-changes.bro)
rest_target(${psd} policy/frameworks/software/vulnerable.bro)
rest_target(${psd} policy/integration/barnyard2/main.bro)
rest_target(${psd} policy/integration/barnyard2/types.bro)
rest_target(${psd} policy/misc/analysis-groups.bro)
rest_target(${psd} policy/misc/capture-loss.bro)
rest_target(${psd} policy/misc/loaded-scripts.bro)
rest_target(${psd} policy/misc/profiling.bro)
rest_target(${psd} policy/misc/stats.bro)
rest_target(${psd} policy/misc/trim-trace-file.bro)
rest_target(${psd} policy/protocols/conn/known-hosts.bro)
rest_target(${psd} policy/protocols/conn/known-services.bro)
rest_target(${psd} policy/protocols/conn/weirds.bro)
rest_target(${psd} policy/protocols/dns/auth-addl.bro)
rest_target(${psd} policy/protocols/dns/detect-external-names.bro)
rest_target(${psd} policy/protocols/ftp/detect.bro)
rest_target(${psd} policy/protocols/ftp/software.bro)
rest_target(${psd} policy/protocols/http/detect-MHR.bro)
rest_target(${psd} policy/protocols/http/detect-intel.bro)
rest_target(${psd} policy/protocols/http/detect-sqli.bro)
rest_target(${psd} policy/protocols/http/detect-webapps.bro)
rest_target(${psd} policy/protocols/http/header-names.bro)
rest_target(${psd} policy/protocols/http/software-browser-plugins.bro)
rest_target(${psd} policy/protocols/http/software.bro)
rest_target(${psd} policy/protocols/http/var-extraction-cookies.bro)
rest_target(${psd} policy/protocols/http/var-extraction-uri.bro)
rest_target(${psd} policy/protocols/smtp/blocklists.bro)
rest_target(${psd} policy/protocols/smtp/detect-suspicious-orig.bro)
rest_target(${psd} policy/protocols/smtp/software.bro)
rest_target(${psd} policy/protocols/ssh/detect-bruteforcing.bro)
rest_target(${psd} policy/protocols/ssh/geo-data.bro)
rest_target(${psd} policy/protocols/ssh/interesting-hostnames.bro)
rest_target(${psd} policy/protocols/ssh/software.bro)
rest_target(${psd} policy/protocols/ssl/cert-hash.bro)
rest_target(${psd} policy/protocols/ssl/expiring-certs.bro)
rest_target(${psd} policy/protocols/ssl/extract-certs-pem.bro)
rest_target(${psd} policy/protocols/ssl/known-certs.bro)
rest_target(${psd} policy/protocols/ssl/validate-certs.bro)
rest_target(${psd} policy/tuning/defaults/packet-fragments.bro)
rest_target(${psd} policy/tuning/defaults/warnings.bro)
rest_target(${psd} policy/tuning/track-all-assets.bro)
rest_target(${psd} site/local-manager.bro)
rest_target(${psd} site/local-proxy.bro)
rest_target(${psd} site/local-worker.bro)
rest_target(${psd} site/local.bro)
rest_target(${psd} test-all-policy.bro)


@ -61,7 +61,10 @@ distclean:
rm -rf $(BUILD)
test:
@(cd testing && make ) @( cd testing && make )
test-all: test
test -d aux/broctl && ( cd aux/broctl && make test )
configured:
@test -d $(BUILD) || ( echo "Error: No build/ directory found. Did you run configure?" && exit 1 )

NEWS

@ -17,6 +17,10 @@ New Functionality
- GridFTP support. TODO: Extend.
- Modbus support. TODO: Extend.
- DNP3 support. TODO: Extend.
- ssl.log now also records the subject client and issuer certificates.
- Hooks: TODO: Briefly summarize the documention from
@ -46,11 +50,103 @@ New Functionality
have changed their signatures to work with opaques types rather
than global state as it was before.
- The scripting language now supports constructing sets, tables,
vectors, and records by name:
type MyRecordType: record {
c: count;
s: string &optional;
};
global r: MyRecordType = record($c = 7);
type MySet: set[MyRecordType];
global s = MySet([$c=1], [$c=2]);
- Strings now support the subscript operator to extract individual
characters and substrings (e.g., s[4], s[1,5]). The index expression
can take up to two indices for the start and end index of the
substring to return (e.g. "mystring[1,3]").
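  As an illustrative sketch (the values here are made up):

      local s = "mystring";
      print s[4];      # a one-character string
      print s[1,3];    # the substring from the start index through the end index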
- Functions now support default parameters, e.g.:
global foo: function(s: string, t: string &default="abc", u: count &default=0);
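  A caller can then omit trailing arguments that have defaults; a hypothetical
  call site:

      foo("bar");            # t defaults to "abc", u defaults to 0
      foo("bar", "xyz", 5);  # all arguments given explicitly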
- Scripts can now use two new "magic constants" @DIR and @FILENAME
that expand to the directory path of the current script and just the
script file name without path, respectively. (Jon Siwek)
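  For example, a script could report its own location (a minimal, hypothetical
  use):

      print fmt("running %s from %s", @FILENAME, @DIR);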
- The new file analysis framework moves most of the processing of file
content from script-land into the core, where it belongs. See
doc/file-analysis.rst for more information.
Much of this is an internal change, but the framework also comes
with the following user-visible functionality (some of that was
already available before, but done differently):
[TODO: Update with changes from 984e9793db56.]
- A binary input reader interfaces the input framework with file
analysis, making it possible to inject files on disk into Bro's
processing.
- Support for analyzing data transferred via HTTP range
requests.
- HTTP:
* Identify MIME type of message.
* Extract message to disk.
* Compute MD5 for messages.
- SMTP:
* Identify MIME type of message.
* Extract message to disk.
* Compute MD5 for messages.
* Provide access to start of entity data.
- FTP data transfers: Identify MIME type; record to disk.
- IRC DCC transfers: Record to disk.
- New packet filter framework supports BPF-based load-balancing,
shunting, and sampling; plus plugin support to customize filters
dynamically.
- Bro now provides Bloom filters of two kinds: basic Bloom filters
supporting membership tests, and counting Bloom filters that track
the frequency of elements. The corresponding functions are:
bloomfilter_basic_init(fp: double, capacity: count, name: string &default=""): opaque of bloomfilter
bloomfilter_basic_init2(k: count, cells: count, name: string &default=""): opaque of bloomfilter
bloomfilter_counting_init(k: count, cells: count, max: count, name: string &default=""): opaque of bloomfilter
bloomfilter_add(bf: opaque of bloomfilter, x: any)
bloomfilter_lookup(bf: opaque of bloomfilter, x: any): count
bloomfilter_merge(bf1: opaque of bloomfilter, bf2: opaque of bloomfilter): opaque of bloomfilter
bloomfilter_clear(bf: opaque of bloomfilter)
See <INSERT LINK> for full documentation.
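  A rough usage sketch built only from the signatures above (the parameter
  values are illustrative):

      local bf = bloomfilter_basic_init(0.001, 100000);
      bloomfilter_add(bf, "www.example.com");
      print bloomfilter_lookup(bf, "www.example.com");  # >= 1 when likely present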
- Bro now provides a probabilistic data structure for computing
"top k" elements. The corresponding functions are:
topk_init(size: count): opaque of topk
topk_add(handle: opaque of topk, value: any)
topk_get_top(handle: opaque of topk, k: count)
topk_count(handle: opaque of topk, value: any): count
topk_epsilon(handle: opaque of topk, value: any): count
topk_size(handle: opaque of topk): count
topk_sum(handle: opaque of topk): count
topk_merge(handle1: opaque of topk, handle2: opaque of topk)
topk_merge_prune(handle1: opaque of topk, handle2: opaque of topk)
See <INSERT LINK> for full documentation.
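  A rough usage sketch based on the signatures above (the size is illustrative):

      local tk = topk_init(100);
      topk_add(tk, "10.0.0.1");
      topk_add(tk, "10.0.0.2");
      topk_add(tk, "10.0.0.1");
      print topk_get_top(tk, 2);         # the (approximate) two most frequent values
      print topk_count(tk, "10.0.0.1");  # the (approximate) count for one value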
- base/utils/exec.bro provides a module to start external processes
asynchronously and retrieve their output on termination.
base/utils/dir.bro uses it to monitor a directory for changes, and
base/utils/active-http.bro for providing an interface for querying
remote web servers.
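  A minimal sketch of the asynchronous pattern, assuming the Exec::run and
  Exec::Command interface defined in base/utils/exec.bro (field names here
  are illustrative, not authoritative):

      @load base/utils/exec

      event bro_init()
          {
          when ( local res = Exec::run([$cmd="echo hello"]) )
              {
              if ( res?$stdout )
                  print res$stdout;
              }
          }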
Changed Functionality
~~~~~~~~~~~~~~~~~~~~~
@ -126,6 +222,15 @@ Changed Functionality
- Removed the byte_len() and length() bif functions. Use the "|...|"
operator instead.
- The SSH::Login notice has been superseded by a corresponding
intelligence framework observation (SSH::SUCCESSFUL_LOGIN).
- PacketFilter::all_packets has been replaced with
PacketFilter::enable_auto_protocol_capture_filters.
- We removed the BitTorrent DPD signatures pending further updates to
that analyzer.
Bro 2.1
-------
@ -209,6 +314,27 @@ New Functionality
outputs. We do not yet recommend them for production (but welcome
feedback!)
- Summary statistics framework. [Extend]
- A number of new applications build on top of the summary statistics
framework:
* Scan detection: Detectors for port and address scans return. See
policy/misc/scan.bro.
* Traceroute detector: policy/misc/detect-traceroute
* Web application detection/measurement: policy/misc/app-metrics.bro
* FTP brute-forcing detector: policy/protocols/ftp/detect-bruteforcing.bro
* HTTP-based SQL injection detector: policy/protocols/http/detect-sqli.bro
(existed before, but now ported to the new framework)
* SSH brute-forcing detector feeding the intelligence framework:
policy/protocols/ssh/detect-bruteforcing.bro
Changed Functionality
~~~~~~~~~~~~~~~~~~~~~


@ -1 +1 @@
2.1-357 2.1-1050

@ -1 +1 @@
Subproject commit 72d121ade5a37df83d3252646de51cb77ce69a89 Subproject commit 74e6a5401c4228d5293c0e309283f43c389e7c12

@ -1 +1 @@
Subproject commit ae14da422bfb252c8a53bd00d3e5fd7da8bc112e Subproject commit 01bb93cb23f31a98fb400584e8d2f2fbe8a589ef

@ -1 +1 @@
Subproject commit e64204fec55759c614a276c1933bbff2069a63db Subproject commit 907210ce1470724fb386f939cc1b10a4caa2ae39

@ -1 +1 @@
Subproject commit 3d2172a60aa503745c92cef8ab3020d1dfc13f0d Subproject commit fd0e7e0b0cf50131efaf536a5683266cfe169455

@ -1 +1 @@
Subproject commit 7b5055a664f8f26f45fed2d10f9e900776f1bac0 Subproject commit d8537a474fbab63ddea3e7137b40f76d994d7957


@ -12,7 +12,7 @@
broPolicies=${BRO_SCRIPT_SOURCE_PATH}:${BRO_SCRIPT_SOURCE_PATH}/policy:${BRO_SCRIPT_SOURCE_PATH}/site
broGenPolicies=${CMAKE_BINARY_DIR}/src broGenPolicies=${CMAKE_BINARY_DIR}/scripts
installedPolicies=${BRO_SCRIPT_INSTALL_PATH}:${BRO_SCRIPT_INSTALL_PATH}/site

cmake

@ -1 +1 @@
Subproject commit 94e72a3075bb0b9550ad05758963afda394bfb2c Subproject commit 14537f56d66b18ab9d5024f798caf4d1f356fc67


@ -89,8 +89,7 @@ Note the fields that are set for the filter:
are generated by taking the stream's ID and munging it slightly.
:bro:enum:`Conn::LOG` is converted into ``conn``,
:bro:enum:`PacketFilter::LOG` is converted into
``packet_filter``, and :bro:enum:`Notice::POLICY_LOG` is ``packet_filter``.
converted into ``notice_policy``.
``include``
A set limiting the fields to the ones given. The names


@ -86,21 +86,21 @@ directly make modifications to the :bro:see:`Notice::Info` record
given as the argument to the hook.
Here's a simple example which tells Bro to send an email for all notices of
type :bro:see:`SSH::Login` if the server is 10.0.0.1: type :bro:see:`SSH::Password_Guessing` if the server is 10.0.0.1:
.. code:: bro
hook Notice::policy(n: Notice::Info)
{
if ( n$note == SSH::Login && n$id$resp_h == 10.0.0.1 ) if ( n$note == SSH::Password_Guessing && n$id$resp_h == 10.0.0.1 )
add n$actions[Notice::ACTION_EMAIL];
}
.. note::
Keep in mind that the semantics of the SSH::Login notice are Keep in mind that the semantics of the SSH::Password_Guessing notice are
such that it is only raised when Bro heuristically detects a successful such that it is only raised when Bro heuristically detects a failed
login. No apparently failed logins will raise this notice. login.
Hooks can also have priorities applied to order their execution like events
with a default priority of 0. Greater values are executed first. Setting
@ -110,7 +110,7 @@ a hook body to run before default hook bodies might look like this:
hook Notice::policy(n: Notice::Info) &priority=5
{
if ( n$note == SSH::Login && n$id$resp_h == 10.0.0.1 ) if ( n$note == SSH::Password_Guessing && n$id$resp_h == 10.0.0.1 )
add n$actions[Notice::ACTION_EMAIL];
}
@ -173,16 +173,16 @@ Raising Notices
A script should raise a notice for any occurrence that a user may want
to be notified about or take action on. For example, whenever the base
SSH analysis scripts sees an SSH session where it is heuristically SSH analysis scripts sees enough failed logins to a given host, it
guessed to be a successful login, it raises a Notice of the type raises a notice of the type :bro:see:`SSH::Password_Guessing`. The code
:bro:see:`SSH::Login`. The code in the base SSH analysis script looks in the base SSH analysis script which raises the notice looks like this:
like this:
.. code:: bro
NOTICE([$note=SSH::Login, NOTICE([$note=Password_Guessing,
$msg="Heuristically detected successful SSH login.", $msg=fmt("%s appears to be guessing SSH passwords (seen in %d connections).", key$host, r$num),
$conn=c]); $src=key$host,
$identifier=cat(key$host)]);
:bro:see:`NOTICE` is a normal function in the global namespace which
wraps a function within the ``Notice`` namespace. It takes a single


@ -82,7 +82,8 @@ class BroGeneric(ObjectDescription):
objects = self.env.domaindata['bro']['objects']
key = (self.objtype, name)
if key in objects: if ( key in objects and self.objtype != "id" and
self.objtype != "type" ):
self.env.warn(self.env.docname,
'duplicate description of %s %s, ' %
(self.objtype, name) +
@ -150,6 +151,12 @@ class BroEnum(BroGeneric):
#self.indexnode['entries'].append(('single', indextext,
# targetname, targetname))
m = sig.split()
if len(m) < 2:
self.env.warn(self.env.docname,
"bro:enum directive missing argument(s)")
return
if m[1] == "Notice::Type":
if 'notices' not in self.env.domaindata['bro']:
self.env.domaindata['bro']['notices'] = []

doc/file-analysis.rst

@ -0,0 +1,185 @@
=============
File Analysis
=============
.. rst-class:: opening
In the past, writing Bro scripts with the intent of analyzing file
content could be cumbersome because of the fact that the content
would be presented in different ways, via events, at the
script-layer depending on which network protocol was involved in the
file transfer. Scripts written to analyze files over one protocol
would have to be copied and modified to fit other protocols. The
file analysis framework (FAF) instead provides a generalized
presentation of file-related information. The information regarding
the protocol involved in transporting a file over the network is
still available, but it no longer has to dictate how one organizes
their scripting logic to handle it. A goal of the FAF is to
provide analysis specifically for files that is analogous to the
analysis Bro provides for network connections.
.. contents::
File Lifecycle Events
=====================
The key events that may occur during the lifetime of a file are:
:bro:see:`file_new`, :bro:see:`file_over_new_connection`,
:bro:see:`file_timeout`, :bro:see:`file_gap`, and
:bro:see:`file_state_remove`. Handling any of these events provides
some information about the file such as which network
:bro:see:`connection` and protocol are transporting the file, how many
bytes have been transferred so far, and its MIME type.
.. code:: bro
event connection_state_remove(c: connection)
{
print "connection_state_remove";
print c$uid;
print c$id;
for ( s in c$service )
print s;
}
event file_state_remove(f: fa_file)
{
print "file_state_remove";
print f$id;
for ( cid in f$conns )
{
print f$conns[cid]$uid;
print cid;
}
print f$source;
}
might give output like::
file_state_remove
Cx92a0ym5R8
REs2LQfVW2j
[orig_h=10.0.0.7, orig_p=59856/tcp, resp_h=192.150.187.43, resp_p=80/tcp]
HTTP
connection_state_remove
REs2LQfVW2j
[orig_h=10.0.0.7, orig_p=59856/tcp, resp_h=192.150.187.43, resp_p=80/tcp]
HTTP
This doesn't perform any interesting analysis yet, but does highlight
the similarity between analysis of connections and files. Connections
are identified by the usual 5-tuple or a convenient UID string while
files are identified just by a string of the same format as the
connection UID. So there are unique ways to identify both files and
connections, and files hold references to the connection (or connections)
that transported them.
Adding Analysis
===============
There are builtin file analyzers which can be attached to files. Once
attached, they start receiving the contents of the file as Bro extracts
it from an ongoing network connection. What they do with the file
contents is up to the particular file analyzer implementation, but
they'll typically either report further information about the file via
events (e.g. :bro:see:`Files::ANALYZER_MD5` will report the
file's MD5 checksum via :bro:see:`file_hash` once calculated) or they'll
have some side effect (e.g. :bro:see:`Files::ANALYZER_EXTRACT`
will write the contents of the file out to the local file system).
In the future there may be file analyzers that automatically attach to
files based on heuristics, similar to the Dynamic Protocol Detection
(DPD) framework for connections, but many will always require an
explicit attachment decision:
.. code:: bro
event file_new(f: fa_file)
{
print "new file", f$id;
if ( f?$mime_type && f$mime_type == "text/plain" )
Files::add_analyzer(f, Files::ANALYZER_MD5);
}
event file_hash(f: fa_file, kind: string, hash: string)
{
print "file_hash", f$id, kind, hash;
}
this script calculates MD5s for all plain text files and might give
output::
new file, Cx92a0ym5R8
file_hash, Cx92a0ym5R8, md5, 397168fd09991a0e712254df7bc639ac
Some file analyzers might have tunable parameters that need to be
specified in the call to :bro:see:`Files::add_analyzer`:
.. code:: bro
event file_new(f: fa_file)
{
Files::add_analyzer(f, Files::ANALYZER_EXTRACT,
[$extract_filename="myfile"]);
}
In this case, the file extraction analyzer doesn't generate any further
events, but does have the effect of writing out the file contents to the
local file system at the location resulting from the concatenation of
the path specified by :bro:see:`FileExtract::prefix` and the string,
``myfile``. Of course, for a network with more than a single file being
transferred, it's probably preferable to specify a different extraction
path for each file, unlike this example.
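One way to do that, sketched here purely as an illustration (the naming
pattern is arbitrary), is to derive the extraction file name from the
file's identifier:
.. code:: bro
event file_new(f: fa_file)
{
Files::add_analyzer(f, Files::ANALYZER_EXTRACT,
[$extract_filename=fmt("extract-%s", f$id)]);
}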
Regardless of which file analyzers end up acting on a file, general
information about the file (e.g. size, time of last data transferred,
MIME type, etc.) are logged in ``files.log``.
Input Framework Integration
===========================
The FAF comes with a simple way to integrate with the :doc:`Input
Framework <input>`, so that Bro can analyze files from external sources
in the same way it analyzes files that it sees coming over traffic from
a network interface it's monitoring. It only requires a call to
:bro:see:`Input::add_analysis`:
.. code:: bro
redef exit_only_after_terminate = T;
event file_new(f: fa_file)
{
print "new file", f$id;
Files::add_analyzer(f, Files::ANALYZER_MD5);
}
event file_state_remove(f: fa_file)
{
Input::remove(f$source);
terminate();
}
event file_hash(f: fa_file, kind: string, hash: string)
{
print "file_hash", f$id, kind, hash;
}
event bro_init()
{
local source: string = "./myfile";
Input::add_analysis([$source=source, $name=source]);
}
Note that the "source" field of :bro:see:`fa_file` corresponds to the
"name" field of :bro:see:`Input::AnalysisDescription` since that is what
the input framework uses to uniquely identify an input stream.
The output of the above script may be::
new file, G1fS2xthS4l
file_hash, G1fS2xthS4l, md5, 54098b367d2e87b078671fad4afb9dbb
Nothing that special, but it at least verifies the MD5 file analyzer
saw all the bytes of the input file and calculated the checksum
correctly!


@ -29,10 +29,7 @@ Quick Start
Load the package of scripts that sends data into the Intelligence
Framework to be checked by loading this script in local.bro::
@load policy/frameworks/intel @load policy/frameworks/intel/seen
(TODO: find some good mechanism for getting setup with good data
quickly)
Refer to the "Loading Intelligence" section below to see the format
for Intelligence Framework text files, then load those text files with
@ -63,16 +60,14 @@ data out to all of the nodes that need it.
Here is an example of the intelligence data format. Note that all
whitespace separators are literal tabs and fields containing only a
hyphen a considered to be null values.:: hyphen are considered to be null values.::
#fields host net str str_type meta.source meta.desc meta.url #fields indicator indicator_type meta.source meta.desc meta.url
1.2.3.4 - - - source1 Sending phishing email http://source1.com/badhosts/1.2.3.4 1.2.3.4 Intel::ADDR source1 Sending phishing email http://source1.com/badhosts/1.2.3.4
- 31.131.248.0/21 - - spamhaus-drop SBL154982 - - a.b.com Intel::DOMAIN source2 Name used for data exfiltration -
- - a.b.com Intel::DOMAIN source2 Name used for data exfiltration -
For more examples of built in `str_type` values, please refer to the For more examples of built in `indicator_type` values, please refer to the
autogenerated documentation for the intelligence framework (TODO: autogenerated documentation for the intelligence framework.
figure out how to do this link).
To load the data once files are created, use the following example
code to define files to load with your own file names of course::
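# Hypothetical sketch; substitute the path(s) to your own intel files.
redef Intel::read_files += {
"/somewhere/yourdata.txt",
};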
@ -92,8 +87,7 @@ When some bit of data is extracted (such as an email address in the
"From" header in a message over SMTP), the Intelligence Framework
needs to be informed that this data was discovered and it's presence
should be checked within the intelligence data set. This is
accomplished through the Intel::seen (TODO: do a reference link) accomplished through the Intel::seen function.
function.
Typically users won't need to work with this function due to built in
hook scripts that Bro ships with that will "see" data and send it into
@ -108,7 +102,7 @@ The full package of hook scripts that Bro ships with for sending this
"seen" data into the intelligence framework can be loading by adding
this line to local.bro::
@load policy/frameworks/intel @load policy/frameworks/intel/seen
Intelligence Matches
********************


@ -5,6 +5,34 @@
Bro Documentation
=================
Guides
------
.. toctree::
:maxdepth: 1
INSTALL
upgrade
quickstart
faq
reporting-problems
Frameworks
----------
.. toctree::
:maxdepth: 1
notice
logging
input
file-analysis
cluster
signatures
How-Tos
-------
.. toctree::
:maxdepth: 2
@ -23,7 +51,11 @@ Just Testing
.. code:: bro
print "Hey Bro!" scripts/packages
scripts/index
scripts/builtins
scripts/proto-analyzers
scripts/file-analyzers
.. btest:: test


@ -15,11 +15,11 @@ endif ()
#
# srcDir: the directory which contains broInput
# broInput: the file name of a bro policy script, any path prefix of this
# argument will be used to derive what path under policy/ the generated # argument will be used to derive what path under scripts/ the generated
# documentation will be placed.
# group: optional name of group that the script documentation will belong to.
# If this is not given, .bif files automatically get their own group or # If this is not given, the group is automatically set to any path portion
# the group is automatically by any path portion of the broInput argument. # of the broInput argument.
#
# In addition to adding the makefile target, several CMake variables are set:
#
@ -45,12 +45,6 @@ macro(REST_TARGET srcDir broInput)
set(sumTextSrc ${absSrcPath})
set(ogSourceFile ${absSrcPath})
if (${extension} STREQUAL ".bif.bro")
set(ogSourceFile ${BIF_SRC_DIR}/${basename})
# the summary text is taken at configure time, but .bif.bro files
# may not have been generated yet, so read .bif file instead
set(sumTextSrc ${ogSourceFile})
endif ()
if (NOT relDstDir)
set(docName "${basename}")
@ -70,8 +64,6 @@ macro(REST_TARGET srcDir broInput)
if (NOT "${ARGN}" STREQUAL "")
set(group ${ARGN})
elseif (${extension} STREQUAL ".bif.bro")
set(group bifs)
elseif (relDstDir)
set(group ${relDstDir}/index)
# add package index to master package list if not already in it
@ -107,7 +99,7 @@ macro(REST_TARGET srcDir broInput)
COMMAND "${CMAKE_COMMAND}"
ARGS -E remove_directory .state
# generate the reST documentation using bro
COMMAND BROPATH=${BROPATH}:${srcDir} ${CMAKE_BINARY_DIR}/src/bro COMMAND BROPATH=${BROPATH}:${srcDir} BROMAGIC=${CMAKE_SOURCE_DIR}/magic/database ${CMAKE_BINARY_DIR}/src/bro
ARGS -b -Z ${broInput} || (rm -rf .state *.log *.rst && exit 1)
# move generated doc into a new directory tree that
# defines the final structure of documents
@ -132,6 +124,35 @@ endmacro(REST_TARGET)
# Schedule Bro scripts for which to generate documentation.
include(DocSourcesList.cmake)
# Macro for generating reST docs that are independent of any particular Bro
# script.
macro(INDEPENDENT_REST_TARGET reST_file)
add_custom_command(OUTPUT ${reST_file}
# delete any leftover state from previous bro runs
COMMAND "${CMAKE_COMMAND}"
ARGS -E remove_directory .state
# generate the reST documentation using bro
COMMAND BROPATH=${BROPATH}:${srcDir} BROMAGIC=${CMAKE_SOURCE_DIR}/magic/database ${CMAKE_BINARY_DIR}/src/bro
ARGS -b -Z base/init-bare.bro || (rm -rf .state *.log *.rst && exit 1)
# move generated doc into a new directory tree that
# defines the final structure of documents
COMMAND "${CMAKE_COMMAND}"
ARGS -E make_directory ${dstDir}
COMMAND "${CMAKE_COMMAND}"
ARGS -E copy ${reST_file} ${dstDir}
# clean up the build directory
COMMAND rm
ARGS -rf .state *.log *.rst
DEPENDS bro
WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
COMMENT "[Bro] Generating reST docs for ${reST_file}"
)
list(APPEND ALL_REST_OUTPUTS ${reST_file})
endmacro(INDEPENDENT_REST_TARGET)
independent_rest_target(proto-analyzers.rst)
independent_rest_target(file-analyzers.rst)
# create temporary list of all docs to include in the master policy/index file # create temporary list of all docs to include in the master policy/index file
file(WRITE ${MASTER_POLICY_INDEX} "${MASTER_POLICY_INDEX_TEXT}") file(WRITE ${MASTER_POLICY_INDEX} "${MASTER_POLICY_INDEX_TEXT}")


@ -16,14 +16,69 @@ rest_target(${CMAKE_CURRENT_SOURCE_DIR} example.bro internal)
rest_target(${psd} base/init-default.bro internal) rest_target(${psd} base/init-default.bro internal)
rest_target(${psd} base/init-bare.bro internal) rest_target(${psd} base/init-bare.bro internal)
rest_target(${CMAKE_BINARY_DIR}/src base/bro.bif.bro) rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/analyzer.bif.bro)
rest_target(${CMAKE_BINARY_DIR}/src base/const.bif.bro) rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/bloom-filter.bif.bro)
rest_target(${CMAKE_BINARY_DIR}/src base/event.bif.bro) rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/bro.bif.bro)
rest_target(${CMAKE_BINARY_DIR}/src base/input.bif.bro) rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/const.bif.bro)
rest_target(${CMAKE_BINARY_DIR}/src base/logging.bif.bro) rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/event.bif.bro)
rest_target(${CMAKE_BINARY_DIR}/src base/reporter.bif.bro) rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/file_analysis.bif.bro)
rest_target(${CMAKE_BINARY_DIR}/src base/strings.bif.bro) rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/input.bif.bro)
rest_target(${CMAKE_BINARY_DIR}/src base/types.bif.bro) rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/logging.bif.bro)
rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/plugins/Bro_ARP.events.bif.bro)
rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/plugins/Bro_AYIYA.events.bif.bro)
rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/plugins/Bro_BackDoor.events.bif.bro)
rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/plugins/Bro_BitTorrent.events.bif.bro)
rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/plugins/Bro_ConnSize.events.bif.bro)
rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/plugins/Bro_DCE_RPC.events.bif.bro)
rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/plugins/Bro_DHCP.events.bif.bro)
rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/plugins/Bro_DNP3.events.bif.bro)
rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/plugins/Bro_DNS.events.bif.bro)
rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/plugins/Bro_FTP.events.bif.bro)
rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/plugins/Bro_FTP.functions.bif.bro)
rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/plugins/Bro_File.events.bif.bro)
rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/plugins/Bro_FileHash.events.bif.bro)
rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/plugins/Bro_Finger.events.bif.bro)
rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/plugins/Bro_GTPv1.events.bif.bro)
rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/plugins/Bro_Gnutella.events.bif.bro)
rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/plugins/Bro_HTTP.events.bif.bro)
rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/plugins/Bro_HTTP.functions.bif.bro)
rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/plugins/Bro_ICMP.events.bif.bro)
rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/plugins/Bro_IRC.events.bif.bro)
rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/plugins/Bro_Ident.events.bif.bro)
rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/plugins/Bro_InterConn.events.bif.bro)
rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/plugins/Bro_Login.events.bif.bro)
rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/plugins/Bro_Login.functions.bif.bro)
rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/plugins/Bro_MIME.events.bif.bro)
rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/plugins/Bro_Modbus.events.bif.bro)
rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/plugins/Bro_NCP.events.bif.bro)
rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/plugins/Bro_NTP.events.bif.bro)
rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/plugins/Bro_NetBIOS.events.bif.bro)
rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/plugins/Bro_NetBIOS.functions.bif.bro)
rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/plugins/Bro_NetFlow.events.bif.bro)
rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/plugins/Bro_PIA.events.bif.bro)
rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/plugins/Bro_POP3.events.bif.bro)
rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/plugins/Bro_RPC.events.bif.bro)
rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/plugins/Bro_SMB.events.bif.bro)
rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/plugins/Bro_SMTP.events.bif.bro)
rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/plugins/Bro_SMTP.functions.bif.bro)
rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/plugins/Bro_SOCKS.events.bif.bro)
rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/plugins/Bro_SSH.events.bif.bro)
rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/plugins/Bro_SSL.events.bif.bro)
rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/plugins/Bro_SSL.functions.bif.bro)
rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/plugins/Bro_SteppingStone.events.bif.bro)
rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/plugins/Bro_Syslog.events.bif.bro)
rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/plugins/Bro_TCP.events.bif.bro)
rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/plugins/Bro_TCP.functions.bif.bro)
rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/plugins/Bro_Teredo.events.bif.bro)
rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/plugins/Bro_UDP.events.bif.bro)
rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/plugins/Bro_ZIP.events.bif.bro)
rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/reporter.bif.bro)
rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/strings.bif.bro)
rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/top-k.bif.bro)
rest_target(${CMAKE_BINARY_DIR}/scripts base/bif/types.bif.bro)
rest_target(${psd} base/files/extract/main.bro)
rest_target(${psd} base/files/hash/main.bro)
rest_target(${psd} base/frameworks/analyzer/main.bro)
rest_target(${psd} base/frameworks/cluster/main.bro) rest_target(${psd} base/frameworks/cluster/main.bro)
rest_target(${psd} base/frameworks/cluster/nodes/manager.bro) rest_target(${psd} base/frameworks/cluster/nodes/manager.bro)
rest_target(${psd} base/frameworks/cluster/nodes/proxy.bro) rest_target(${psd} base/frameworks/cluster/nodes/proxy.bro)
@ -32,10 +87,13 @@ rest_target(${psd} base/frameworks/cluster/setup-connections.bro)
rest_target(${psd} base/frameworks/communication/main.bro) rest_target(${psd} base/frameworks/communication/main.bro)
rest_target(${psd} base/frameworks/control/main.bro) rest_target(${psd} base/frameworks/control/main.bro)
rest_target(${psd} base/frameworks/dpd/main.bro) rest_target(${psd} base/frameworks/dpd/main.bro)
rest_target(${psd} base/frameworks/files/main.bro)
rest_target(${psd} base/frameworks/input/main.bro) rest_target(${psd} base/frameworks/input/main.bro)
rest_target(${psd} base/frameworks/input/readers/ascii.bro) rest_target(${psd} base/frameworks/input/readers/ascii.bro)
rest_target(${psd} base/frameworks/input/readers/benchmark.bro) rest_target(${psd} base/frameworks/input/readers/benchmark.bro)
rest_target(${psd} base/frameworks/input/readers/binary.bro)
rest_target(${psd} base/frameworks/input/readers/raw.bro) rest_target(${psd} base/frameworks/input/readers/raw.bro)
rest_target(${psd} base/frameworks/input/readers/sqlite.bro)
rest_target(${psd} base/frameworks/intel/cluster.bro) rest_target(${psd} base/frameworks/intel/cluster.bro)
rest_target(${psd} base/frameworks/intel/input.bro) rest_target(${psd} base/frameworks/intel/input.bro)
rest_target(${psd} base/frameworks/intel/main.bro) rest_target(${psd} base/frameworks/intel/main.bro)
@ -46,9 +104,7 @@ rest_target(${psd} base/frameworks/logging/writers/ascii.bro)
rest_target(${psd} base/frameworks/logging/writers/dataseries.bro) rest_target(${psd} base/frameworks/logging/writers/dataseries.bro)
rest_target(${psd} base/frameworks/logging/writers/elasticsearch.bro) rest_target(${psd} base/frameworks/logging/writers/elasticsearch.bro)
rest_target(${psd} base/frameworks/logging/writers/none.bro) rest_target(${psd} base/frameworks/logging/writers/none.bro)
rest_target(${psd} base/frameworks/metrics/cluster.bro) rest_target(${psd} base/frameworks/logging/writers/sqlite.bro)
rest_target(${psd} base/frameworks/metrics/main.bro)
rest_target(${psd} base/frameworks/metrics/non-cluster.bro)
rest_target(${psd} base/frameworks/notice/actions/add-geodata.bro) rest_target(${psd} base/frameworks/notice/actions/add-geodata.bro)
rest_target(${psd} base/frameworks/notice/actions/drop.bro) rest_target(${psd} base/frameworks/notice/actions/drop.bro)
rest_target(${psd} base/frameworks/notice/actions/email_admin.bro) rest_target(${psd} base/frameworks/notice/actions/email_admin.bro)
@ -61,32 +117,53 @@ rest_target(${psd} base/frameworks/notice/non-cluster.bro)
rest_target(${psd} base/frameworks/notice/weird.bro) rest_target(${psd} base/frameworks/notice/weird.bro)
rest_target(${psd} base/frameworks/packet-filter/main.bro) rest_target(${psd} base/frameworks/packet-filter/main.bro)
rest_target(${psd} base/frameworks/packet-filter/netstats.bro) rest_target(${psd} base/frameworks/packet-filter/netstats.bro)
rest_target(${psd} base/frameworks/packet-filter/utils.bro)
rest_target(${psd} base/frameworks/reporter/main.bro) rest_target(${psd} base/frameworks/reporter/main.bro)
rest_target(${psd} base/frameworks/signatures/main.bro) rest_target(${psd} base/frameworks/signatures/main.bro)
rest_target(${psd} base/frameworks/software/main.bro) rest_target(${psd} base/frameworks/software/main.bro)
rest_target(${psd} base/frameworks/sumstats/cluster.bro)
rest_target(${psd} base/frameworks/sumstats/main.bro)
rest_target(${psd} base/frameworks/sumstats/non-cluster.bro)
rest_target(${psd} base/frameworks/sumstats/plugins/average.bro)
rest_target(${psd} base/frameworks/sumstats/plugins/last.bro)
rest_target(${psd} base/frameworks/sumstats/plugins/max.bro)
rest_target(${psd} base/frameworks/sumstats/plugins/min.bro)
rest_target(${psd} base/frameworks/sumstats/plugins/sample.bro)
rest_target(${psd} base/frameworks/sumstats/plugins/std-dev.bro)
rest_target(${psd} base/frameworks/sumstats/plugins/sum.bro)
rest_target(${psd} base/frameworks/sumstats/plugins/topk.bro)
rest_target(${psd} base/frameworks/sumstats/plugins/unique.bro)
rest_target(${psd} base/frameworks/sumstats/plugins/variance.bro)
rest_target(${psd} base/frameworks/tunnels/main.bro) rest_target(${psd} base/frameworks/tunnels/main.bro)
rest_target(${psd} base/misc/find-checksum-offloading.bro) rest_target(${psd} base/misc/find-checksum-offloading.bro)
rest_target(${psd} base/protocols/conn/contents.bro) rest_target(${psd} base/protocols/conn/contents.bro)
rest_target(${psd} base/protocols/conn/inactivity.bro) rest_target(${psd} base/protocols/conn/inactivity.bro)
rest_target(${psd} base/protocols/conn/main.bro) rest_target(${psd} base/protocols/conn/main.bro)
rest_target(${psd} base/protocols/conn/polling.bro) rest_target(${psd} base/protocols/conn/polling.bro)
rest_target(${psd} base/protocols/dhcp/consts.bro)
rest_target(${psd} base/protocols/dhcp/main.bro)
rest_target(${psd} base/protocols/dhcp/utils.bro)
rest_target(${psd} base/protocols/dnp3/consts.bro)
rest_target(${psd} base/protocols/dnp3/main.bro)
rest_target(${psd} base/protocols/dns/consts.bro) rest_target(${psd} base/protocols/dns/consts.bro)
rest_target(${psd} base/protocols/dns/main.bro) rest_target(${psd} base/protocols/dns/main.bro)
rest_target(${psd} base/protocols/ftp/file-extract.bro) rest_target(${psd} base/protocols/ftp/files.bro)
rest_target(${psd} base/protocols/ftp/gridftp.bro) rest_target(${psd} base/protocols/ftp/gridftp.bro)
rest_target(${psd} base/protocols/ftp/info.bro)
rest_target(${psd} base/protocols/ftp/main.bro) rest_target(${psd} base/protocols/ftp/main.bro)
rest_target(${psd} base/protocols/ftp/utils-commands.bro) rest_target(${psd} base/protocols/ftp/utils-commands.bro)
rest_target(${psd} base/protocols/http/file-extract.bro) rest_target(${psd} base/protocols/ftp/utils.bro)
rest_target(${psd} base/protocols/http/file-hash.bro) rest_target(${psd} base/protocols/http/entities.bro)
rest_target(${psd} base/protocols/http/file-ident.bro) rest_target(${psd} base/protocols/http/files.bro)
rest_target(${psd} base/protocols/http/main.bro) rest_target(${psd} base/protocols/http/main.bro)
rest_target(${psd} base/protocols/http/utils.bro) rest_target(${psd} base/protocols/http/utils.bro)
rest_target(${psd} base/protocols/irc/dcc-send.bro) rest_target(${psd} base/protocols/irc/dcc-send.bro)
rest_target(${psd} base/protocols/irc/files.bro)
rest_target(${psd} base/protocols/irc/main.bro) rest_target(${psd} base/protocols/irc/main.bro)
rest_target(${psd} base/protocols/modbus/consts.bro) rest_target(${psd} base/protocols/modbus/consts.bro)
rest_target(${psd} base/protocols/modbus/main.bro) rest_target(${psd} base/protocols/modbus/main.bro)
rest_target(${psd} base/protocols/smtp/entities-excerpt.bro)
rest_target(${psd} base/protocols/smtp/entities.bro) rest_target(${psd} base/protocols/smtp/entities.bro)
rest_target(${psd} base/protocols/smtp/files.bro)
rest_target(${psd} base/protocols/smtp/main.bro) rest_target(${psd} base/protocols/smtp/main.bro)
rest_target(${psd} base/protocols/socks/consts.bro) rest_target(${psd} base/protocols/socks/consts.bro)
rest_target(${psd} base/protocols/socks/main.bro) rest_target(${psd} base/protocols/socks/main.bro)
@ -96,53 +173,70 @@ rest_target(${psd} base/protocols/ssl/main.bro)
rest_target(${psd} base/protocols/ssl/mozilla-ca-list.bro) rest_target(${psd} base/protocols/ssl/mozilla-ca-list.bro)
rest_target(${psd} base/protocols/syslog/consts.bro) rest_target(${psd} base/protocols/syslog/consts.bro)
rest_target(${psd} base/protocols/syslog/main.bro) rest_target(${psd} base/protocols/syslog/main.bro)
rest_target(${psd} base/utils/active-http.bro)
rest_target(${psd} base/utils/addrs.bro) rest_target(${psd} base/utils/addrs.bro)
rest_target(${psd} base/utils/conn-ids.bro) rest_target(${psd} base/utils/conn-ids.bro)
rest_target(${psd} base/utils/dir.bro)
rest_target(${psd} base/utils/directions-and-hosts.bro) rest_target(${psd} base/utils/directions-and-hosts.bro)
rest_target(${psd} base/utils/exec.bro)
rest_target(${psd} base/utils/files.bro) rest_target(${psd} base/utils/files.bro)
rest_target(${psd} base/utils/numbers.bro) rest_target(${psd} base/utils/numbers.bro)
rest_target(${psd} base/utils/paths.bro) rest_target(${psd} base/utils/paths.bro)
rest_target(${psd} base/utils/patterns.bro) rest_target(${psd} base/utils/patterns.bro)
rest_target(${psd} base/utils/queue.bro)
rest_target(${psd} base/utils/site.bro) rest_target(${psd} base/utils/site.bro)
rest_target(${psd} base/utils/strings.bro) rest_target(${psd} base/utils/strings.bro)
rest_target(${psd} base/utils/thresholds.bro) rest_target(${psd} base/utils/thresholds.bro)
rest_target(${psd} base/utils/time.bro)
rest_target(${psd} base/utils/urls.bro) rest_target(${psd} base/utils/urls.bro)
rest_target(${psd} policy/frameworks/communication/listen.bro) rest_target(${psd} policy/frameworks/communication/listen.bro)
rest_target(${psd} policy/frameworks/control/controllee.bro) rest_target(${psd} policy/frameworks/control/controllee.bro)
rest_target(${psd} policy/frameworks/control/controller.bro) rest_target(${psd} policy/frameworks/control/controller.bro)
rest_target(${psd} policy/frameworks/dpd/detect-protocols.bro) rest_target(${psd} policy/frameworks/dpd/detect-protocols.bro)
rest_target(${psd} policy/frameworks/dpd/packet-segment-logging.bro) rest_target(${psd} policy/frameworks/dpd/packet-segment-logging.bro)
rest_target(${psd} policy/frameworks/intel/conn-established.bro) rest_target(${psd} policy/frameworks/files/detect-MHR.bro)
rest_target(${psd} policy/frameworks/intel/dns.bro) rest_target(${psd} policy/frameworks/files/hash-all-files.bro)
rest_target(${psd} policy/frameworks/intel/http-host-header.bro) rest_target(${psd} policy/frameworks/intel/do_notice.bro)
rest_target(${psd} policy/frameworks/intel/http-url.bro) rest_target(${psd} policy/frameworks/intel/seen/conn-established.bro)
rest_target(${psd} policy/frameworks/intel/http-user-agents.bro) rest_target(${psd} policy/frameworks/intel/seen/dns.bro)
rest_target(${psd} policy/frameworks/intel/smtp-url-extraction.bro) rest_target(${psd} policy/frameworks/intel/seen/http-host-header.bro)
rest_target(${psd} policy/frameworks/intel/smtp.bro) rest_target(${psd} policy/frameworks/intel/seen/http-url.bro)
rest_target(${psd} policy/frameworks/intel/ssl.bro) rest_target(${psd} policy/frameworks/intel/seen/http-user-agents.bro)
rest_target(${psd} policy/frameworks/intel/where-locations.bro) rest_target(${psd} policy/frameworks/intel/seen/smtp-url-extraction.bro)
rest_target(${psd} policy/frameworks/metrics/conn-example.bro) rest_target(${psd} policy/frameworks/intel/seen/smtp.bro)
rest_target(${psd} policy/frameworks/metrics/http-example.bro) rest_target(${psd} policy/frameworks/intel/seen/ssl.bro)
rest_target(${psd} policy/frameworks/metrics/ssl-example.bro) rest_target(${psd} policy/frameworks/intel/seen/where-locations.bro)
rest_target(${psd} policy/frameworks/packet-filter/shunt.bro)
rest_target(${psd} policy/frameworks/software/version-changes.bro) rest_target(${psd} policy/frameworks/software/version-changes.bro)
rest_target(${psd} policy/frameworks/software/vulnerable.bro) rest_target(${psd} policy/frameworks/software/vulnerable.bro)
rest_target(${psd} policy/integration/barnyard2/main.bro) rest_target(${psd} policy/integration/barnyard2/main.bro)
rest_target(${psd} policy/integration/barnyard2/types.bro) rest_target(${psd} policy/integration/barnyard2/types.bro)
rest_target(${psd} policy/integration/collective-intel/main.bro) rest_target(${psd} policy/integration/collective-intel/main.bro)
rest_target(${psd} policy/misc/analysis-groups.bro) rest_target(${psd} policy/misc/app-stats/main.bro)
rest_target(${psd} policy/misc/app-stats/plugins/facebook.bro)
rest_target(${psd} policy/misc/app-stats/plugins/gmail.bro)
rest_target(${psd} policy/misc/app-stats/plugins/google.bro)
rest_target(${psd} policy/misc/app-stats/plugins/netflix.bro)
rest_target(${psd} policy/misc/app-stats/plugins/pandora.bro)
rest_target(${psd} policy/misc/app-stats/plugins/youtube.bro)
rest_target(${psd} policy/misc/capture-loss.bro) rest_target(${psd} policy/misc/capture-loss.bro)
rest_target(${psd} policy/misc/detect-traceroute/main.bro)
rest_target(${psd} policy/misc/known-devices.bro)
rest_target(${psd} policy/misc/load-balancing.bro)
rest_target(${psd} policy/misc/loaded-scripts.bro) rest_target(${psd} policy/misc/loaded-scripts.bro)
rest_target(${psd} policy/misc/profiling.bro) rest_target(${psd} policy/misc/profiling.bro)
rest_target(${psd} policy/misc/scan.bro)
rest_target(${psd} policy/misc/stats.bro) rest_target(${psd} policy/misc/stats.bro)
rest_target(${psd} policy/misc/trim-trace-file.bro) rest_target(${psd} policy/misc/trim-trace-file.bro)
rest_target(${psd} policy/protocols/conn/known-hosts.bro) rest_target(${psd} policy/protocols/conn/known-hosts.bro)
rest_target(${psd} policy/protocols/conn/known-services.bro) rest_target(${psd} policy/protocols/conn/known-services.bro)
rest_target(${psd} policy/protocols/conn/weirds.bro) rest_target(${psd} policy/protocols/conn/weirds.bro)
rest_target(${psd} policy/protocols/dhcp/known-devices-and-hostnames.bro)
rest_target(${psd} policy/protocols/dns/auth-addl.bro) rest_target(${psd} policy/protocols/dns/auth-addl.bro)
rest_target(${psd} policy/protocols/dns/detect-external-names.bro) rest_target(${psd} policy/protocols/dns/detect-external-names.bro)
rest_target(${psd} policy/protocols/ftp/detect-bruteforcing.bro)
rest_target(${psd} policy/protocols/ftp/detect.bro) rest_target(${psd} policy/protocols/ftp/detect.bro)
rest_target(${psd} policy/protocols/ftp/software.bro) rest_target(${psd} policy/protocols/ftp/software.bro)
rest_target(${psd} policy/protocols/http/detect-MHR.bro)
rest_target(${psd} policy/protocols/http/detect-sqli.bro) rest_target(${psd} policy/protocols/http/detect-sqli.bro)
rest_target(${psd} policy/protocols/http/detect-webapps.bro) rest_target(${psd} policy/protocols/http/detect-webapps.bro)
rest_target(${psd} policy/protocols/http/header-names.bro) rest_target(${psd} policy/protocols/http/header-names.bro)
@ -154,6 +248,7 @@ rest_target(${psd} policy/protocols/modbus/known-masters-slaves.bro)
rest_target(${psd} policy/protocols/modbus/track-memmap.bro) rest_target(${psd} policy/protocols/modbus/track-memmap.bro)
rest_target(${psd} policy/protocols/smtp/blocklists.bro) rest_target(${psd} policy/protocols/smtp/blocklists.bro)
rest_target(${psd} policy/protocols/smtp/detect-suspicious-orig.bro) rest_target(${psd} policy/protocols/smtp/detect-suspicious-orig.bro)
rest_target(${psd} policy/protocols/smtp/entities-excerpt.bro)
rest_target(${psd} policy/protocols/smtp/software.bro) rest_target(${psd} policy/protocols/smtp/software.bro)
rest_target(${psd} policy/protocols/ssh/detect-bruteforcing.bro) rest_target(${psd} policy/protocols/ssh/detect-bruteforcing.bro)
rest_target(${psd} policy/protocols/ssh/geo-data.bro) rest_target(${psd} policy/protocols/ssh/geo-data.bro)

View file

@ -1,5 +0,0 @@
.. This is a stub doc to which broxygen appends during the build process
Built-In Functions (BIFs)
=========================

View file

@ -329,6 +329,31 @@ The Bro scripting language supports the following built-in types.
[5] = "five", [5] = "five",
}; };
A table constructor (equivalent to the above example) can also be used
to create a table:
.. code:: bro
global t2: table[count] of string = table(
[11] = "eleven",
[5] = "five"
);
Table constructors can also be explicitly named by a type, which is
useful when a more complex index type could otherwise be
ambiguous:
.. code:: bro
type MyRec: record {
a: count &optional;
b: count;
};
type MyTable: table[MyRec] of string;
global t3 = MyTable([[$b=5]] = "b5", [[$b=7]] = "b7");
Accessing table elements is provided by enclosing index values within Accessing table elements is provided by enclosing index values within
square brackets (``[]``), for example: square brackets (``[]``), for example:
@ -397,6 +422,28 @@ The Bro scripting language supports the following built-in types.
The types are explicitly shown in the example above, but they could The types are explicitly shown in the example above, but they could
have been left to type inference. have been left to type inference.
A set constructor (equivalent to the above example) can also be used to
create a set:
.. code:: bro
global s3: set[port] = set(21/tcp, 23/tcp, 80/tcp, 443/tcp);
Set constructors can also be explicitly named by a type, which is
useful when a more complex index type could otherwise be
ambiguous:
.. code:: bro
type MyRec: record {
a: count &optional;
b: count;
};
type MySet: set[MyRec];
global s4 = MySet([$b=1], [$b=2]);
Set membership is tested with ``in`` or ``!in``: Set membership is tested with ``in`` or ``!in``:
.. code:: bro .. code:: bro
@ -404,6 +451,9 @@ The Bro scripting language supports the following built-in types.
if ( 21/tcp in s ) if ( 21/tcp in s )
... ...
if ( 21/tcp !in s )
...
Iterate over a set with a ``for`` loop: Iterate over a set with a ``for`` loop:
.. code:: bro .. code:: bro
@ -453,6 +503,21 @@ The Bro scripting language supports the following built-in types.
global v: vector of string = vector("one", "two", "three"); global v: vector of string = vector("one", "two", "three");
Vector constructors can also be explicitly named by a type, which
is useful when a more complex yield type could otherwise be
ambiguous.
.. code:: bro
type MyRec: record {
a: count &optional;
b: count;
};
type MyVec: vector of MyRec;
global v2 = MyVec([$b=1], [$b=2], [$b=3]);
Accessing vector elements is provided by enclosing index values within Accessing vector elements is provided by enclosing index values within
square brackets (``[]``), for example: square brackets (``[]``), for example:
@ -536,6 +601,44 @@ The Bro scripting language supports the following built-in types.
if ( r?$s ) if ( r?$s )
... ...
Records can also be created using a constructor syntax:
.. code:: bro
global r2: MyRecordType = record($c = 7);
And the constructor can be explicitly named by type, too, which
arguably makes the code more readable:
.. code:: bro
global r3 = MyRecordType($c = 42);
.. bro:type:: opaque
A data type whose actual representation/implementation is
intentionally hidden, but whose values may be passed to certain
functions that can actually access the internal/hidden resources.
Opaque types are differentiated from each other by qualifying them
like ``opaque of md5`` or ``opaque of sha1``. Any valid identifier
can be used as the type qualifier.
An example use of this type is the set of built-in functions which
perform hashing:
.. code:: bro
local handle: opaque of md5 = md5_hash_init();
md5_hash_update(handle, "test");
md5_hash_update(handle, "testing");
print md5_hash_finish(handle);
Here the opaque type is used to provide a handle to a particular
resource which is calculating an MD5 checksum incrementally over
time, but the details of that resource aren't relevant; it's only
necessary to have a handle as a way of identifying it and
distinguishing it from other such resources.
.. bro:type:: file .. bro:type:: file
Bro supports writing to files, but not reading from them. Files Bro supports writing to files, but not reading from them. Files
@ -595,6 +698,31 @@ The Bro scripting language supports the following built-in types.
print greeting("Dave"); print greeting("Dave");
Function parameters may specify default values as long as they appear
last in the parameter list:
.. code:: bro
global foo: function(s: string, t: string &default="abc", u: count &default=0);
If a function was previously declared with default parameters, the
default expressions can be omitted when implementing the function
body and they will still be used for function calls that lack those
arguments.
.. code:: bro
function foo(s: string, t: string, u: count)
{
print s, t, u;
}
And calls to the function may omit the defaults from the argument list:
.. code:: bro
foo("test");
.. bro:type:: event .. bro:type:: event
Event handlers are nearly identical in both syntax and semantics to Event handlers are nearly identical in both syntax and semantics to
@ -733,10 +861,10 @@ scripting language supports the following built-in attributes.
.. bro:attr:: &default .. bro:attr:: &default
Uses a default value for a record field or container elements. For Uses a default value for a record field, a function/hook/event
example, ``table[int] of string &default="foo"`` would create a parameter, or container elements. For example, ``table[int] of
table that returns the :bro:type:`string` ``"foo"`` for any string &default="foo"`` would create a table that returns the
non-existing index. :bro:type:`string` ``"foo"`` for any non-existing index.
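For example, a table declared with a default yield (an illustrative
snippet, not taken from any shipped script):
.. code:: bro
global t: table[int] of string &default="foo";
print t[3];   # prints "foo" since index 3 was never assigned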
.. bro:attr:: &redef .. bro:attr:: &redef

View file

@ -54,11 +54,11 @@ global example_ports = {
443/tcp, 562/tcp, 443/tcp, 562/tcp,
} &redef; } &redef;
# redefinitions of "dpd_config" are self-documenting and
# go into the generated doc's "Port Analysis" section event bro_init()
redef dpd_config += { {
[ANALYZER_SSL] = [$ports = example_ports] Analyzer::register_for_ports(Analyzer::ANALYZER_SSL, example_ports);
}; }
# redefinitions of "Notice::Type" are self-documenting, but # redefinitions of "Notice::Type" are self-documenting, but
# more information can be supplied in two different ways # more information can be supplied in two different ways

View file

@ -67,12 +67,12 @@ sourcedir=${thisdir}/../..
echo "$statictext" > $outfile echo "$statictext" > $outfile
bifs=`( cd ${sourcedir}/src && find . -name \*\.bif | sort )` bifs=`( cd ${sourcedir}/build/scripts/base && find . -name \*\.bif.bro | sort )`
for file in $bifs for file in $bifs
do do
f=${file:2}.bro f=${file:2}
echo "rest_target(\${CMAKE_BINARY_DIR}/src base/$f)" >> $outfile echo "rest_target(\${CMAKE_BINARY_DIR}/scripts base/$f)" >> $outfile
done done
scriptfiles=`( cd ${sourcedir}/scripts && find . -name \*\.bro | sort )` scriptfiles=`( cd ${sourcedir}/scripts && find . -name \*\.bro | sort )`

magic Submodule

@ -0,0 +1 @@
Subproject commit e87fe13a7b776182ffc8c75076d42702f5c28fed

View file

@ -0,0 +1 @@
@load ./main

View file

@ -0,0 +1,38 @@
@load base/frameworks/files
@load base/utils/paths
module FileExtract;
export {
## The prefix where files are extracted to.
const prefix = "./extract_files/" &redef;
redef record Files::Info += {
## Local filenames of extracted file.
extracted: string &optional &log;
};
redef record Files::AnalyzerArgs += {
## The local filename to which to write an extracted file.
## This field is used in the core by the extraction plugin
## to know where to write the file to. It's also optional; if it is
## not set, a default name is generated by the on_add() callback below.
extract_filename: string &optional;
};
}
function on_add(f: fa_file, args: Files::AnalyzerArgs)
{
if ( ! args?$extract_filename )
args$extract_filename = cat("extract-", f$source, "-", f$id);
f$info$extracted = args$extract_filename;
args$extract_filename = build_path_compressed(prefix, args$extract_filename);
}
event bro_init() &priority=10
{
Files::register_analyzer_add_callback(Files::ANALYZER_EXTRACT, on_add);
# Create the extraction directory.
mkdir(prefix);
}
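# A minimal usage sketch (illustrative only, not part of this script): attach
# the extraction analyzer to every new file and let the on_add() callback
# above choose a default extract_filename.
#
#     event file_new(f: fa_file)
#         {
#         Files::add_analyzer(f, Files::ANALYZER_EXTRACT);
#         }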

View file

@ -0,0 +1 @@
@load ./main

View file

@ -0,0 +1,32 @@
@load base/frameworks/files
module FileHash;
export {
redef record Files::Info += {
## An MD5 digest of the file contents.
md5: string &log &optional;
## A SHA1 digest of the file contents.
sha1: string &log &optional;
## A SHA256 digest of the file contents.
sha256: string &log &optional;
};
}
event file_hash(f: fa_file, kind: string, hash: string) &priority=5
{
switch ( kind ) {
case "md5":
f$info$md5 = hash;
break;
case "sha1":
f$info$sha1 = hash;
break;
case "sha256":
f$info$sha256 = hash;
break;
}
}
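# A minimal usage sketch (illustrative only, not part of this script; it
# assumes the MD5 analyzer's tag is Files::ANALYZER_MD5): request an MD5
# digest for every new file so that the md5 field above gets populated.
#
#     event file_new(f: fa_file)
#         {
#         Files::add_analyzer(f, Files::ANALYZER_MD5);
#         }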

View file

@ -0,0 +1 @@
@load ./main

View file

@ -0,0 +1,229 @@
##! Framework for managing Bro's protocol analyzers.
##!
##! The analyzer framework makes it possible to dynamically enable or disable analyzers, as
##! well as to manage the well-known ports which automatically activate a
##! particular analyzer for new connections.
##!
##! Protocol analyzers are identified by unique tags of type
##! :bro:type:`Analyzer::Tag`, such as :bro:enum:`Analyzer::ANALYZER_HTTP`.
##! These tags are defined internally by
##! the analyzers themselves, and documented in their analyzer-specific
##! description along with the events that they generate.
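##!
##! A minimal usage sketch (illustrative only; the port below is an arbitrary
##! example):
##!
##! .. code:: bro
##!
##!     event bro_init()
##!         {
##!         Analyzer::register_for_port(Analyzer::ANALYZER_HTTP, 8080/tcp);
##!         }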
@load base/frameworks/packet-filter/utils
module Analyzer;
export {
## If true, all available analyzers are initially disabled at startup. One
## can then selectively enable them with
## :bro:id:`Analyzer::enable_analyzer`.
global disable_all = F &redef;
## Enables an analyzer. Once enabled, the analyzer may be used for analysis
## of future connections as decided by Bro's dynamic protocol detection.
##
## tag: The tag of the analyzer to enable.
##
## Returns: True if the analyzer was successfully enabled.
global enable_analyzer: function(tag: Analyzer::Tag) : bool;
## Disables an analyzer. Once disabled, the analyzer will not be used
## further for analysis of future connections.
##
## tag: The tag of the analyzer to disable.
##
## Returns: True if the analyzer was successfully disabled.
global disable_analyzer: function(tag: Analyzer::Tag) : bool;
## Registers a set of well-known ports for an analyzer. If a future
## connection on one of these ports is seen, the analyzer will be
## automatically assigned to parsing it. The function *adds* to all ports
## already registered; it doesn't replace them.
##
## tag: The tag of the analyzer.
##
## ports: The set of well-known ports to associate with the analyzer.
##
## Returns: True if the ports were successfully registered.
global register_for_ports: function(tag: Analyzer::Tag, ports: set[port]) : bool;
## Registers an individual well-known port for an analyzer. If a future
## connection on this port is seen, the analyzer will be automatically
## assigned to parsing it. The function *adds* to all ports already
## registered; it doesn't replace them.
##
## tag: The tag of the analyzer.
##
## p: The well-known port to associate with the analyzer.
##
## Returns: True if the port was successfully registered.
global register_for_port: function(tag: Analyzer::Tag, p: port) : bool;
## Returns a set of all well-known ports currently registered for a
## specific analyzer.
##
## tag: The tag of the analyzer.
##
## Returns: The set of ports.
global registered_ports: function(tag: Analyzer::Tag) : set[port];
## Returns a table of all ports-to-analyzer mappings currently registered.
##
## Returns: A table mapping each analyzer to the set of ports
## registered for it.
global all_registered_ports: function() : table[Analyzer::Tag] of set[port];
## Translates an analyzer type to a string with the analyzer's name.
##
## tag: The analyzer tag.
##
## Returns: The analyzer name corresponding to the tag.
global name: function(tag: Analyzer::Tag) : string;
## Translates an analyzer's name to a tag enum value.
##
## name: The analyzer name.
##
## Returns: The analyzer tag corresponding to the name.
global get_tag: function(name: string): Analyzer::Tag;
## Schedules an analyzer for a future connection originating from a given IP
## address and port.
##
## orig: The IP address originating a connection in the future.
## 0.0.0.0 can be used as a wildcard to match any originator address.
##
## resp: The IP address responding to a connection from *orig*.
##
## resp_p: The destination port at *resp*.
##
## analyzer: The analyzer ID.
##
## tout: A timeout interval after which the scheduling request will be
## discarded if the connection has not yet been seen.
##
## Returns: True if successful.
global schedule_analyzer: function(orig: addr, resp: addr, resp_p: port,
analyzer: Analyzer::Tag, tout: interval) : bool;
## Automatically creates a BPF filter for the specified protocol based
## on the data supplied for the protocol through the
## :bro:see:`Analyzer::register_for_ports` function.
##
## tag: The analyzer tag.
##
## Returns: BPF filter string.
global analyzer_to_bpf: function(tag: Analyzer::Tag): string;
## Create a BPF filter which matches all of the ports defined
## by the various protocol analysis scripts as "registered ports"
## for the protocol.
global get_bpf: function(): string;
## A set of analyzers to disable by default at startup. The default set
## contains legacy analyzers that are no longer supported.
global disabled_analyzers: set[Analyzer::Tag] = {
ANALYZER_INTERCONN,
ANALYZER_STEPPINGSTONE,
ANALYZER_BACKDOOR,
ANALYZER_TCPSTATS,
} &redef;
}
@load base/bif/analyzer.bif
global ports: table[Analyzer::Tag] of set[port];
event bro_init() &priority=5
{
if ( disable_all )
__disable_all_analyzers();
for ( a in disabled_analyzers )
disable_analyzer(a);
}
function enable_analyzer(tag: Analyzer::Tag) : bool
{
return __enable_analyzer(tag);
}
function disable_analyzer(tag: Analyzer::Tag) : bool
{
return __disable_analyzer(tag);
}
function register_for_ports(tag: Analyzer::Tag, ports: set[port]) : bool
{
local rc = T;
for ( p in ports )
{
if ( ! register_for_port(tag, p) )
rc = F;
}
return rc;
}
function register_for_port(tag: Analyzer::Tag, p: port) : bool
{
if ( ! __register_for_port(tag, p) )
return F;
if ( tag !in ports )
ports[tag] = set();
add ports[tag][p];
return T;
}
function registered_ports(tag: Analyzer::Tag) : set[port]
{
return tag in ports ? ports[tag] : set();
}
function all_registered_ports(): table[Analyzer::Tag] of set[port]
{
return ports;
}
function name(atype: Analyzer::Tag) : string
{
return __name(atype);
}
function get_tag(name: string): Analyzer::Tag
{
return __tag(name);
}
function schedule_analyzer(orig: addr, resp: addr, resp_p: port,
analyzer: Analyzer::Tag, tout: interval) : bool
{
return __schedule_analyzer(orig, resp, resp_p, analyzer, tout);
}
function analyzer_to_bpf(tag: Analyzer::Tag): string
{
# Return an empty string if an undefined analyzer was given.
if ( tag !in ports )
return "";
local output = "";
for ( p in ports[tag] )
output = PacketFilter::combine_filters(output, "or", PacketFilter::port_to_bpf(p));
return output;
}
function get_bpf(): string
{
local output = "";
for ( tag in ports )
{
output = PacketFilter::combine_filters(output, "or", analyzer_to_bpf(tag));
}
return output;
}

View file

@ -216,12 +216,9 @@ function setup_peer(p: event_peer, node: Node)
request_remote_events(p, node$events); request_remote_events(p, node$events);
} }
if ( node?$capture_filter ) if ( node?$capture_filter && node$capture_filter != "" )
{ {
local filter = node$capture_filter; local filter = node$capture_filter;
if ( filter == "" )
filter = PacketFilter::default_filter;
do_script_log(p, fmt("sending capture_filter: %s", filter)); do_script_log(p, fmt("sending capture_filter: %s", filter));
send_capture_filter(p, filter); send_capture_filter(p, filter);
} }

View file

@ -1,212 +0,0 @@
# Signatures to initiate dynamic protocol detection.
signature dpd_ftp_client {
ip-proto == tcp
payload /(|.*[\n\r]) *[uU][sS][eE][rR] /
tcp-state originator
}
# Match for server greeting (220, 120) and for login or passwd
# required (230, 331).
signature dpd_ftp_server {
ip-proto == tcp
payload /[\n\r ]*(120|220)[^0-9].*[\n\r] *(230|331)[^0-9]/
tcp-state responder
requires-reverse-signature dpd_ftp_client
enable "ftp"
}
signature dpd_http_client {
ip-proto == tcp
payload /^[[:space:]]*(GET|HEAD|POST)[[:space:]]*/
tcp-state originator
}
signature dpd_http_server {
ip-proto == tcp
payload /^HTTP\/[0-9]/
tcp-state responder
requires-reverse-signature dpd_http_client
enable "http"
}
signature dpd_bittorrenttracker_client {
ip-proto == tcp
payload /^.*\/announce\?.*info_hash/
tcp-state originator
}
signature dpd_bittorrenttracker_server {
ip-proto == tcp
payload /^HTTP\/[0-9]/
tcp-state responder
requires-reverse-signature dpd_bittorrenttracker_client
enable "bittorrenttracker"
}
signature dpd_bittorrent_peer1 {
ip-proto == tcp
payload /^\x13BitTorrent protocol/
tcp-state originator
}
signature dpd_bittorrent_peer2 {
ip-proto == tcp
payload /^\x13BitTorrent protocol/
tcp-state responder
requires-reverse-signature dpd_bittorrent_peer1
enable "bittorrent"
}
signature irc_client1 {
ip-proto == tcp
payload /(|.*[\r\n]) *[Uu][Ss][Ee][Rr] +.+[\n\r]+ *[Nn][Ii][Cc][Kk] +.*[\r\n]/
requires-reverse-signature irc_server_reply
tcp-state originator
enable "irc"
}
signature irc_client2 {
ip-proto == tcp
payload /(|.*[\r\n]) *[Nn][Ii][Cc][Kk] +.+[\r\n]+ *[Uu][Ss][Ee][Rr] +.+[\r\n]/
requires-reverse-signature irc_server_reply
tcp-state originator
enable "irc"
}
signature irc_server_reply {
ip-proto == tcp
payload /^(|.*[\n\r])(:[^ \n\r]+ )?[0-9][0-9][0-9] /
tcp-state responder
}
signature irc_server_to_server1 {
ip-proto == tcp
payload /(|.*[\r\n]) *[Ss][Ee][Rr][Vv][Ee][Rr] +[^ ]+ +[0-9]+ +:.+[\r\n]/
}
signature irc_server_to_server2 {
ip-proto == tcp
payload /(|.*[\r\n]) *[Ss][Ee][Rr][Vv][Ee][Rr] +[^ ]+ +[0-9]+ +:.+[\r\n]/
requires-reverse-signature irc_server_to_server1
enable "irc"
}
signature dpd_smtp_client {
ip-proto == tcp
payload /(|.*[\n\r])[[:space:]]*([hH][eE][lL][oO]|[eE][hH][lL][oO])/
requires-reverse-signature dpd_smtp_server
enable "smtp"
tcp-state originator
}
signature dpd_smtp_server {
ip-proto == tcp
payload /^[[:space:]]*220[[:space:]-]/
tcp-state responder
}
signature dpd_ssh_client {
ip-proto == tcp
payload /^[sS][sS][hH]-/
requires-reverse-signature dpd_ssh_server
enable "ssh"
tcp-state originator
}
signature dpd_ssh_server {
ip-proto == tcp
payload /^[sS][sS][hH]-/
tcp-state responder
}
signature dpd_pop3_server {
ip-proto == tcp
payload /^\+OK/
requires-reverse-signature dpd_pop3_client
enable "pop3"
tcp-state responder
}
signature dpd_pop3_client {
ip-proto == tcp
payload /(|.*[\r\n])[[:space:]]*([uU][sS][eE][rR][[:space:]]|[aA][pP][oO][pP][[:space:]]|[cC][aA][pP][aA]|[aA][uU][tT][hH])/
tcp-state originator
}
signature dpd_ssl_server {
ip-proto == tcp
# Server hello.
payload /^(\x16\x03[\x00\x01\x02]..\x02...\x03[\x00\x01\x02]|...?\x04..\x00\x02).*/
requires-reverse-signature dpd_ssl_client
enable "ssl"
tcp-state responder
}
signature dpd_ssl_client {
ip-proto == tcp
# Client hello.
payload /^(\x16\x03[\x00\x01\x02]..\x01...\x03[\x00\x01\x02]|...?\x01[\x00\x01\x02][\x02\x03]).*/
tcp-state originator
}
signature dpd_ayiya {
ip-proto = udp
payload /^..\x11\x29/
enable "ayiya"
}
signature dpd_teredo {
ip-proto = udp
payload /^(\x00\x00)|(\x00\x01)|([\x60-\x6f])/
enable "teredo"
}
signature dpd_socks4_client {
ip-proto == tcp
# '32' is a rather arbitrary max length for the user name.
payload /^\x04[\x01\x02].{0,32}\x00/
tcp-state originator
}
signature dpd_socks4_server {
ip-proto == tcp
requires-reverse-signature dpd_socks4_client
payload /^\x00[\x5a\x5b\x5c\x5d]/
tcp-state responder
enable "socks"
}
signature dpd_socks4_reverse_client {
ip-proto == tcp
# '32' is a rather arbitrary max length for the user name.
payload /^\x04[\x01\x02].{0,32}\x00/
tcp-state responder
}
signature dpd_socks4_reverse_server {
ip-proto == tcp
requires-reverse-signature dpd_socks4_reverse_client
payload /^\x00[\x5a\x5b\x5c\x5d]/
tcp-state originator
enable "socks"
}
signature dpd_socks5_client {
ip-proto == tcp
# Watch for a few authentication methods to reduce false positives.
payload /^\x05.[\x00\x01\x02]/
tcp-state originator
}
signature dpd_socks5_server {
ip-proto == tcp
requires-reverse-signature dpd_socks5_client
# Watch for a single authentication method to be chosen by the server or
# the server to indicate the no authentication is required.
payload /^\x05(\x00|\x01[\x00\x01\x02])/
tcp-state responder
enable "socks"
}

View file

@ -3,8 +3,6 @@
module DPD; module DPD;
@load-sigs ./dpd.sig
export { export {
## Add the DPD logging stream identifier. ## Add the DPD logging stream identifier.
redef enum Log::ID += { LOG }; redef enum Log::ID += { LOG };
@ -41,22 +39,11 @@ redef record connection += {
event bro_init() &priority=5 event bro_init() &priority=5
{ {
Log::create_stream(DPD::LOG, [$columns=Info]); Log::create_stream(DPD::LOG, [$columns=Info]);
# Populate the internal DPD analysis variable.
for ( a in dpd_config )
{
for ( p in dpd_config[a]$ports )
{
if ( p !in dpd_analyzer_ports )
dpd_analyzer_ports[p] = set();
add dpd_analyzer_ports[p][a];
}
}
} }
event protocol_confirmation(c: connection, atype: count, aid: count) &priority=10 event protocol_confirmation(c: connection, atype: Analyzer::Tag, aid: count) &priority=10
{ {
local analyzer = analyzer_name(atype); local analyzer = Analyzer::name(atype);
if ( fmt("-%s",analyzer) in c$service ) if ( fmt("-%s",analyzer) in c$service )
delete c$service[fmt("-%s", analyzer)]; delete c$service[fmt("-%s", analyzer)];
@ -64,10 +51,10 @@ event protocol_confirmation(c: connection, atype: count, aid: count) &priority=1
add c$service[analyzer]; add c$service[analyzer];
} }
event protocol_violation(c: connection, atype: count, aid: count, event protocol_violation(c: connection, atype: Analyzer::Tag, aid: count,
reason: string) &priority=10 reason: string) &priority=10
{ {
local analyzer = analyzer_name(atype); local analyzer = Analyzer::name(atype);
# If the service hasn't been confirmed yet, don't generate a log message # If the service hasn't been confirmed yet, don't generate a log message
# for the protocol violation. # for the protocol violation.
if ( analyzer !in c$service ) if ( analyzer !in c$service )
@ -86,7 +73,7 @@ event protocol_violation(c: connection, atype: count, aid: count,
c$dpd = info; c$dpd = info;
} }
event protocol_violation(c: connection, atype: count, aid: count, reason: string) &priority=5 event protocol_violation(c: connection, atype: Analyzer::Tag, aid: count, reason: string) &priority=5
{ {
if ( !c?$dpd || aid in c$dpd$disabled_aids ) if ( !c?$dpd || aid in c$dpd$disabled_aids )
return; return;
@ -100,7 +87,7 @@ event protocol_violation(c: connection, atype: count, aid: count, reason: string
add c$dpd$disabled_aids[aid]; add c$dpd$disabled_aids[aid];
} }
event protocol_violation(c: connection, atype: count, aid: count, event protocol_violation(c: connection, atype: Analyzer::Tag, aid: count,
reason: string) &priority=-5 reason: string) &priority=-5
{ {
if ( c?$dpd ) if ( c?$dpd )

View file

@ -0,0 +1 @@
@load ./main.bro

View file

@ -0,0 +1,363 @@
##! An interface for driving the analysis of files, possibly independent of
##! any network protocol over which they're transported.
@load base/bif/file_analysis.bif
@load base/frameworks/analyzer
@load base/frameworks/logging
@load base/utils/site
module Files;
export {
redef enum Log::ID += {
## Logging stream for file analysis.
LOG
};
## A structure which represents a desired type of file analysis.
type AnalyzerArgs: record {
## An event which will be generated for all new file contents,
## chunk-wise. Used when *tag* is
## :bro:see:`Files::ANALYZER_DATA_EVENT`.
chunk_event: event(f: fa_file, data: string, off: count) &optional;
## An event which will be generated for all new file contents,
## stream-wise. Used when *tag* is
## :bro:see:`Files::ANALYZER_DATA_EVENT`.
stream_event: event(f: fa_file, data: string) &optional;
} &redef;
## Contains all metadata related to the analysis of a given file.
## For the most part, fields here are derived from ones of the same name
## in :bro:see:`fa_file`.
type Info: record {
## The time when the file was first seen.
ts: time &log;
## An identifier associated with a single file.
fuid: string &log;
## If this file was transferred over a network
## connection this should show the host or hosts that
## the data sourced from.
tx_hosts: set[addr] &log;
## If this file was transferred over a network
## connection this should show the host or hosts that
## the data traveled to.
rx_hosts: set[addr] &log;
## Connection UIDS over which the file was transferred.
conn_uids: set[string] &log;
## An identification of the source of the file data. E.g. it may be
## a network protocol over which it was transferred, or a local file
## path which was read, or some other input source.
source: string &log &optional;
## A value to represent the depth of this file in relation
## to its source. In SMTP, it is the depth of the MIME
## attachment on the message. In HTTP, it is the depth of the
## request within the TCP connection.
depth: count &default=0 &log;
## A set of analysis types done during the file analysis.
analyzers: set[string] &log;
## A mime type provided by libmagic against the *bof_buffer*, or
## in cases where no buffering of the beginning of the file occurs,
## an initial guess of the mime type based on the first data seen.
mime_type: string &log &optional;
## A filename for the file if one is available from the source
## for the file. These will frequently come from
## "Content-Disposition" headers in network protocols.
filename: string &log &optional;
## The duration the file was analyzed for.
duration: interval &log &default=0secs;
## If the source of this file is a network connection, this field
## indicates if the data originated from the local network or not as
## determined by the configured :bro:see:`Site::local_nets`.
local_orig: bool &log &optional;
## If the source of this file is a network connection, this field
## indicates if the file is being sent by the originator of the connection
## or the responder.
is_orig: bool &log &optional;
## Number of bytes provided to the file analysis engine for the file.
seen_bytes: count &log &default=0;
## Total number of bytes that are supposed to comprise the full file.
total_bytes: count &log &optional;
## The number of bytes in the file stream that were completely missed
## during the process of analysis e.g. due to dropped packets.
missing_bytes: count &log &default=0;
## The number of not all-in-sequence bytes in the file stream that
## were delivered to file analyzers due to reassembly buffer overflow.
overflow_bytes: count &log &default=0;
## Whether the file analysis timed out at least once for the file.
timedout: bool &log &default=F;
## Identifier associated with a container file from which this one was
## extracted as part of the file analysis.
parent_fuid: string &log &optional;
} &redef;
## A table that can be used to disable file analysis completely for
## any files transferred over given network protocol analyzers.
const disable: table[Files::Tag] of bool = table() &redef;
## The salt concatenated to unique file handle strings generated by
## :bro:see:`get_file_handle` before hashing them into a file id
## (the *id* field of :bro:see:`fa_file`).
## Provided to help mitigate the possibility of manipulating parts of
## network connections that factor in to the file handle in order to
## generate two handles that would hash to the same file id.
const salt = "I recommend changing this." &redef;
## Sets the *timeout_interval* field of :bro:see:`fa_file`, which is
## used to determine the length of inactivity that is allowed for a file
## before internal state related to it is cleaned up. When used within a
## :bro:see:`file_timeout` handler, the analysis will delay timing out
## again for the period specified by *t*.
##
## f: the file.
##
## t: the amount of time the file can remain inactive before discarding.
##
## Returns: true if the timeout interval was set, or false if analysis
## for the *id* isn't currently active.
global set_timeout_interval: function(f: fa_file, t: interval): bool;
## Adds an analyzer to the analysis of a given file.
##
## f: the file.
##
## tag: the analyzer type.
##
## args: any parameters the analyzer takes.
##
## Returns: true if the analyzer will be added, or false if analysis
## for the *id* isn't currently active or the *args*
## were invalid for the analyzer type.
global add_analyzer: function(f: fa_file,
tag: Files::Tag,
args: AnalyzerArgs &default=AnalyzerArgs()): bool;
## Removes an analyzer from the analysis of a given file.
##
## f: the file.
##
## args: the analyzer (type and args) to remove.
##
## Returns: true if the analyzer will be removed, or false if analysis
## for the *id* isn't currently active.
global remove_analyzer: function(f: fa_file,
tag: Files::Tag,
args: AnalyzerArgs &default=AnalyzerArgs()): bool;
## Stops/ignores any further analysis of a given file.
##
## f: the file.
##
## Returns: true if analysis for the given file will be ignored for the
## rest of its contents, or false if analysis for the *id*
## isn't currently active.
global stop: function(f: fa_file): bool;
## Translates a file analyzer enum value to a string with the analyzer's name.
##
## tag: The analyzer tag.
##
## Returns: The analyzer name corresponding to the tag.
global analyzer_name: function(tag: Files::Tag): string;
## Provides a text description regarding metadata of the file.
## For example, with HTTP it would return a URL.
##
## f: The file to be described.
##
## Returns: A text description regarding metadata of the file.
global describe: function(f: fa_file): string;
type ProtoRegistration: record {
## A callback to generate a file handle on demand when
## one is needed by the core.
get_file_handle: function(c: connection, is_orig: bool): string;
## A callback to "describe" a file. In the case of an HTTP
## transfer, the most obvious description would be the URL.
## It's like an extremely compressed version of the normal log.
describe: function(f: fa_file): string
&default=function(f: fa_file): string { return ""; };
};
## Register callbacks for protocols that work with the Files framework.
## The callbacks must uniquely identify a file and each protocol can
## only have a single callback registered for it.
##
## tag: Tag for the protocol analyzer having a callback being registered.
##
## reg: A :bro:see:`Files::ProtoRegistration` record.
##
## Returns: true if the protocol being registered was not previously registered.
global register_protocol: function(tag: Analyzer::Tag, reg: ProtoRegistration): bool;
## Register a callback for file analyzers to use if they need to do some manipulation
## when they are being added to a file before the core code takes over. This is
## unlikely to be interesting for users and should only be called by file analyzer
## authors, but it is *not required*.
##
## tag: Tag for the file analyzer.
##
## callback: Function to execute when the given file analyzer is being added.
global register_analyzer_add_callback: function(tag: Files::Tag, callback: function(f: fa_file, args: AnalyzerArgs));
## Event that can be handled to access the Info record as it is sent on
## to the logging framework.
global log_files: event(rec: Info);
}
redef record fa_file += {
info: Info &optional;
};
# Store the callbacks for protocol analyzers that have files.
global registered_protocols: table[Analyzer::Tag] of ProtoRegistration = table();
global analyzer_add_callbacks: table[Files::Tag] of function(f: fa_file, args: AnalyzerArgs) = table();
event bro_init() &priority=5
{
Log::create_stream(Files::LOG, [$columns=Info, $ev=log_files]);
}
function set_info(f: fa_file)
{
if ( ! f?$info )
{
local tmp: Info = Info($ts=f$last_active,
$fuid=f$id);
f$info = tmp;
}
if ( f?$parent_id )
f$info$parent_fuid = f$parent_id;
if ( f?$source )
f$info$source = f$source;
f$info$duration = f$last_active - f$info$ts;
f$info$seen_bytes = f$seen_bytes;
if ( f?$total_bytes )
f$info$total_bytes = f$total_bytes;
f$info$missing_bytes = f$missing_bytes;
f$info$overflow_bytes = f$overflow_bytes;
if ( f?$is_orig )
f$info$is_orig = f$is_orig;
if ( f?$mime_type )
f$info$mime_type = f$mime_type;
}
function set_timeout_interval(f: fa_file, t: interval): bool
{
return __set_timeout_interval(f$id, t);
}
function add_analyzer(f: fa_file, tag: Files::Tag, args: AnalyzerArgs): bool
{
add f$info$analyzers[Files::analyzer_name(tag)];
if ( tag in analyzer_add_callbacks )
analyzer_add_callbacks[tag](f, args);
if ( ! __add_analyzer(f$id, tag, args) )
{
Reporter::warning(fmt("Analyzer %s not added successfully to file %s.", tag, f$id));
return F;
}
return T;
}
function register_analyzer_add_callback(tag: Files::Tag, callback: function(f: fa_file, args: AnalyzerArgs))
{
analyzer_add_callbacks[tag] = callback;
}
function remove_analyzer(f: fa_file, tag: Files::Tag, args: AnalyzerArgs): bool
{
return __remove_analyzer(f$id, tag, args);
}
function stop(f: fa_file): bool
{
return __stop(f$id);
}
function analyzer_name(tag: Files::Tag): string
{
return __analyzer_name(tag);
}
event file_new(f: fa_file) &priority=10
{
set_info(f);
}
event file_over_new_connection(f: fa_file, c: connection, is_orig: bool) &priority=10
{
set_info(f);
add f$info$conn_uids[c$uid];
local cid = c$id;
add f$info$tx_hosts[f$is_orig ? cid$orig_h : cid$resp_h];
if( |Site::local_nets| > 0 )
f$info$local_orig=Site::is_local_addr(f$is_orig ? cid$orig_h : cid$resp_h);
add f$info$rx_hosts[f$is_orig ? cid$resp_h : cid$orig_h];
}
event file_timeout(f: fa_file) &priority=10
{
set_info(f);
f$info$timedout = T;
}
event file_state_remove(f: fa_file) &priority=10
{
set_info(f);
}
event file_state_remove(f: fa_file) &priority=-10
{
Log::write(Files::LOG, f$info);
}
function register_protocol(tag: Analyzer::Tag, reg: ProtoRegistration): bool
{
local result = (tag !in registered_protocols);
registered_protocols[tag] = reg;
return result;
}
function describe(f: fa_file): string
{
local tag = Analyzer::get_tag(f$source);
if ( tag !in registered_protocols )
return "";
local handler = registered_protocols[tag];
return handler$describe(f);
}
event get_file_handle(tag: Analyzer::Tag, c: connection, is_orig: bool) &priority=5
{
if ( tag !in registered_protocols )
return;
local handler = registered_protocols[tag];
set_file_handle(handler$get_file_handle(c, is_orig));
}
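# A sketch of how a protocol analyzer script could register itself with this
# framework (illustrative only; the handle format in the callback is a
# placeholder, not what any shipped script actually uses):
#
#     event bro_init() &priority=5
#         {
#         Files::register_protocol(Analyzer::ANALYZER_HTTP,
#             [$get_file_handle = function(c: connection, is_orig: bool): string
#                 { return cat(Analyzer::ANALYZER_HTTP, c$uid, is_orig); }]);
#         }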

View file

@ -2,4 +2,5 @@
@load ./readers/ascii @load ./readers/ascii
@load ./readers/raw @load ./readers/raw
@load ./readers/benchmark @load ./readers/benchmark
@load ./readers/binary
@load ./readers/sqlite

View file

@ -122,6 +122,34 @@ export {
config: table[string] of string &default=table(); config: table[string] of string &default=table();
}; };
## A file analysis input stream type used to forward input data to the
## file analysis framework.
type AnalysisDescription: record {
## String that allows the reader to find the source.
## For `READER_ASCII`, this is the filename.
source: string;
## Reader to use for this stream. Compatible readers must be
## able to accept a filter of a single string type (i.e.
## they read a byte stream).
reader: Reader &default=Input::READER_BINARY;
## Read mode to use for this stream.
mode: Mode &default=default_mode;
## Descriptive name that uniquely identifies the input source.
## Can be used to remove a stream at a later time.
## This will also be used for the unique *source* field of
## :bro:see:`fa_file`. Most of the time, the best choice for this
## field will be the same value as the *source* field.
name: string;
## A key/value table that will be passed on to the reader.
## Interpretation of the values is left to the reader, but
## usually they will be used for configuration purposes.
config: table[string] of string &default=table();
};
## Create a new table input from a given source. Returns true on success. ## Create a new table input from a given source. Returns true on success.
## ##
## description: `TableDescription` record describing the source. ## description: `TableDescription` record describing the source.
@ -132,6 +160,14 @@ export {
## description: `TableDescription` record describing the source. ## description: `TableDescription` record describing the source.
global add_event: function(description: Input::EventDescription) : bool; global add_event: function(description: Input::EventDescription) : bool;
## Create a new file analysis input from a given source. Data read from
## the source is automatically forwarded to the file analysis framework.
##
## description: A record describing the source
##
## Returns: true on success.
global add_analysis: function(description: Input::AnalysisDescription) : bool;
## Remove an input stream. Returns true on success and false if the named stream was ## Remove an input stream. Returns true on success and false if the named stream was
## not found. ## not found.
## ##
@ -149,7 +185,7 @@ export {
global end_of_data: event(name: string, source:string); global end_of_data: event(name: string, source:string);
} }
@load base/input.bif @load base/bif/input.bif
module Input; module Input;
@ -164,6 +200,11 @@ function add_event(description: Input::EventDescription) : bool
return __create_event_stream(description); return __create_event_stream(description);
} }
function add_analysis(description: Input::AnalysisDescription) : bool
{
return __create_analysis_stream(description);
}
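# A minimal usage sketch (illustrative; the path is a placeholder): forward a
# local file's contents to the file analysis framework using the default
# binary reader.
#
#     event bro_init()
#         {
#         Input::add_analysis([$source="/tmp/sample.bin", $name="/tmp/sample.bin"]);
#         }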
function remove(id: string) : bool function remove(id: string) : bool
{ {
return __remove_stream(id); return __remove_stream(id);

View file

@ -0,0 +1,8 @@
##! Interface for the binary input reader.
module InputBinary;
export {
## Size of data chunks to read from the input file at a time.
const chunk_size = 1024 &redef;
}
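# For example, a site could read larger chunks per read operation (a sketch):
#
#     redef InputBinary::chunk_size = 4096;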

View file

@ -6,4 +6,12 @@ export {
## Separator between input records. ## Separator between input records.
## Please note that the separator has to be exactly one character long ## Please note that the separator has to be exactly one character long
const record_separator = "\n" &redef; const record_separator = "\n" &redef;
## Event that is called when a process created by the raw reader exits.
##
## name: name of the input stream
## source: source of the input stream
## exit_code: exit code of the program, or number of the signal that forced the program to exit
## signal_exit: false when the program exited normally, true when it was forced to exit by a signal
global process_finished: event(name: string, source:string, exit_code:count, signal_exit:bool);
} }
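# A sketch of handling the event above (assuming the raw reader's module is
# named InputRaw, so the fully qualified event is InputRaw::process_finished):
#
#     event InputRaw::process_finished(name: string, source: string,
#                                      exit_code: count, signal_exit: bool)
#         {
#         print fmt("%s (%s) exited with code %d", name, source, exit_code);
#         }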

View file

@ -0,0 +1,17 @@
##! Interface for the SQLite input reader.
##!
##! The defaults are set to match Bro's ASCII output.
module InputSQLite;
export {
## Separator between set elements.
## Please note that the separator has to be exactly one character long.
const set_separator = Input::set_separator &redef;
## String to use for an unset &optional field.
const unset_field = Input::unset_field &redef;
## String to use for empty fields.
const empty_field = Input::empty_field &redef;
}

View file

@ -10,13 +10,14 @@ module Intel;
export { export {
redef enum Log::ID += { LOG }; redef enum Log::ID += { LOG };
## String data needs to be further categoried since it could represent ## Enum type to represent various types of intelligence data.
## and number of types of data. type Type: enum {
type StrType: enum { ## An IP address.
ADDR,
## A complete URL without the prefix "http://". ## A complete URL without the prefix "http://".
URL, URL,
## User-Agent string, typically HTTP or mail message body. ## Software name.
USER_AGENT, SOFTWARE,
## Email address. ## Email address.
EMAIL, EMAIL,
## DNS domain name. ## DNS domain name.
@ -44,18 +45,15 @@ export {
## Represents a piece of intelligence. ## Represents a piece of intelligence.
type Item: record { type Item: record {
## The IP address if the intelligence is about an IP address. ## The intelligence indicator.
host: addr &optional; indicator: string;
## The network if the intelligence is about a CIDR block.
net: subnet &optional;
## The string if the intelligence is about a string.
str: string &optional;
## The type of data that is in the string if the $str field is set.
str_type: StrType &optional;
## Metadata for the item. Typically represents more deeply \ ## The type of data that the indicator field represents.
indicator_type: Type;
## Metadata for the item. Typically represents more deeply
## descriptive data for a piece of intelligence. ## descriptive data for a piece of intelligence.
meta: MetaData; meta: MetaData;
}; };
## Enum to represent where data came from when it was discovered. ## Enum to represent where data came from when it was discovered.
@ -65,23 +63,23 @@ export {
IN_ANYWHERE, IN_ANYWHERE,
}; };
## The $host field and combination of $str and $str_type fields are mutually
## exclusive. These records *must* represent either an IP address being
## seen or a string being seen.
type Seen: record { type Seen: record {
## The IP address if the data seen is an IP address.
host: addr &log &optional;
## The string if the data is about a string. ## The string if the data is about a string.
str: string &log &optional; indicator: string &log &optional;
## The type of data that is in the string if the $str field is set.
str_type: StrType &log &optional; ## The type of data that the indicator represents.
indicator_type: Type &log &optional;
## If the indicator type was :bro:enum:`Intel::ADDR`, then this
## field will be present.
host: addr &optional;
## Where the data was discovered. ## Where the data was discovered.
where: Where &log; where: Where &log;
## If the data was discovered within a connection, the ## If the data was discovered within a connection, the
## connection record should go here to give context to the data. ## connection record should go here to give context to the data.
conn: connection &optional; conn: connection &optional;
}; };
## Record used for the logging framework representing a positive ## Record used for the logging framework representing a positive
@ -100,7 +98,7 @@ export {
## Where the data was seen. ## Where the data was seen.
seen: Seen &log; seen: Seen &log;
## Sources which supplied data that resulted in this match. ## Sources which supplied data that resulted in this match.
sources: set[string] &log; sources: set[string] &log &default=string_set();
}; };
## Intelligence data manipulation functions. ## Intelligence data manipulation functions.
@ -135,8 +133,8 @@ const have_full_data = T &redef;
# The in memory data structure for holding intelligence. # The in memory data structure for holding intelligence.
type DataStore: record { type DataStore: record {
net_data: table[subnet] of set[MetaData]; host_data: table[addr] of set[MetaData];
string_data: table[string, StrType] of set[MetaData]; string_data: table[string, Type] of set[MetaData];
}; };
global data_store: DataStore &redef; global data_store: DataStore &redef;
@ -144,8 +142,8 @@ global data_store: DataStore &redef;
# This is primarily for workers to do the initial quick matches and store # This is primarily for workers to do the initial quick matches and store
# a minimal amount of data for the full match to happen on the manager. # a minimal amount of data for the full match to happen on the manager.
type MinDataStore: record { type MinDataStore: record {
net_data: set[subnet]; host_data: set[addr];
string_data: set[string, StrType]; string_data: set[string, Type];
}; };
global min_data_store: MinDataStore &redef; global min_data_store: MinDataStore &redef;
@ -157,15 +155,13 @@ event bro_init() &priority=5
function find(s: Seen): bool function find(s: Seen): bool
{ {
if ( s?$host && if ( s?$host )
((have_full_data && s$host in data_store$net_data) ||
(s$host in min_data_store$net_data)))
{ {
return T; return ((s$host in min_data_store$host_data) ||
(have_full_data && s$host in data_store$host_data));
} }
else if ( s?$str && s?$str_type && else if ( ([to_lower(s$indicator), s$indicator_type] in min_data_store$string_data) ||
((have_full_data && [s$str, s$str_type] in data_store$string_data) || (have_full_data && [to_lower(s$indicator), s$indicator_type] in data_store$string_data) )
([s$str, s$str_type] in min_data_store$string_data)))
{ {
return T; return T;
} }
@ -177,8 +173,7 @@ function find(s: Seen): bool
function get_items(s: Seen): set[Item] function get_items(s: Seen): set[Item]
{ {
local item: Item; local return_data: set[Item];
local return_data: set[Item] = set();
if ( ! have_full_data ) if ( ! have_full_data )
{ {
@ -191,26 +186,23 @@ function get_items(s: Seen): set[Item]
if ( s?$host ) if ( s?$host )
{ {
# See if the host is known about and it has meta values # See if the host is known about and it has meta values
if ( s$host in data_store$net_data ) if ( s$host in data_store$host_data )
{ {
for ( m in data_store$net_data[s$host] ) for ( m in data_store$host_data[s$host] )
{ {
# TODO: the lookup should be finding all and not just most specific add return_data[Item($indicator=cat(s$host), $indicator_type=ADDR, $meta=m)];
# and $host/$net should have the correct value.
item = [$host=s$host, $meta=m];
add return_data[item];
} }
} }
} }
else if ( s?$str && s?$str_type ) else
{ {
local lower_indicator = to_lower(s$indicator);
# See if the string is known about and it has meta values # See if the string is known about and it has meta values
if ( [s$str, s$str_type] in data_store$string_data ) if ( [lower_indicator, s$indicator_type] in data_store$string_data )
{ {
for ( m in data_store$string_data[s$str, s$str_type] ) for ( m in data_store$string_data[lower_indicator, s$indicator_type] )
{ {
item = [$str=s$str, $str_type=s$str_type, $meta=m]; add return_data[Item($indicator=s$indicator, $indicator_type=s$indicator_type, $meta=m)];
add return_data[item];
} }
} }
} }
@ -222,6 +214,12 @@ function Intel::seen(s: Seen)
{ {
if ( find(s) ) if ( find(s) )
{ {
if ( s?$host )
{
s$indicator = cat(s$host);
s$indicator_type = Intel::ADDR;
}
if ( have_full_data ) if ( have_full_data )
{ {
local items = get_items(s); local items = get_items(s);
@ -250,8 +248,7 @@ function has_meta(check: MetaData, metas: set[MetaData]): bool
event Intel::match(s: Seen, items: set[Item]) &priority=5 event Intel::match(s: Seen, items: set[Item]) &priority=5
{ {
local empty_set: set[string] = set(); local info: Info = [$ts=network_time(), $seen=s];
local info: Info = [$ts=network_time(), $seen=s, $sources=empty_set];
if ( s?$conn ) if ( s?$conn )
{ {
@ -267,52 +264,37 @@ event Intel::match(s: Seen, items: set[Item]) &priority=5
function insert(item: Item) function insert(item: Item)
{ {
if ( item?$str && !item?$str_type )
{
event reporter_warning(network_time(), fmt("You must provide a str_type for strings or this item doesn't make sense. Item: %s", item), "");
return;
}
# Create and fill out the meta data item. # Create and fill out the meta data item.
local meta = item$meta; local meta = item$meta;
local metas: set[MetaData]; local metas: set[MetaData];
if ( item?$host ) # All intelligence is case insensitive at the moment.
local lower_indicator = to_lower(item$indicator);
if ( item$indicator_type == ADDR )
{ {
local host = mask_addr(item$host, is_v4_addr(item$host) ? 32 : 128); local host = to_addr(item$indicator);
if ( have_full_data ) if ( have_full_data )
{ {
if ( host !in data_store$net_data ) if ( host !in data_store$host_data )
data_store$net_data[host] = set(); data_store$host_data[host] = set();
metas = data_store$net_data[host]; metas = data_store$host_data[host];
} }
add min_data_store$net_data[host]; add min_data_store$host_data[host];
} }
else if ( item?$net ) else
{ {
if ( have_full_data ) if ( have_full_data )
{ {
if ( item$net !in data_store$net_data ) if ( [lower_indicator, item$indicator_type] !in data_store$string_data )
data_store$net_data[item$net] = set(); data_store$string_data[lower_indicator, item$indicator_type] = set();
metas = data_store$net_data[item$net]; metas = data_store$string_data[lower_indicator, item$indicator_type];
} }
add min_data_store$net_data[item$net]; add min_data_store$string_data[lower_indicator, item$indicator_type];
}
else if ( item?$str )
{
if ( have_full_data )
{
if ( [item$str, item$str_type] !in data_store$string_data )
data_store$string_data[item$str, item$str_type] = set();
metas = data_store$string_data[item$str, item$str_type];
}
add min_data_store$string_data[item$str, item$str_type];
} }
local updated = F; local updated = F;

View file

@ -2,5 +2,6 @@
@load ./postprocessors @load ./postprocessors
@load ./writers/ascii @load ./writers/ascii
@load ./writers/dataseries @load ./writers/dataseries
@load ./writers/sqlite
@load ./writers/elasticsearch @load ./writers/elasticsearch
@load ./writers/none @load ./writers/none

View file

@ -189,6 +189,15 @@ export {
## .. bro:see:: Log::add_default_filter Log::remove_default_filter ## .. bro:see:: Log::add_default_filter Log::remove_default_filter
global create_stream: function(id: ID, stream: Stream) : bool; global create_stream: function(id: ID, stream: Stream) : bool;
## Removes a logging stream completely, stopping all the threads.
##
## id: The ID associated with the logging stream to remove.
##
## Returns: True if the stream was successfully removed.
##
## .. bro:see:: Log::create_stream
global remove_stream: function(id: ID) : bool;
## Enables a previously disabled logging stream. Disabled streams ## Enables a previously disabled logging stream. Disabled streams
## will not be written to until they are enabled again. New streams ## will not be written to until they are enabled again. New streams
## are enabled by default. ## are enabled by default.
@ -357,7 +366,7 @@ export {
# We keep a script-level copy of all filters so that we can manipulate them. # We keep a script-level copy of all filters so that we can manipulate them.
global filters: table[ID, string] of Filter; global filters: table[ID, string] of Filter;
@load base/logging.bif # Needs Filter and Stream defined. @load base/bif/logging.bif # Needs Filter and Stream defined.
module Log; module Log;
@ -442,6 +451,12 @@ function create_stream(id: ID, stream: Stream) : bool
return add_default_filter(id); return add_default_filter(id);
} }
function remove_stream(id: ID) : bool
{
delete active_streams[id];
return __remove_stream(id);
}
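# For example, a site policy could tear down an entire stream at runtime
# (a sketch; Conn::LOG is just one possible stream ID):
#
#     event bro_init()
#         {
#         Log::remove_stream(Conn::LOG);
#         }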
function disable_stream(id: ID) : bool function disable_stream(id: ID) : bool
{ {
delete active_streams[id]; delete active_streams[id];

View file

@ -0,0 +1,17 @@
##! Interface for the SQLite log writer. Redefinable options are available
##! to tweak the output format of the SQLite reader.
module LogSQLite;
export {
## Separator between set elements.
const set_separator = Log::set_separator &redef;
## String to use for an unset &optional field.
const unset_field = Log::unset_field &redef;
## String to use for empty fields. This should be different from
## *unset_field* to make the output unambiguous.
const empty_field = Log::empty_field &redef;
}
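# A usage sketch (illustrative; the path is a placeholder and the writer enum
# is assumed to be Log::WRITER_SQLITE): copy a log stream into an SQLite
# database by adding a filter that uses the SQLite writer.
#
#     event bro_init()
#         {
#         Log::add_filter(Conn::LOG, [$name="sqlite", $path="/var/db/conn",
#                                     $writer=Log::WRITER_SQLITE]);
#         }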

View file

@ -1,264 +0,0 @@
##! This implements transparent cluster support for the metrics framework.
##! Do not load this file directly. It's only meant to be loaded automatically
##! and will be depending on if the cluster framework has been enabled.
##! The goal of this script is to make metric calculation completely and
##! transparently automated when running on a cluster.
##!
##! Events defined here are not exported deliberately because they are meant
##! to be an internal implementation detail.
@load base/frameworks/cluster
@load ./main
module Metrics;
export {
## Allows a user to decide how large of result groups the
## workers should transmit values for cluster metric aggregation.
const cluster_send_in_groups_of = 50 &redef;
## The percent of the full threshold value that needs to be met
## on a single worker for that worker to send the value to its manager in
## order for it to request a global view for that value. There is no
## requirement that the manager requests a global view for the index
## since it may opt not to if it requested a global view for the index
## recently.
const cluster_request_global_view_percent = 0.1 &redef;
## Event sent by the manager in a cluster to initiate the
## collection of metrics values for a filter.
global cluster_filter_request: event(uid: string, id: ID, filter_name: string);
## Event sent by nodes that are collecting metrics after receiving
## a request for the metric filter from the manager.
global cluster_filter_response: event(uid: string, id: ID, filter_name: string, data: MetricTable, done: bool);
## This event is sent by the manager in a cluster to initiate the
## collection of a single index value from a filter. It's typically
## used to get intermediate updates before the break interval triggers
## to speed detection of a value crossing a threshold.
global cluster_index_request: event(uid: string, id: ID, filter_name: string, index: Index);
## This event is sent by nodes in response to a
## :bro:id:`Metrics::cluster_index_request` event.
global cluster_index_response: event(uid: string, id: ID, filter_name: string, index: Index, val: count);
## This is sent by workers to indicate that they crossed the percent of the
## current threshold by the percentage defined globally in
## :bro:id:`Metrics::cluster_request_global_view_percent`
global cluster_index_intermediate_response: event(id: Metrics::ID, filter_name: string, index: Metrics::Index, val: count);
## This event is scheduled internally on workers to send result chunks.
global send_data: event(uid: string, id: ID, filter_name: string, data: MetricTable);
}
# This is maintained by managers so they can know what data they requested and
# when they requested it.
global requested_results: table[string] of time = table() &create_expire=5mins;
# TODO: The next 4 variables make the assumption that a value never
# takes longer than 5 minutes to transmit from workers to manager. This needs to
# be tunable or self-tuning. These should also be restructured to be
# maintained within a single variable.
# This variable is maintained by manager nodes as they collect and aggregate
# results.
global filter_results: table[string, ID, string] of MetricTable &create_expire=5mins;
# This variable is maintained by manager nodes to track how many "dones" they
# collected per collection unique id. Once the number of results for a uid
# matches the number of peer nodes that results should be coming from, the
# result is written out and deleted from here.
# TODO: add an &expire_func in case not all results are received.
global done_with: table[string] of count &create_expire=5mins &default=0;
# This variable is maintained by managers to track intermediate responses as
# they are getting a global view for a certain index.
global index_requests: table[string, ID, string, Index] of count &create_expire=5mins &default=0;
# This variable is maintained by all hosts for different purposes. Non-managers
# maintain it to know what indexes they have recently sent as intermediate
# updates so they don't overwhelm their manager. Managers maintain it so they
# don't overwhelm workers with intermediate index requests. The count that is
# yielded is the number of times the percentage threshold has been crossed and
# an intermediate result has been received. The manager may optionally request
# the index again before data expires from here if too many workers are crossing
# the percentage threshold (not implemented yet!).
global recent_global_view_indexes: table[ID, string, Index] of count &create_expire=5mins &default=0;
# Add events to the cluster framework to make this work.
redef Cluster::manager2worker_events += /Metrics::cluster_(filter_request|index_request)/;
redef Cluster::worker2manager_events += /Metrics::cluster_(filter_response|index_response|index_intermediate_response)/;
@if ( Cluster::local_node_type() != Cluster::MANAGER )
# This is done on all non-manager node types in the event that a metric is
# being collected somewhere other than a worker.
function data_added(filter: Filter, index: Index, val: count)
{
# If an intermediate update for this value was sent recently, don't send
# it again.
if ( [filter$id, filter$name, index] in recent_global_view_indexes )
return;
# If val is 5 and global view % is 0.1 (10%), pct_val will be 50. If that
# crosses the full threshold then it's a candidate to send as an
# intermediate update.
local pct_val = double_to_count(val / cluster_request_global_view_percent);
if ( check_notice(filter, index, pct_val) )
{
# kick off intermediate update
event Metrics::cluster_index_intermediate_response(filter$id, filter$name, index, val);
++recent_global_view_indexes[filter$id, filter$name, index];
}
}
event Metrics::send_data(uid: string, id: ID, filter_name: string, data: MetricTable)
{
#print fmt("WORKER %s: sending data for uid %s...", Cluster::node, uid);
local local_data: MetricTable;
local num_added = 0;
for ( index in data )
{
local_data[index] = data[index];
delete data[index];
# Only send cluster_send_in_groups_of at a time. Queue another
# event to send the next group.
if ( cluster_send_in_groups_of == ++num_added )
break;
}
local done = F;
# If data is empty, this metric is done.
if ( |data| == 0 )
done = T;
event Metrics::cluster_filter_response(uid, id, filter_name, local_data, done);
if ( ! done )
event Metrics::send_data(uid, id, filter_name, data);
}
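# For example (numbers arbitrary): with cluster_send_in_groups_of=50 and 120
# pending entries, a worker would send chunks of 50, 50 and 20, and only the
# final cluster_filter_response would carry done=T.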
event Metrics::cluster_filter_request(uid: string, id: ID, filter_name: string)
{
#print fmt("WORKER %s: received the cluster_filter_request event.", Cluster::node);
# Initiate sending all of the data for the requested filter.
event Metrics::send_data(uid, id, filter_name, store[id, filter_name]);
# Look up the actual filter and reset it; the reference to the data
# currently stored will be maintained internally by the send_data event.
reset(filter_store[id, filter_name]);
}
event Metrics::cluster_index_request(uid: string, id: ID, filter_name: string, index: Index)
{
local val=0;
if ( index in store[id, filter_name] )
val = store[id, filter_name][index];
# fmt("WORKER %s: received the cluster_index_request event for %s=%d.", Cluster::node, index2str(index), val);
event Metrics::cluster_index_response(uid, id, filter_name, index, val);
}
@endif
@if ( Cluster::local_node_type() == Cluster::MANAGER )
# Managers handle logging.
event Metrics::log_it(filter: Filter)
{
#print fmt("%.6f MANAGER: breaking %s filter for %s metric", network_time(), filter$name, filter$id);
local uid = unique_id("");
# Set some tracking variables.
requested_results[uid] = network_time();
filter_results[uid, filter$id, filter$name] = table();
# Request data from peers.
event Metrics::cluster_filter_request(uid, filter$id, filter$name);
# Schedule the log_it event for the next break period.
schedule filter$break_interval { Metrics::log_it(filter) };
}
# This is unlikely to be called often, but it's here in case there are metrics
# being collected by managers.
function data_added(filter: Filter, index: Index, val: count)
{
if ( check_notice(filter, index, val) )
do_notice(filter, index, val);
}
event Metrics::cluster_index_response(uid: string, id: ID, filter_name: string, index: Index, val: count)
{
#print fmt("%0.6f MANAGER: receiving index data from %s", network_time(), get_event_peer()$descr);
if ( [uid, id, filter_name, index] !in index_requests )
index_requests[uid, id, filter_name, index] = 0;
index_requests[uid, id, filter_name, index] += val;
local ir = index_requests[uid, id, filter_name, index];
++done_with[uid];
if ( Cluster::worker_count == done_with[uid] )
{
if ( check_notice(filter_store[id, filter_name], index, ir) )
do_notice(filter_store[id, filter_name], index, ir);
delete done_with[uid];
delete index_requests[uid, id, filter_name, index];
}
}
# Managers handle intermediate updates here.
event Metrics::cluster_index_intermediate_response(id: ID, filter_name: string, index: Index, val: count)
{
#print fmt("MANAGER: receiving intermediate index data from %s", get_event_peer()$descr);
#print fmt("MANAGER: requesting index data for %s", index2str(index));
local uid = unique_id("");
event Metrics::cluster_index_request(uid, id, filter_name, index);
++recent_global_view_indexes[id, filter_name, index];
}
event Metrics::cluster_filter_response(uid: string, id: ID, filter_name: string, data: MetricTable, done: bool)
{
#print fmt("MANAGER: receiving results from %s", get_event_peer()$descr);
local local_data = filter_results[uid, id, filter_name];
for ( index in data )
{
if ( index !in local_data )
local_data[index] = 0;
local_data[index] += data[index];
}
# Mark another worker as being "done" for this uid.
if ( done )
++done_with[uid];
# If the data has been collected from all peers, we are done and ready to log.
if ( Cluster::worker_count == done_with[uid] )
{
local ts = network_time();
# Log the time this was initially requested if it's available.
if ( uid in requested_results )
{
ts = requested_results[uid];
delete requested_results[uid];
}
write_log(ts, filter_store[id, filter_name], local_data);
# Clean up
delete filter_results[uid, id, filter_name];
delete done_with[uid];
}
}
@endif

View file

@ -1,320 +0,0 @@
##! The metrics framework provides a way to count and measure data.
@load base/frameworks/notice
module Metrics;
export {
## The metrics logging stream identifier.
redef enum Log::ID += { LOG };
## Identifiers for metrics to collect.
type ID: enum {
## Blank placeholder value.
NOTHING,
};
## The default interval used for "breaking" metrics and writing the
## current value to the logging stream.
const default_break_interval = 15mins &redef;
## This is the interval for how often threshold based notices will happen
## after they have already fired.
const renotice_interval = 1hr &redef;
## Represents a thing which is having metrics collected for it. An instance
## of this record type and a :bro:type:`Metrics::ID` together represent a
## single measurement.
type Index: record {
## Host is the value to which this metric applies.
host: addr &optional;
## A non-address related metric or a sub-key for an address based metric.
## An example might be successful SSH connections by client IP address
## where the client string would be the index value.
## Another example might be number of HTTP requests to a particular
## value in a Host header. This is an example of a non-host based
## metric since multiple IP addresses could respond for the same Host
## header value.
str: string &optional;
## The CIDR block that this metric applies to. This is typically
## only used internally for host based aggregation.
network: subnet &optional;
} &log;
## The record type that is used for logging metrics.
type Info: record {
## Timestamp at which the metric was "broken".
ts: time &log;
## What measurement the metric represents.
metric_id: ID &log;
## The name of the filter being logged. :bro:type:`Metrics::ID` values
## can have multiple filters which represent different perspectives on
## the data so this is necessary to understand the value.
filter_name: string &log;
## What the metric value applies to.
index: Index &log;
## The simple numeric value of the metric.
value: count &log;
};
# TODO: configure a metrics filter logging stream to log the current
# metrics configuration in case someone is looking through
# old logs and the configuration has changed since then.
## Filters define how the data from a metric is aggregated and handled.
## Filters can be used to set how often the measurements are cut or "broken"
## and logged or how the data within them is aggregated. It's also
## possible to disable logging and use filters for thresholding.
type Filter: record {
## The :bro:type:`Metrics::ID` that this filter applies to.
id: ID &optional;
## The name for this filter so that multiple filters can be
## applied to a single metric to get a different view of the same
## metric data being collected (different aggregation, break, etc).
name: string &default="default";
## A predicate so that you can decide per index if you would like
## to accept the data being inserted.
pred: function(index: Index): bool &optional;
## Global mask by which you'd like to aggregate traffic.
aggregation_mask: count &optional;
## This is essentially a mapping table between addresses and subnets.
aggregation_table: table[subnet] of subnet &optional;
## The interval at which this filter should be "broken" and written
## to the logging stream. The counters are also reset to zero at
## this time so any threshold based detection needs to be set to a
## number that should be expected to happen within this period.
break_interval: interval &default=default_break_interval;
## This determines if the result of this filter is sent to the metrics
## logging stream. One use for the metrics framework is as an internal
## thresholding and statistics gathering utility that is meant to
## never log but rather to generate notices and derive data.
log: bool &default=T;
## If this and a $notice_threshold value are set, this notice type
## will be generated by the metrics framework.
note: Notice::Type &optional;
## A straight threshold for generating a notice.
notice_threshold: count &optional;
## A series of thresholds at which to generate notices.
notice_thresholds: vector of count &optional;
## How often this notice should be raised for this filter. It
## will be generated every time a threshold is crossed, but if the
## $break_interval is set to 5mins and this is set to 1hr, the notice
## will only be generated once per hour even if something crosses the
## threshold in every break interval.
notice_freq: interval &optional;
};
## Function to associate a metric filter with a metric ID.
##
## id: The metric ID that the filter should be associated with.
##
## filter: The record representing the filter configuration.
global add_filter: function(id: ID, filter: Filter);
## Add data into a :bro:type:`Metrics::ID`. This should be called when
## a script has measured some point value and is ready to increment the
## counters.
##
## id: The metric ID that the data represents.
##
## index: The metric index that the value is to be added to.
##
## increment: How much to increment the counter by.
global add_data: function(id: ID, index: Index, increment: count);
## Helper function to represent a :bro:type:`Metrics::Index` value as
## a simple string
##
## index: The metric index that is to be converted into a string.
##
## Returns: A string representation of the metric index.
global index2str: function(index: Index): string;
## Event that is used to "finish" metrics and adapt the metrics
## framework for clustered or non-clustered usage.
##
## .. note:: This is primarily intended for internal use.
global log_it: event(filter: Filter);
## Event to access metrics records as they are passed to the logging framework.
global log_metrics: event(rec: Info);
## Type to store a table of metrics values. Internal use only!
type MetricTable: table[Index] of count &default=0;
}
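# A hedged usage sketch (the metric ID, notice type, and values below are
# hypothetical and not part of this framework): a script would typically
# extend Metrics::ID, register a filter, and then feed data points.
#
#   redef enum Metrics::ID += { SSH_FAILED_LOGINS };
#   redef enum Notice::Type += { SSH_Bruteforcing };
#
#   event bro_init()
#       {
#       Metrics::add_filter(SSH_FAILED_LOGINS,
#                           [$name="default",
#                            $break_interval=5mins,
#                            $note=SSH_Bruteforcing,
#                            $notice_threshold=30]);
#       }
#
#   # Later, each time a failed login is observed:
#   #   Metrics::add_data(SSH_FAILED_LOGINS, [$host=192.0.2.1], 1);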
redef record Notice::Info += {
metric_index: Index &log &optional;
};
global metric_filters: table[ID] of vector of Filter = table();
global filter_store: table[ID, string] of Filter = table();
# This is indexed by metric ID and stream filter name.
global store: table[ID, string] of MetricTable = table() &default=table();
# This function checks if a threshold has been crossed and generates a
# notice if it has. It is also used as a method to implement
# mid-break-interval threshold crossing detection for cluster deployments.
global check_notice: function(filter: Filter, index: Index, val: count): bool;
# This is a hook for watching thresholds being crossed. It is called whenever
# index values are updated, and the new value is given as the `val` argument.
global data_added: function(filter: Filter, index: Index, val: count);
# This stores the current threshold index for filters using the
# $notice_threshold and $notice_thresholds elements.
global thresholds: table[ID, string, Index] of count = {} &create_expire=renotice_interval &default=0;
event bro_init() &priority=5
{
Log::create_stream(Metrics::LOG, [$columns=Info, $ev=log_metrics]);
}
function index2str(index: Index): string
{
local out = "";
if ( index?$host )
out = fmt("%shost=%s", out, index$host);
if ( index?$network )
out = fmt("%s%snetwork=%s", out, |out|==0 ? "" : ", ", index$network);
if ( index?$str )
out = fmt("%s%sstr=%s", out, |out|==0 ? "" : ", ", index$str);
return fmt("metric_index(%s)", out);
}
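# For example (values arbitrary), index2str([$host=192.0.2.1, $str="example.com"])
# would return "metric_index(host=192.0.2.1, str=example.com)".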
function write_log(ts: time, filter: Filter, data: MetricTable)
{
for ( index in data )
{
local val = data[index];
local m: Info = [$ts=ts,
$metric_id=filter$id,
$filter_name=filter$name,
$index=index,
$value=val];
if ( filter$log )
Log::write(Metrics::LOG, m);
}
}
function reset(filter: Filter)
{
store[filter$id, filter$name] = table();
}
function add_filter(id: ID, filter: Filter)
{
if ( filter?$aggregation_table && filter?$aggregation_mask )
{
print "INVALID Metric filter: Defined $aggregation_table and $aggregation_mask.";
return;
}
if ( [id, filter$name] in store )
{
print fmt("INVALID Metric filter: Filter with name \"%s\" already exists.", filter$name);
return;
}
if ( filter?$notice_threshold && filter?$notice_thresholds )
{
print "INVALID Metric filter: Defined both $notice_threshold and $notice_thresholds";
return;
}
if ( ! filter?$id )
filter$id = id;
if ( id !in metric_filters )
metric_filters[id] = vector();
metric_filters[id][|metric_filters[id]|] = filter;
filter_store[id, filter$name] = filter;
store[id, filter$name] = table();
schedule filter$break_interval { Metrics::log_it(filter) };
}
function add_data(id: ID, index: Index, increment: count)
{
if ( id !in metric_filters )
return;
local filters = metric_filters[id];
# Try to add the data to all of the defined filters for the metric.
for ( filter_id in filters )
{
local filter = filters[filter_id];
# If this filter has a predicate, run the predicate and skip this
# index if the predicate returns false.
if ( filter?$pred && ! filter$pred(index) )
next;
if ( index?$host )
{
if ( filter?$aggregation_mask )
{
index$network = mask_addr(index$host, filter$aggregation_mask);
delete index$host;
}
else if ( filter?$aggregation_table )
{
# Don't add the data if the aggregation table doesn't include
# the given host address.
if ( index$host !in filter$aggregation_table )
return;
index$network = filter$aggregation_table[index$host];
delete index$host;
}
}
local metric_tbl = store[id, filter$name];
if ( index !in metric_tbl )
metric_tbl[index] = 0;
metric_tbl[index] += increment;
data_added(filter, index, metric_tbl[index]);
}
}
function check_notice(filter: Filter, index: Index, val: count): bool
{
if ( (filter?$notice_threshold &&
[filter$id, filter$name, index] !in thresholds &&
val >= filter$notice_threshold) ||
(filter?$notice_thresholds &&
|filter$notice_thresholds| <= thresholds[filter$id, filter$name, index] &&
val >= filter$notice_thresholds[thresholds[filter$id, filter$name, index]]) )
return T;
else
return F;
}
function do_notice(filter: Filter, index: Index, val: count)
{
# We include $peer_descr here because a manager could have actually
# generated the notice even though the current remote peer for the event
# calling this could be a worker if this is running as a cluster.
local n: Notice::Info = [$note=filter$note,
$n=val,
$metric_index=index,
$peer_descr=peer_description];
n$msg = fmt("Threshold crossed by %s %d/%d", index2str(index), val, filter$notice_threshold);
if ( index?$str )
n$sub = index$str;
if ( index?$host )
n$src = index$host;
# TODO: not sure where to put the network yet.
NOTICE(n);
# This just needs to be set to some value so that it doesn't refire the
# notice until it expires from the table or crosses the next
# threshold in the case of vectors of thresholds.
++thresholds[filter$id, filter$name, index];
}

View file

@ -1,21 +0,0 @@
@load ./main
module Metrics;
event Metrics::log_it(filter: Filter)
{
local id = filter$id;
local name = filter$name;
write_log(network_time(), filter, store[id, name]);
reset(filter);
schedule filter$break_interval { Metrics::log_it(filter) };
}
function data_added(filter: Filter, index: Index, val: count)
{
if ( check_notice(filter, index, val) )
do_notice(filter, index, val);
}

View file

@ -68,6 +68,25 @@ export {
## the notice policy. ## the notice policy.
iconn: icmp_conn &optional; iconn: icmp_conn &optional;
## A file record if the notice is related to a file. The
## reference to the actual fa_file record will be deleted after applying
## the notice policy.
f: fa_file &optional;
## A file unique ID if this notice is related to a file. If the $f
## field is provided, this will be automatically filled out.
fuid: string &log &optional;
## A mime type if the notice is related to a file. If the $f field
## is provided, this will be automatically filled out.
file_mime_type: string &log &optional;
## Frequently files can be "described" to give a bit more context.
## This field will typically be automatically filled out from an
## fa_file record. For example, if a notice was related to a
## file over HTTP, the URL of the request would be shown.
file_desc: string &log &optional;
## The transport protocol. Filled automatically when either conn, iconn ## The transport protocol. Filled automatically when either conn, iconn
## or p is specified. ## or p is specified.
proto: transport_proto &log &optional; proto: transport_proto &log &optional;
@ -431,9 +450,6 @@ hook Notice::notice(n: Notice::Info) &priority=-5
} }
} }
## This determines if a notice is being suppressed. It is only used
## internally as part of the mechanics for the global :bro:id:`NOTICE`
## function.
function is_being_suppressed(n: Notice::Info): bool function is_being_suppressed(n: Notice::Info): bool
{ {
if ( n?$identifier && [n$note, n$identifier] in suppressing ) if ( n?$identifier && [n$note, n$identifier] in suppressing )
@ -463,10 +479,28 @@ function apply_policy(n: Notice::Info)
if ( ! n?$ts ) if ( ! n?$ts )
n$ts = network_time(); n$ts = network_time();
if ( n?$f )
{
if ( ! n?$fuid )
n$fuid = n$f$id;
if ( ! n?$file_mime_type && n$f?$mime_type )
n$file_mime_type = n$f$mime_type;
n$file_desc = Files::describe(n$f);
if ( n$f?$conns && |n$f$conns| == 1 )
{
for ( id in n$f$conns )
n$conn = n$f$conns[id];
}
}
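# A hedged sketch (the notice type and the local variable f are hypothetical):
# with the new $f field a caller can hand an fa_file record straight to
# NOTICE() and let this code fill in $fuid, $file_mime_type, $file_desc and,
# when unambiguous, $conn.
#
#   NOTICE([$note=MyModule::Suspicious_File, $f=f,
#           $msg="suspicious file observed"]);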
if ( n?$conn ) if ( n?$conn )
{ {
if ( ! n?$id ) if ( ! n?$id )
n$id = n$conn$id; n$id = n$conn$id;
if ( ! n?$uid ) if ( ! n?$uid )
n$uid = n$conn$uid; n$uid = n$conn$uid;
} }
@ -516,13 +550,15 @@ function apply_policy(n: Notice::Info)
if ( ! n?$suppress_for ) if ( ! n?$suppress_for )
n$suppress_for = default_suppression_interval; n$suppress_for = default_suppression_interval;
# Delete the connection record if it's there so we aren't sending that # Delete the connection and file records if they're there so we
# to remote machines. It can cause problems due to the size of the # aren't sending that to remote machines. It can cause problems
# connection record. # due to the size of those records.
if ( n?$conn ) if ( n?$conn )
delete n$conn; delete n$conn;
if ( n?$iconn ) if ( n?$iconn )
delete n$iconn; delete n$iconn;
if ( n?$f )
delete n$f;
} }
function internal_NOTICE(n: Notice::Info) function internal_NOTICE(n: Notice::Info)

View file

@ -1,2 +1,3 @@
@load ./utils
@load ./main @load ./main
@load ./netstats @load ./netstats

View file

@ -1,10 +1,12 @@
##! This script supports how Bro sets its BPF capture filter. By default ##! This script supports how Bro sets its BPF capture filter. By default
##! Bro sets an unrestricted filter that allows all traffic. If a filter ##! Bro sets a capture filter that allows all traffic. If a filter
##! is set on the command line, that filter takes precedence over the default ##! is set on the command line, that filter takes precedence over the default
##! open filter and all filters defined in Bro scripts with the ##! open filter and all filters defined in Bro scripts with the
##! :bro:id:`capture_filters` and :bro:id:`restrict_filters` variables. ##! :bro:id:`capture_filters` and :bro:id:`restrict_filters` variables.
@load base/frameworks/notice @load base/frameworks/notice
@load base/frameworks/analyzer
@load ./utils
module PacketFilter; module PacketFilter;
@ -14,11 +16,14 @@ export {
## Add notice types related to packet filter errors. ## Add notice types related to packet filter errors.
redef enum Notice::Type += { redef enum Notice::Type += {
## This notice is generated if a packet filter is unable to be compiled. ## This notice is generated if a packet filter cannot be compiled.
Compile_Failure, Compile_Failure,
## This notice is generated if a packet filter fails to install. ## Generated if a packet filter fails to install.
Install_Failure, Install_Failure,
## Generated when a filter takes too long to compile.
Too_Long_To_Compile_Filter
}; };
## The record type defining columns to be logged in the packet filter ## The record type defining columns to be logged in the packet filter
@ -42,83 +47,248 @@ export {
success: bool &log &default=T; success: bool &log &default=T;
}; };
## By default, Bro will examine all packets. If this is set to false, ## The BPF filter that is used by default to define what traffic should
## it will dynamically build a BPF filter that only select protocols ## be captured. Filters defined in :bro:id:`restrict_filters` will still
## for which the user has loaded a corresponding analysis script. ## be applied to reduce the captured traffic.
## The latter used to be default for Bro versions < 2.0. That has now const default_capture_filter = "ip or not ip" &redef;
## changed however to enable port-independent protocol analysis.
const all_packets = T &redef;
## Filter string which is unconditionally or'ed to the beginning of every ## Filter string which is unconditionally or'ed to the beginning of every
## dynamically built filter. ## dynamically built filter.
const unrestricted_filter = "" &redef; const unrestricted_filter = "" &redef;
## Filter string which is unconditionally and'ed to the beginning of every
## dynamically built filter. This is mostly used when a custom filter is being
## used but MPLS or VLAN tags are on the traffic.
const restricted_filter = "" &redef;
## The maximum amount of time that you'd like to allow for BPF filters to compile.
## If this time is exceeded, compensation measures may be taken by the framework
## to reduce the filter size. This threshold being crossed also results in
## the :bro:see:`PacketFilter::Too_Long_To_Compile_Filter` notice.
const max_filter_compile_time = 100msec &redef;
## Install a BPF filter to exclude some traffic. The filter should positively
## match what is to be excluded; it will be wrapped in a "not".
##
## filter_id: An arbitrary string that can be used to identify
## the filter.
##
## filter: A BPF expression of traffic that should be excluded.
##
## Returns: A boolean value to indicate if the filter was successfully
## installed or not.
global exclude: function(filter_id: string, filter: string): bool;
## Install a temporary filter to exclude traffic which should not be passed
## through the BPF filter. The filter should match the traffic you don't want
## to see (it will be wrapped in a "not" condition).
##
## filter_id: An arbitrary string that can be used to identify
## the filter.
##
## filter: A BPF expression of traffic that should be excluded.
##
## span: The duration for which this filter should be put in place.
##
## Returns: A boolean value to indicate if the filter was successfully
## installed or not.
global exclude_for: function(filter_id: string, filter: string, span: interval): bool;
## Call this function to build and install a new dynamically built ## Call this function to build and install a new dynamically built
## packet filter. ## packet filter.
global install: function(); global install: function(): bool;
## A data structure to represent filter generating plugins.
type FilterPlugin: record {
## A function that is directly called when generating the complete filter.
func : function();
};
## API function to register a new plugin for dynamic restriction filters.
global register_filter_plugin: function(fp: FilterPlugin);
## Enables the old filtering approach of "only watch common ports for
## analyzed protocols".
##
## Unless you know what you are doing, leave this set to F.
const enable_auto_protocol_capture_filters = F &redef;
## This is where the default packet filter is stored and it should not ## This is where the default packet filter is stored and it should not
## normally be modified by users. ## normally be modified by users.
global default_filter = "<not set yet>"; global current_filter = "<not set yet>";
} }
global dynamic_restrict_filters: table[string] of string = {};
# Track if a filter is currently building so functions that would ultimately
# install a filter immediately can still be used but they won't try to build or
# install the filter.
global currently_building = F;
# Internal tracking for whether the filter being built has possibly been changed.
global filter_changed = F;
global filter_plugins: set[FilterPlugin] = {};
redef enum PcapFilterID += { redef enum PcapFilterID += {
DefaultPcapFilter, DefaultPcapFilter,
FilterTester,
}; };
function combine_filters(lfilter: string, rfilter: string, op: string): string function test_filter(filter: string): bool
{ {
if ( lfilter == "" && rfilter == "" ) if ( ! precompile_pcap_filter(FilterTester, filter) )
return ""; {
else if ( lfilter == "" ) # The given filter was invalid
return rfilter; # TODO: generate a notice.
else if ( rfilter == "" ) return F;
return lfilter; }
else return T;
return fmt("(%s) %s (%s)", lfilter, op, rfilter);
} }
function build_default_filter(): string # This tracks any changes for filtering mechanisms that play along nice
# and set filter_changed to T.
event filter_change_tracking()
{
if ( filter_changed )
install();
schedule 5min { filter_change_tracking() };
}
event bro_init() &priority=5
{
Log::create_stream(PacketFilter::LOG, [$columns=Info]);
# Preverify the capture and restrict filters to give more granular failure messages.
for ( id in capture_filters )
{
if ( ! test_filter(capture_filters[id]) )
Reporter::fatal(fmt("Invalid capture_filter named '%s' - '%s'", id, capture_filters[id]));
}
for ( id in restrict_filters )
{
if ( ! test_filter(restrict_filters[id]) )
Reporter::fatal(fmt("Invalid restrict filter named '%s' - '%s'", id, restrict_filters[id]));
}
}
event bro_init() &priority=-5
{
install();
event filter_change_tracking();
}
function register_filter_plugin(fp: FilterPlugin)
{
add filter_plugins[fp];
}
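# A hedged sketch (the plugin name and port are arbitrary): a plugin's $func
# callback runs each time build() assembles the filter, so it can contribute
# to the result, e.g. by populating capture_filters before the capture and
# restrict filters are combined.
#
#   event bro_init()
#       {
#       PacketFilter::register_filter_plugin([$func = function()
#           {
#           capture_filters["example-plugin"] = "tcp port 8080";
#           }]);
#       }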
event remove_dynamic_filter(filter_id: string)
{
if ( filter_id in dynamic_restrict_filters )
{
delete dynamic_restrict_filters[filter_id];
install();
}
}
function exclude(filter_id: string, filter: string): bool
{
if ( ! test_filter(filter) )
return F;
dynamic_restrict_filters[filter_id] = filter;
install();
return T;
}
function exclude_for(filter_id: string, filter: string, span: interval): bool
{
if ( exclude(filter_id, filter) )
{
schedule span { remove_dynamic_filter(filter_id) };
return T;
}
return F;
}
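# A hedged usage sketch (the address and interval are arbitrary): from within
# an event handler, temporarily drop a noisy host from the capture filter.
#
#   PacketFilter::exclude_for("noisy-host", "host 192.0.2.1", 10mins);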
function build(): string
{ {
if ( cmd_line_bpf_filter != "" ) if ( cmd_line_bpf_filter != "" )
# Return what the user specified on the command line; # Return what the user specified on the command line;
return cmd_line_bpf_filter; return cmd_line_bpf_filter;
if ( all_packets ) currently_building = T;
# Return an "always true" filter.
return "ip or not ip";
# Build filter dynamically. # Generate all of the plugin based filters.
for ( plugin in filter_plugins )
{
plugin$func();
}
# First the capture_filter.
local cfilter = ""; local cfilter = "";
for ( id in capture_filters ) if ( |capture_filters| == 0 && ! enable_auto_protocol_capture_filters )
cfilter = combine_filters(cfilter, capture_filters[id], "or"); cfilter = default_capture_filter;
# Then the restrict_filter. for ( id in capture_filters )
cfilter = combine_filters(cfilter, "or", capture_filters[id]);
if ( enable_auto_protocol_capture_filters )
cfilter = combine_filters(cfilter, "or", Analyzer::get_bpf());
# Apply the restriction filters.
local rfilter = ""; local rfilter = "";
for ( id in restrict_filters ) for ( id in restrict_filters )
rfilter = combine_filters(rfilter, restrict_filters[id], "and"); rfilter = combine_filters(rfilter, "and", restrict_filters[id]);
# Apply the dynamic restriction filters.
for ( filt in dynamic_restrict_filters )
rfilter = combine_filters(rfilter, "and", string_cat("not (", dynamic_restrict_filters[filt], ")"));
# Finally, join them into one filter. # Finally, join them into one filter.
local filter = combine_filters(rfilter, cfilter, "and"); local filter = combine_filters(cfilter, "and", rfilter);
if ( unrestricted_filter != "" )
filter = combine_filters(unrestricted_filter, filter, "or");
if ( unrestricted_filter != "" )
filter = combine_filters(unrestricted_filter, "or", filter);
if ( restricted_filter != "" )
filter = combine_filters(restricted_filter, "and", filter);
currently_building = F;
return filter; return filter;
} }
function install() function install(): bool
{ {
default_filter = build_default_filter(); if ( currently_building )
return F;
if ( ! precompile_pcap_filter(DefaultPcapFilter, default_filter) ) local tmp_filter = build();
# No need to proceed if the filter hasn't changed.
if ( tmp_filter == current_filter )
return F;
local ts = current_time();
if ( ! precompile_pcap_filter(DefaultPcapFilter, tmp_filter) )
{ {
NOTICE([$note=Compile_Failure, NOTICE([$note=Compile_Failure,
$msg=fmt("Compiling packet filter failed"), $msg=fmt("Compiling packet filter failed"),
$sub=default_filter]); $sub=tmp_filter]);
Reporter::fatal(fmt("Bad pcap filter '%s'", default_filter)); if ( network_time() == 0.0 )
Reporter::fatal(fmt("Bad pcap filter '%s'", tmp_filter));
else
Reporter::warning(fmt("Bad pcap filter '%s'", tmp_filter));
} }
local diff = current_time()-ts;
if ( diff > max_filter_compile_time )
NOTICE([$note=Too_Long_To_Compile_Filter,
$msg=fmt("A BPF filter is taking longer than %0.1f seconds to compile", diff)]);
# Set it to the current filter if it passed precompiling
current_filter = tmp_filter;
# Do an audit log for the packet filter. # Do an audit log for the packet filter.
local info: Info; local info: Info;
@ -129,7 +299,7 @@ function install()
info$ts = current_time(); info$ts = current_time();
info$init = T; info$init = T;
} }
info$filter = default_filter; info$filter = current_filter;
if ( ! install_pcap_filter(DefaultPcapFilter) ) if ( ! install_pcap_filter(DefaultPcapFilter) )
{ {
@ -137,15 +307,13 @@ function install()
info$success = F; info$success = F;
NOTICE([$note=Install_Failure, NOTICE([$note=Install_Failure,
$msg=fmt("Installing packet filter failed"), $msg=fmt("Installing packet filter failed"),
$sub=default_filter]); $sub=current_filter]);
} }
if ( reading_live_traffic() || reading_traces() ) if ( reading_live_traffic() || reading_traces() )
Log::write(PacketFilter::LOG, info); Log::write(PacketFilter::LOG, info);
}
event bro_init() &priority=10 # Update the filter change tracking
{ filter_changed = F;
Log::create_stream(PacketFilter::LOG, [$columns=Info]); return T;
PacketFilter::install();
} }

View file

@ -13,7 +13,7 @@ export {
}; };
## This is the interval between individual statistics collection. ## This is the interval between individual statistics collection.
const stats_collection_interval = 10secs; const stats_collection_interval = 5min;
} }
event net_stats_update(last_stat: NetStats) event net_stats_update(last_stat: NetStats)

View file

@ -0,0 +1,58 @@
module PacketFilter;
export {
## Takes a :bro:type:`port` and returns a BPF expression which will
## match the port.
##
## p: The port.
##
## Returns: A valid BPF filter string for matching the port.
global port_to_bpf: function(p: port): string;
## Create a BPF filter to sample IPv4 and IPv6 traffic.
##
## num_parts: The number of parts the traffic should be split into.
##
## this_part: The part of the traffic this filter will accept. 0-based.
global sampling_filter: function(num_parts: count, this_part: count): string;
## Combines two valid BPF filter strings with a string based operator
## to form a new filter.
##
## lfilter: Filter which will go on the left side.
##
## op: Operation being applied (typically "or" or "and").
##
## rfilter: Filter which will go on the right side.
##
## Returns: A new string representing the two filters combined with
## the operator. Either filter being an empty string will
## still result in a valid filter.
global combine_filters: function(lfilter: string, op: string, rfilter: string): string;
}
function port_to_bpf(p: port): string
{
local tp = get_port_transport_proto(p);
return cat(tp, " and ", fmt("port %d", p));
}
function combine_filters(lfilter: string, op: string, rfilter: string): string
{
if ( lfilter == "" && rfilter == "" )
return "";
else if ( lfilter == "" )
return rfilter;
else if ( rfilter == "" )
return lfilter;
else
return fmt("(%s) %s (%s)", lfilter, op, rfilter);
}
function sampling_filter(num_parts: count, this_part: count): string
{
local v4_filter = fmt("ip and ((ip[14:2]+ip[18:2]) - (%d*((ip[14:2]+ip[18:2])/%d)) == %d)", num_parts, num_parts, this_part);
# TODO: this is probably a fairly suboptimal filter, but it should work for now.
local v6_filter = fmt("ip6 and ((ip6[22:2]+ip6[38:2]) - (%d*((ip6[22:2]+ip6[38:2])/%d)) == %d)", num_parts, num_parts, this_part);
return combine_filters(v4_filter, "or", v6_filter);
}
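# A hedged usage sketch (ports and sampling parameters are arbitrary): from
# within a function or event handler, these helpers compose BPF expressions,
# e.g. capturing DNS and HTTP traffic from the first of two equal samples.
#
#   local dns_http = PacketFilter::combine_filters(
#       PacketFilter::port_to_bpf(53/udp), "or", PacketFilter::port_to_bpf(80/tcp));
#   local sampled = PacketFilter::combine_filters(
#       dns_http, "and", PacketFilter::sampling_filter(2, 0));
#   # sampled is now "((udp and port 53) or (tcp and port 80)) and (...)"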

View file

@ -9,7 +9,7 @@
##! Note that this framework deals with the handling of internally generated ##! Note that this framework deals with the handling of internally generated
##! reporter messages, for the interface in to actually creating interface ##! reporter messages, for the interface in to actually creating interface
##! into actually creating reporter messages from the scripting layer, use ##! into actually creating reporter messages from the scripting layer, use
##! the built-in functions in :doc:`/scripts/base/reporter.bif`. ##! the built-in functions in :doc:`/scripts/base/bif/reporter.bif`.
module Reporter; module Reporter;

View file

@ -29,6 +29,8 @@ export {
minor: count &optional; minor: count &optional;
## Minor subversion number ## Minor subversion number
minor2: count &optional; minor2: count &optional;
## Minor updates number
minor3: count &optional;
## Additional version string (e.g. "beta42") ## Additional version string (e.g. "beta42")
addl: string &optional; addl: string &optional;
} &log; } &log;
@ -146,10 +148,10 @@ function parse(unparsed_version: string): Description
if ( /^[\/\-\._v\(]/ in sv ) if ( /^[\/\-\._v\(]/ in sv )
sv = strip(sub(version_parts[2], /^\(?[\/\-\._v\(]/, "")); sv = strip(sub(version_parts[2], /^\(?[\/\-\._v\(]/, ""));
local version_numbers = split_n(sv, /[\-\._,\[\(\{ ]/, F, 3); local version_numbers = split_n(sv, /[\-\._,\[\(\{ ]/, F, 3);
if ( 4 in version_numbers && version_numbers[4] != "" ) if ( 5 in version_numbers && version_numbers[5] != "" )
v$addl = strip(version_numbers[4]); v$addl = strip(version_numbers[5]);
else if ( 3 in version_parts && version_parts[3] != "" && else if ( 3 in version_parts && version_parts[3] != "" &&
version_parts[3] != ")" ) version_parts[3] != ")" )
{ {
if ( /^[[:blank:]]*\([a-zA-Z0-9\-\._[:blank:]]*\)/ in version_parts[3] ) if ( /^[[:blank:]]*\([a-zA-Z0-9\-\._[:blank:]]*\)/ in version_parts[3] )
{ {
@ -178,6 +180,8 @@ function parse(unparsed_version: string): Description
} }
} }
if ( 4 in version_numbers && version_numbers[4] != "" )
v$minor3 = extract_count(version_numbers[4]);
if ( 3 in version_numbers && version_numbers[3] != "" ) if ( 3 in version_numbers && version_numbers[3] != "" )
v$minor2 = extract_count(version_numbers[3]); v$minor2 = extract_count(version_numbers[3]);
if ( 2 in version_numbers && version_numbers[2] != "" ) if ( 2 in version_numbers && version_numbers[2] != "" )
@ -332,8 +336,25 @@ function cmp_versions(v1: Version, v2: Version): int
return v1?$minor2 ? 1 : -1; return v1?$minor2 ? 1 : -1;
} }
if ( v1?$minor3 && v2?$minor3 )
{
if ( v1$minor3 < v2$minor3 )
return -1;
if ( v1$minor3 > v2$minor3 )
return 1;
}
else
{
if ( !v1?$minor3 && !v2?$minor3 )
{ }
else
return v1?$minor3 ? 1 : -1;
}
if ( v1?$addl && v2?$addl ) if ( v1?$addl && v2?$addl )
{
return strcmp(v1$addl, v2$addl); return strcmp(v1$addl, v2$addl);
}
else else
{ {
if ( !v1?$addl && !v2?$addl ) if ( !v1?$addl && !v2?$addl )
@ -341,6 +362,9 @@ function cmp_versions(v1: Version, v2: Version): int
else else
return v1?$addl ? 1 : -1; return v1?$addl ? 1 : -1;
} }
# A catch-all return that should never be reached... hopefully.
return 0;
} }
function software_endpoint_name(id: conn_id, host: addr): string function software_endpoint_name(id: conn_id, host: addr): string
@ -351,10 +375,11 @@ function software_endpoint_name(id: conn_id, host: addr): string
# Convert a version into a string "a.b.c-x". # Convert a version into a string "a.b.c-x".
function software_fmt_version(v: Version): string function software_fmt_version(v: Version): string
{ {
return fmt("%d.%d.%d%s", return fmt("%s%s%s%s%s",
v?$major ? v$major : 0, v?$major ? fmt("%d", v$major) : "0",
v?$minor ? v$minor : 0, v?$minor ? fmt(".%d", v$minor) : "",
v?$minor2 ? v$minor2 : 0, v?$minor2 ? fmt(".%d", v$minor2) : "",
v?$minor3 ? fmt(".%d", v$minor3) : "",
v?$addl ? fmt("-%s", v$addl) : ""); v?$addl ? fmt("-%s", v$addl) : "");
} }

View file

@ -1,4 +1,5 @@
@load ./main @load ./main
@load ./plugins
# The cluster framework must be loaded first. # The cluster framework must be loaded first.
@load base/frameworks/cluster @load base/frameworks/cluster

View file

@ -0,0 +1,580 @@
##! This implements transparent cluster support for the SumStats framework.
##! Do not load this file directly. It's only meant to be loaded automatically,
##! and it will be if the cluster framework has been enabled.
##! The goal of this script is to make sumstats calculation completely and
##! transparently automated when running on a cluster.
@load base/frameworks/cluster
@load ./main
module SumStats;
export {
## The percent of the full threshold value that needs to be met on a single worker
## for that worker to send the value to its manager in order for it to request a
## global view for that value. There is no requirement that the manager requests
## a global view for the key since it may opt not to if it requested a global view
## for the key recently.
const cluster_request_global_view_percent = 0.2 &redef;
## This is to deal with intermediate update overload. A manager will only allow
## this many intermediate update requests to the workers to be in flight at any
## given time. Intermediate updates requested beyond this limit are currently
## thrown out and not performed. In practice this should hopefully have a
## minimal effect.
const max_outstanding_global_views = 10 &redef;
## Event sent by the manager in a cluster to initiate the collection of values for
## a sumstat.
global cluster_ss_request: event(uid: string, ss_name: string, cleanup: bool);
## Event sent by nodes that are collecting sumstats after receiving a request for
## the sumstat from the manager.
#global cluster_ss_response: event(uid: string, ss_name: string, data: ResultTable, done: bool, cleanup: bool);
## This event is sent by the manager in a cluster to initiate the collection of
## a single key value from a sumstat. It's typically used to get intermediate
## updates before the break interval triggers to speed detection of a value
## crossing a threshold.
global cluster_get_result: event(uid: string, ss_name: string, key: Key, cleanup: bool);
## This event is sent by nodes in response to a
## :bro:id:`SumStats::cluster_get_result` event.
global cluster_send_result: event(uid: string, ss_name: string, key: Key, result: Result, cleanup: bool);
## This is sent by workers to indicate that they crossed the fraction of the
## current threshold defined globally in
## :bro:id:`SumStats::cluster_request_global_view_percent`.
global cluster_key_intermediate_response: event(ss_name: string, key: SumStats::Key);
## This event is scheduled internally on workers to send result chunks.
global send_data: event(uid: string, ss_name: string, data: ResultTable, cleanup: bool);
global get_a_key: event(uid: string, ss_name: string, cleanup: bool &default=F);
global send_a_key: event(uid: string, ss_name: string, key: Key);
global send_no_key: event(uid: string, ss_name: string);
## This event is generated when a threshold is crossed.
global cluster_threshold_crossed: event(ss_name: string, key: SumStats::Key, thold_index: count);
}
# Add events to the cluster framework to make this work.
redef Cluster::manager2worker_events += /SumStats::cluster_(ss_request|get_result|threshold_crossed)/;
redef Cluster::manager2worker_events += /SumStats::(thresholds_reset|get_a_key)/;
redef Cluster::worker2manager_events += /SumStats::cluster_(ss_response|send_result|key_intermediate_response)/;
redef Cluster::worker2manager_events += /SumStats::(send_a_key|send_no_key)/;
@if ( Cluster::local_node_type() != Cluster::MANAGER )
# This variable is maintained to know what keys have recently been sent as
# intermediate updates so workers don't overwhelm their manager. The count that is
# yielded is the number of times the percentage threshold has been crossed and
# an intermediate result has been received.
global recent_global_view_keys: table[string, Key] of count &create_expire=1min &default=0;
# Result tables indexed on a uid that are currently being sent to the
# manager.
global sending_results: table[string] of ResultTable = table() &create_expire=1min;
# This is done on all non-manager node types in the event that a sumstat is
# being collected somewhere other than a worker.
function data_added(ss: SumStat, key: Key, result: Result)
{
# If an intermediate update for this value was sent recently, don't send
# it again.
if ( [ss$name, key] in recent_global_view_keys )
return;
# If the result, scaled up by the global view percentage passed to
# check_thresholds(), crosses the full threshold, then it's a candidate to
# send as an intermediate update.
if ( check_thresholds(ss, key, result, cluster_request_global_view_percent) )
{
# kick off intermediate update
event SumStats::cluster_key_intermediate_response(ss$name, key);
++recent_global_view_keys[ss$name, key];
}
}
#event SumStats::send_data(uid: string, ss_name: string, cleanup: bool)
# {
# #print fmt("WORKER %s: sending data for uid %s...", Cluster::node, uid);
#
# local local_data: ResultTable = table();
# local incoming_data: ResultTable = cleanup ? data : copy(data);
#
# local num_added = 0;
# for ( key in incoming_data )
# {
# local_data[key] = incoming_data[key];
# delete incoming_data[key];
#
# # Only send cluster_send_in_groups_of at a time. Queue another
# # event to send the next group.
# if ( cluster_send_in_groups_of == ++num_added )
# break;
# }
#
# local done = F;
# # If data is empty, this sumstat is done.
# if ( |incoming_data| == 0 )
# done = T;
#
# # Note: copy is needed to compensate serialization caching issue. This should be
# # changed to something else later.
# event SumStats::cluster_ss_response(uid, ss_name, copy(local_data), done, cleanup);
# if ( ! done )
# schedule 0.01 sec { SumStats::send_data(uid, T) };
# }
event SumStats::get_a_key(uid: string, ss_name: string, cleanup: bool)
{
if ( uid in sending_results )
{
if ( |sending_results[uid]| == 0 )
{
event SumStats::send_no_key(uid, ss_name);
}
else
{
for ( key in sending_results[uid] )
{
event SumStats::send_a_key(uid, ss_name, key);
# break to only send one.
break;
}
}
}
else if ( !cleanup && ss_name in result_store && |result_store[ss_name]| > 0 )
{
if ( |result_store[ss_name]| == 0 )
{
event SumStats::send_no_key(uid, ss_name);
}
else
{
for ( key in result_store[ss_name] )
{
event SumStats::send_a_key(uid, ss_name, key);
# break to only send one.
break;
}
}
}
else
{
event SumStats::send_no_key(uid, ss_name);
}
}
event SumStats::cluster_ss_request(uid: string, ss_name: string, cleanup: bool)
{
#print fmt("WORKER %s: received the cluster_ss_request event for %s.", Cluster::node, id);
# Create a back store for the result
sending_results[uid] = (ss_name in result_store) ? result_store[ss_name] : table();
# Lookup the actual sumstats and reset it, the reference to the data
# currently stored will be maintained internally from the
# sending_results table.
if ( cleanup && ss_name in stats_store )
reset(stats_store[ss_name]);
}
event SumStats::cluster_get_result(uid: string, ss_name: string, key: Key, cleanup: bool)
{
#print fmt("WORKER %s: received the cluster_get_result event for %s=%s.", Cluster::node, key2str(key), data);
if ( cleanup ) # data will implicitly be in sending_results (i know this isn't great)
{
if ( uid in sending_results && key in sending_results[uid] )
{
# Note: copy is needed to compensate serialization caching issue. This should be
# changed to something else later.
event SumStats::cluster_send_result(uid, ss_name, key, copy(sending_results[uid][key]), cleanup);
delete sending_results[uid][key];
}
else
{
# We need to send an empty response if we don't have the data so that the manager
# can know that it heard back from all of the workers.
event SumStats::cluster_send_result(uid, ss_name, key, table(), cleanup);
}
}
else
{
if ( ss_name in result_store && key in result_store[ss_name] )
{
event SumStats::cluster_send_result(uid, ss_name, key, copy(result_store[ss_name][key]), cleanup);
}
else
{
# We need to send an empty response if we don't have the data so that the manager
# can know that it heard back from all of the workers.
event SumStats::cluster_send_result(uid, ss_name, key, table(), cleanup);
}
}
}
event SumStats::cluster_threshold_crossed(ss_name: string, key: SumStats::Key, thold_index: count)
{
if ( ss_name !in threshold_tracker )
threshold_tracker[ss_name] = table();
threshold_tracker[ss_name][key] = thold_index;
}
event SumStats::thresholds_reset(ss_name: string)
{
delete threshold_tracker[ss_name];
}
@endif
@if ( Cluster::local_node_type() == Cluster::MANAGER )
# This variable is maintained by manager nodes as they collect and aggregate
# results.
# Index on a uid.
global stats_keys: table[string] of set[Key] &create_expire=1min;
# This variable is maintained by manager nodes to track how many "dones" they
# collected per collection unique id. Once the number of results for a uid
# matches the number of peer nodes that results should be coming from, the
# result is written out and deleted from here.
# Indexed on a uid.
# TODO: add an &expire_func in case not all results are received.
global done_with: table[string] of count &create_expire=1min &default=0;
# This variable is maintained by managers to track intermediate responses as
# they are getting a global view for a certain key.
# Indexed on a uid.
global key_requests: table[string] of Result &create_expire=1min;
# This variable is maintained by managers to prevent overwhelming communication due
# to too many intermediate updates. Each sumstat is tracked separately so that
# one won't overwhelm and degrade other quieter sumstats.
# Indexed on a sumstat id.
global outstanding_global_views: table[string] of count &create_expire=1min &default=0;
const zero_time = double_to_time(0.0);
# Managers handle logging.
event SumStats::finish_epoch(ss: SumStat)
{
if ( network_time() > zero_time )
{
#print fmt("%.6f MANAGER: breaking %s sumstat", network_time(), ss$name);
local uid = unique_id("");
if ( uid in stats_keys )
delete stats_keys[uid];
stats_keys[uid] = set();
# Request data from peers.
event SumStats::cluster_ss_request(uid, ss$name, T);
done_with[uid] = 0;
#print fmt("get_key by uid: %s", uid);
event SumStats::get_a_key(uid, ss$name, T);
}
# Schedule the next finish_epoch event.
schedule ss$epoch { SumStats::finish_epoch(ss) };
}
# This is unlikely to be called often, but it's here in
# case there are sumstats being collected by managers.
function data_added(ss: SumStat, key: Key, result: Result)
{
if ( check_thresholds(ss, key, result, 1.0) )
{
threshold_crossed(ss, key, result);
event SumStats::cluster_threshold_crossed(ss$name, key, threshold_tracker[ss$name][key]);
}
}
function handle_end_of_result_collection(uid: string, ss_name: string, key: Key, cleanup: bool)
{
#print fmt("worker_count:%d :: done_with:%d", Cluster::worker_count, done_with[uid]);
local ss = stats_store[ss_name];
local ir = key_requests[uid];
if ( check_thresholds(ss, key, ir, 1.0) )
{
threshold_crossed(ss, key, ir);
event SumStats::cluster_threshold_crossed(ss_name, key, threshold_tracker[ss_name][key]);
}
if ( cleanup )
{
# This is done here because "cleanup" implicitly means
# it's the end of an epoch.
if ( ss?$epoch_result && |ir| > 0 )
{
local now = network_time();
ss$epoch_result(now, key, ir);
}
# Check that there is an outstanding view before subtracting.
# Global views only apply to non-dynamic requests. Dynamic
# requests must be serviced.
if ( outstanding_global_views[ss_name] > 0 )
--outstanding_global_views[ss_name];
}
delete key_requests[uid];
delete done_with[uid];
}
function request_all_current_keys(uid: string, ss_name: string, cleanup: bool)
{
#print "request_all_current_keys";
if ( uid in stats_keys && |stats_keys[uid]| > 0 )
{
#print fmt(" -- %d remaining keys here", |stats_keys[uid]|);
for ( key in stats_keys[uid] )
{
done_with[uid] = 0;
event SumStats::cluster_get_result(uid, ss_name, key, cleanup);
when ( uid in done_with && Cluster::worker_count == done_with[uid] )
{
#print "done getting result";
handle_end_of_result_collection(uid, ss_name, key, cleanup);
request_all_current_keys(uid, ss_name, cleanup);
}
delete stats_keys[uid][key];
break; # only a single key
}
}
else
{
# Get more keys! And this breaks us out of the evented loop.
done_with[uid] = 0;
#print fmt("get_key by uid: %s", uid);
event SumStats::get_a_key(uid, ss_name, cleanup);
}
}
event SumStats::send_no_key(uid: string, ss_name: string)
{
#print "send_no_key";
++done_with[uid];
if ( Cluster::worker_count == done_with[uid] )
{
delete done_with[uid];
if ( |stats_keys[uid]| > 0 )
{
#print "we need more keys!";
# Now that we have a key from each worker, lets
# grab all of the results.
request_all_current_keys(uid, ss_name, T);
}
else
{
#print "we're out of keys!";
local ss = stats_store[ss_name];
if ( ss?$epoch_finished )
ss$epoch_finished(network_time());
}
}
}
event SumStats::send_a_key(uid: string, ss_name: string, key: Key)
{
#print fmt("send_a_key %s", key);
if ( uid !in stats_keys )
{
# no clue what happened here
return;
}
if ( key !in stats_keys[uid] )
add stats_keys[uid][key];
++done_with[uid];
if ( Cluster::worker_count == done_with[uid] )
{
delete done_with[uid];
if ( |stats_keys[uid]| > 0 )
{
#print "we need more keys!";
# Now that we have a key from each worker, lets
# grab all of the results.
request_all_current_keys(uid, ss_name, T);
}
else
{
#print "we're out of keys!";
local ss = stats_store[ss_name];
if ( ss?$epoch_finished )
ss$epoch_finished(network_time());
}
}
}
event SumStats::cluster_send_result(uid: string, ss_name: string, key: Key, result: Result, cleanup: bool)
{
#print "cluster_send_result";
#print fmt("%0.6f MANAGER: receiving key data from %s - %s=%s", network_time(), get_event_peer()$descr, key2str(key), result);
# We only want to try and do a value merge if there are actually measured datapoints
# in the Result.
if ( uid !in key_requests || |key_requests[uid]| == 0 )
key_requests[uid] = result;
else
key_requests[uid] = compose_results(key_requests[uid], result);
# Mark that a worker is done.
++done_with[uid];
#if ( Cluster::worker_count == done_with[uid] )
# {
# print "done";
# handle_end_of_result_collection(uid, ss_name, key, cleanup);
# }
}
# Managers handle intermediate updates here.
event SumStats::cluster_key_intermediate_response(ss_name: string, key: Key)
{
#print fmt("MANAGER: receiving intermediate key data from %s", get_event_peer()$descr);
#print fmt("MANAGER: requesting key data for %s", key2str(key));
if ( ss_name in outstanding_global_views &&
|outstanding_global_views[ss_name]| > max_outstanding_global_views )
{
# Don't do this intermediate update. Perhaps at some point in the future
# we will queue and randomly select from these ignored intermediate
# update requests.
return;
}
++outstanding_global_views[ss_name];
local uid = unique_id("");
done_with[uid] = 0;
event SumStats::cluster_get_result(uid, ss_name, key, F);
when ( uid in done_with && Cluster::worker_count == done_with[uid] )
{
handle_end_of_result_collection(uid, ss_name, key, F);
}
timeout 1.1min
{
Reporter::warning(fmt("Dynamic SumStat intermediate key request for %s (%s) took longer than 1 minute and was automatically cancelled.", ss_name, key));
}
}
#event SumStats::cluster_ss_response(uid: string, ss_name: string, data: ResultTable, done: bool, cleanup: bool)
# {
# #print fmt("MANAGER: receiving results from %s", get_event_peer()$descr);
#
# # Mark another worker as being "done" for this uid.
# if ( done )
# ++done_with[uid];
#
# # We had better only be getting requests for stuff that exists.
# if ( ss_name !in stats_store )
# return;
#
# if ( uid !in stats_keys )
# stats_keys[uid] = table();
#
# local local_data = stats_keys[uid];
# local ss = stats_store[ss_name];
#
# for ( key in data )
# {
# if ( key in local_data )
# local_data[key] = compose_results(local_data[key], data[key]);
# else
# local_data[key] = data[key];
#
# # If a stat is done being collected, thresholds for each key
# # need to be checked so we're doing it here to avoid doubly
# # iterating over each key.
# if ( Cluster::worker_count == done_with[uid] )
# {
# if ( check_thresholds(ss, key, local_data[key], 1.0) )
# {
# threshold_crossed(ss, key, local_data[key]);
# event SumStats::cluster_threshold_crossed(ss$name, key, threshold_tracker[ss$name][key]);
# }
# }
# }
#
# # If the data has been collected from all peers, we are done and ready to finish.
# if ( cleanup && Cluster::worker_count == done_with[uid] )
# {
# local now = network_time();
# if ( ss?$epoch_result )
# {
# for ( key in local_data )
# ss$epoch_result(now, key, local_data[key]);
# }
#
# if ( ss?$epoch_finished )
# ss$epoch_finished(now);
#
# # Clean up
# delete stats_keys[uid];
# delete done_with[uid];
# reset(ss);
# }
# }
#function request(ss_name: string): ResultTable
# {
# # This only needs to be implemented this way for cluster compatibility.
# local uid = unique_id("dyn-");
# stats_keys[uid] = table();
# done_with[uid] = 0;
# event SumStats::cluster_ss_request(uid, ss_name, F);
#
# return when ( uid in done_with && Cluster::worker_count == done_with[uid] )
# {
# if ( uid in stats_keys )
# {
# local ss_result = stats_keys[uid];
# # Clean up
# delete stats_keys[uid];
# delete done_with[uid];
# reset(stats_store[ss_name]);
# return ss_result;
# }
# else
# return table();
# }
# timeout 1.1min
# {
# Reporter::warning(fmt("Dynamic SumStat request for %s took longer than 1 minute and was automatically cancelled.", ss_name));
# return table();
# }
# }
function request_key(ss_name: string, key: Key): Result
{
local uid = unique_id("");
done_with[uid] = 0;
key_requests[uid] = table();
event SumStats::cluster_get_result(uid, ss_name, key, F);
return when ( uid in done_with && Cluster::worker_count == done_with[uid] )
{
#print "done with request_key";
local result = key_requests[uid];
# Clean up
delete key_requests[uid];
delete done_with[uid];
return result;
}
timeout 1.1min
{
Reporter::warning(fmt("Dynamic SumStat key request for %s (%s) took longer than 1 minute and was automatically cancelled.", ss_name, key));
return table();
}
}
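# A hedged usage sketch (the stream name and key are arbitrary, and this
# assumes request_key() is exported by the main SumStats script): because the
# function returns asynchronously, callers use it from inside a "when" block,
# typically on the manager.
#
#   when ( local r = SumStats::request_key("conn.attempts", [$host=192.0.2.1]) )
#       {
#       print fmt("current result: %s", r);
#       }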
@endif

View file

@ -0,0 +1,529 @@
##! The summary statistics framework provides a way to
##! summarize large streams of data into simple reduced
##! measurements.
module SumStats;
export {
## The various calculations are all defined as plugins.
type Calculation: enum {
PLACEHOLDER
};
## Represents a thing which is having summarization
## results collected for it.
type Key: record {
## A non-address related summarization or a sub-key for
## an address based summarization. An example might be
## successful SSH connections by client IP address
## where the client string would be the key value.
## Another example might be number of HTTP requests to
## a particular value in a Host header. This is an
## example of a non-host based metric since multiple
## IP addresses could respond for the same Host
## header value.
str: string &optional;
## Host is the value to which this metric applies.
host: addr &optional;
};
## Represents data being added for a single observation.
## Only supply a single field at a time!
type Observation: record {
## Count value.
num: count &optional;
## Double value.
dbl: double &optional;
## String value.
str: string &optional;
};
type Reducer: record {
## Observation stream identifier for the reducer
## to attach to.
stream: string;
## The calculations to perform on the data points.
apply: set[Calculation];
## A predicate so that you can decide per key if you
## would like to accept the data being inserted.
pred: function(key: SumStats::Key, obs: SumStats::Observation): bool &optional;
## A function to normalize the key. This can be used to aggregate or
## normalize the entire key.
normalize_key: function(key: SumStats::Key): Key &optional;
};
## Value calculated for an observation stream fed into a reducer.
## Most of the fields are added by plugins.
type ResultVal: record {
## The time when the first observation was added to
## this result value.
begin: time;
## The time when the last observation was added to
## this result value.
end: time;
## The number of observations received.
num: count &default=0;
};
## Type to store results for multiple reducers.
type Result: table[string] of ResultVal;
## SumStats represent an aggregation of reducers along with
## mechanisms to handle various situations like the epoch ending
## or thresholds being crossed.
##
## It's best to not access any global state outside
## of the variables given to the callbacks because there
## is no assurance provided as to where the callbacks
## will be executed on clusters.
type SumStat: record {
## An arbitrary name for the sumstat so that it can
## be referred to later.
name: string;
## The interval at which this sumstat should be "broken"
## and the '$epoch_result' callback called. The
## results are also reset at this time, so any
## threshold-based detection needs to use a threshold
## value that can be expected to be crossed within
## this epoch.
epoch: interval;
## The reducers for the SumStat
reducers: set[Reducer];
## Provide a function to calculate a value from the
## :bro:see:`SumStats::Result` structure which will be used
## for thresholding.
## This is required if a $threshold value is given.
threshold_val: function(key: SumStats::Key, result: SumStats::Result): double &optional;
## The threshold value for calling the
## $threshold_crossed callback.
threshold: double &optional;
## A series of thresholds for calling the
## $threshold_crossed callback.
threshold_series: vector of double &optional;
## A callback that is called when a threshold is crossed.
threshold_crossed: function(key: SumStats::Key, result: SumStats::Result) &optional;
## A callback that receives each of the results at the
## end of the analysis epoch. The function will be
## called once for each key.
epoch_result: function(ts: time, key: SumStats::Key, result: SumStats::Result) &optional;
## A callback that will be called when a single collection
## interval is completed. The ts value will be the time at
## which the collection started.
epoch_finished: function(ts:time) &optional;
};
## Create a summary statistic.
global create: function(ss: SumStats::SumStat);
## Add data into an observation stream. This should be
## called when a script has measured some point value.
##
## id: The observation stream identifier that the data
## point represents.
##
## key: The key that the value is related to.
##
## obs: The data point to send into the stream.
global observe: function(id: string, key: SumStats::Key, obs: SumStats::Observation);
## Dynamically request a sumstat key. This function should be
## used sparingly and not as a replacement for the callbacks
## from the :bro:see:`SumStat` record. The function is only
## available for use within "when" statements as an asynchronous
## function.
##
## ss_name: SumStat name.
##
## key: The SumStat key being requested.
##
## Returns: The result for the requested sumstat key.
global request_key: function(ss_name: string, key: Key): Result;
## This event is generated when thresholds are reset for a SumStat.
##
## name: SumStats name that thresholds were reset for.
global thresholds_reset: event(name: string);
## Helper function to represent a :bro:type:`SumStats::Key` value as
## a simple string.
##
## key: The metric key that is to be converted into a string.
##
## Returns: A string representation of the metric key.
global key2str: function(key: SumStats::Key): string;
}
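# A hypothetical usage sketch (not part of this file) tying the exported API
# together: a reducer attaches to an observation stream, create() registers
# the sumstat, observe() feeds data points, and $epoch_result reports per-key
# results at the end of each epoch. The stream and sumstat names below are
# made up for illustration; SumStats::SUM comes from the sum plugin.
@load base/frameworks/sumstats

event bro_init()
	{
	local r1: SumStats::Reducer = [$stream="conn.attempt", $apply=set(SumStats::SUM)];
	SumStats::create([$name="conn-attempts",
	                  $epoch=1min,
	                  $reducers=set(r1),
	                  $epoch_result(ts: time, key: SumStats::Key, result: SumStats::Result) =
	                  	{
	                  	print fmt("%s made %.0f connection attempts", key$host,
	                  	          result["conn.attempt"]$sum);
	                  	}]);
	}

event connection_attempt(c: connection)
	{
	# Feed one observation per attempted connection, keyed by the originator.
	SumStats::observe("conn.attempt", [$host=c$id$orig_h], [$num=1]);
	}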
# Type to store a table of sumstats results indexed by keys.
type ResultTable: table[Key] of Result;
# The function prototype for plugins to do calculations.
type ObserveFunc: function(r: Reducer, val: double, data: Observation, rv: ResultVal);
redef record Reducer += {
# Internal use only. Provides a reference back to the related SumStats by its name.
ssname: string &optional;
calc_funcs: vector of Calculation &optional;
};
# Internal use only. For tracking thresholds per sumstat and key.
# In the case of a single threshold, 0 means the threshold isn't crossed.
# In the case of a threshold series, the number tracks the threshold offset.
global threshold_tracker: table[string] of table[Key] of count;
function increment_threshold_tracker(ss_name: string, key: Key)
{
if ( ss_name !in threshold_tracker )
threshold_tracker[ss_name] = table();
if ( key !in threshold_tracker[ss_name] )
threshold_tracker[ss_name][key] = 0;
++threshold_tracker[ss_name][key];
}
function get_threshold_index(ss_name: string, key: Key): count
{
if ( ss_name !in threshold_tracker )
return 0;
if ( key !in threshold_tracker[ss_name] )
return 0;
return threshold_tracker[ss_name][key];
}
# Prototype the hook point for plugins to initialize any result values.
global init_resultval_hook: hook(r: Reducer, rv: ResultVal);
# Prototype the hook point for plugins to merge Results.
global compose_resultvals_hook: hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal);
# Store of sumstats indexed on the sumstat id.
global stats_store: table[string] of SumStat = table();
# Store of reducers indexed on the data point stream id.
global reducer_store: table[string] of set[Reducer] = table();
# Store of results indexed on the measurement id.
global result_store: table[string] of ResultTable = table();
# Store of threshold information.
global thresholds_store: table[string, Key] of bool = table();
# Store the calculations.
global calc_store: table[Calculation] of ObserveFunc = table();
# Store the dependencies for Calculations.
global calc_deps: table[Calculation] of vector of Calculation = table();
# Hook for registering observation calculation plugins.
global register_observe_plugins: hook();
# This is called whenever key values are updated and the new val is given as the
# `val` argument. It's only prototyped here because cluster and non-cluster have
# separate implementations.
global data_added: function(ss: SumStat, key: Key, result: Result);
# Event that is used to "finish" measurements and adapt the measurement
# framework for clustered or non-clustered usage.
global finish_epoch: event(ss: SumStat);
function key2str(key: Key): string
{
local out = "";
if ( key?$host )
out = fmt("%shost=%s", out, key$host);
if ( key?$str )
out = fmt("%s%sstr=%s", out, |out|==0 ? "" : ", ", key$str);
return fmt("sumstats_key(%s)", out);
}
function register_observe_plugin(calc: Calculation, func: ObserveFunc)
{
calc_store[calc] = func;
}
function add_observe_plugin_dependency(calc: Calculation, depends_on: Calculation)
{
if ( calc !in calc_deps )
calc_deps[calc] = vector();
calc_deps[calc][|calc_deps[calc]|] = depends_on;
}
event bro_init() &priority=100000
{
# Call all of the plugin registration hooks
hook register_observe_plugins();
}
function init_resultval(r: Reducer): ResultVal
{
local rv: ResultVal = [$begin=network_time(), $end=network_time()];
hook init_resultval_hook(r, rv);
return rv;
}
function compose_resultvals(rv1: ResultVal, rv2: ResultVal): ResultVal
{
local result: ResultVal;
result$begin = (rv1$begin < rv2$begin) ? rv1$begin : rv2$begin;
result$end = (rv1$end > rv2$end) ? rv1$end : rv2$end;
result$num = rv1$num + rv2$num;
# Run the plugin composition hooks.
hook compose_resultvals_hook(result, rv1, rv2);
return result;
}
function compose_results(r1: Result, r2: Result): Result
{
local result: Result = table();
for ( id in r1 )
{
result[id] = r1[id];
}
for ( id in r2 )
{
if ( id in r1 )
result[id] = compose_resultvals(r1[id], r2[id]);
else
result[id] = r2[id];
}
return result;
}
function reset(ss: SumStat)
{
if ( ss$name in result_store )
delete result_store[ss$name];
result_store[ss$name] = table();
if ( ss$name in threshold_tracker )
{
delete threshold_tracker[ss$name];
threshold_tracker[ss$name] = table();
event SumStats::thresholds_reset(ss$name);
}
}
# This could potentially recurse forever, but plugin authors
# must make sure they don't create circular dependencies.
function add_calc_deps(calcs: vector of Calculation, c: Calculation)
{
#print fmt("Checking for deps for %s", c);
for ( i in calc_deps[c] )
{
local skip_calc=F;
for ( j in calcs )
{
if ( calcs[j] == calc_deps[c][i] )
skip_calc=T;
}
if ( ! skip_calc )
{
if ( calc_deps[c][i] in calc_deps )
add_calc_deps(calcs, calc_deps[c][i]);
calcs[|calcs|] = calc_deps[c][i];
#print fmt("add dep for %s [%s] ", c, calc_deps[c][i]);
}
}
}
function create(ss: SumStat)
{
if ( (ss?$threshold || ss?$threshold_series) && ! ss?$threshold_val )
{
Reporter::error("SumStats given a threshold with no $threshold_val function");
}
stats_store[ss$name] = ss;
if ( ss?$threshold || ss?$threshold_series )
threshold_tracker[ss$name] = table();
for ( reducer in ss$reducers )
{
reducer$ssname = ss$name;
reducer$calc_funcs = vector();
for ( calc in reducer$apply )
{
# Add in dependencies recursively.
if ( calc in calc_deps )
add_calc_deps(reducer$calc_funcs, calc);
# Don't add this calculation to the vector if
# it was already added by something else as a
# dependency.
local skip_calc=F;
for ( j in reducer$calc_funcs )
{
if ( calc == reducer$calc_funcs[j] )
skip_calc=T;
}
if ( ! skip_calc )
reducer$calc_funcs[|reducer$calc_funcs|] = calc;
}
if ( reducer$stream !in reducer_store )
reducer_store[reducer$stream] = set();
add reducer_store[reducer$stream][reducer];
}
reset(ss);
schedule ss$epoch { SumStats::finish_epoch(ss) };
}
function observe(id: string, key: Key, obs: Observation)
{
if ( id !in reducer_store )
return;
# Try to add the data to all of the defined reducers.
for ( r in reducer_store[id] )
{
if ( r?$normalize_key )
key = r$normalize_key(copy(key));
# If this reducer has a predicate, run the predicate
# and skip this key if the predicate returns false.
if ( r?$pred && ! r$pred(key, obs) )
next;
local ss = stats_store[r$ssname];
# If there is a threshold and no epoch_result callback,
# we don't need to continue counting since the data will
# never be accessed. Continuing to count was leading
# to some state management issues when measuring
# uniqueness.
# NOTE: this optimization may need to be removed in the
# future if on-demand access to the SumStats results
# is provided.
if ( ! ss?$epoch_result &&
r$ssname in threshold_tracker &&
( ss?$threshold &&
key in threshold_tracker[r$ssname] &&
threshold_tracker[r$ssname][key] != 0 ) ||
( ss?$threshold_series &&
key in threshold_tracker[r$ssname] &&
threshold_tracker[r$ssname][key] == |ss$threshold_series| ) )
{
next;
}
if ( r$ssname !in result_store )
result_store[r$ssname] = table();
local results = result_store[r$ssname];
if ( key !in results )
results[key] = table();
local result = results[key];
if ( id !in result )
result[id] = init_resultval(r);
local result_val = result[id];
++result_val$num;
# Continually update the $end field.
result_val$end=network_time();
# If a string was given, fall back to 1.0 as the value.
local val = 1.0;
if ( obs?$num )
val = obs$num;
else if ( obs?$dbl )
val = obs$dbl;
for ( i in r$calc_funcs )
calc_store[r$calc_funcs[i]](r, val, obs, result_val);
data_added(ss, key, result);
}
}
# This function checks if a threshold has been crossed. It is also used as a method to implement
# mid-break-interval threshold crossing detection for cluster deployments.
function check_thresholds(ss: SumStat, key: Key, result: Result, modify_pct: double): bool
{
if ( ! (ss?$threshold || ss?$threshold_series || ss?$threshold_crossed) )
return F;
# Add in the extra ResultVals to make $threshold_val functions easier to write.
# This length comparison should work because we just need to make
# sure that we have the same number of reducers and results.
if ( |ss$reducers| != |result| )
{
for ( reducer in ss$reducers )
{
if ( reducer$stream !in result )
result[reducer$stream] = init_resultval(reducer);
}
}
local watch = ss$threshold_val(key, result);
if ( modify_pct < 1.0 && modify_pct > 0.0 )
watch = watch/modify_pct;
local t_index = get_threshold_index(ss$name, key);
if ( ss?$threshold &&
t_index == 0 && # Check that the threshold hasn't already been crossed.
watch >= ss$threshold )
{
# Value crossed the threshold.
return T;
}
if ( ss?$threshold_series &&
|ss$threshold_series| > t_index && # Check if there are more thresholds.
watch >= ss$threshold_series[t_index] )
{
# A threshold series was given and the value crossed the next
# value in the series.
return T;
}
return F;
}
function threshold_crossed(ss: SumStat, key: Key, result: Result)
{
# If there is no callback, there is no point in any of this.
if ( ! ss?$threshold_crossed )
return;
increment_threshold_tracker(ss$name,key);
# Add in the extra ResultVals to make threshold_crossed callbacks easier to write.
if ( |ss$reducers| != |result| )
{
for ( reducer in ss$reducers )
{
if ( reducer$stream !in result )
result[reducer$stream] = init_resultval(reducer);
}
}
ss$threshold_crossed(key, result);
}

View file

@ -0,0 +1,56 @@
@load ./main
module SumStats;
event SumStats::finish_epoch(ss: SumStat)
{
if ( ss$name in result_store )
{
local now = network_time();
if ( ss?$epoch_result )
{
local data = result_store[ss$name];
# TODO: don't block here.
for ( key in data )
ss$epoch_result(now, key, data[key]);
}
if ( ss?$epoch_finished )
ss$epoch_finished(now);
reset(ss);
}
schedule ss$epoch { SumStats::finish_epoch(ss) };
}
function data_added(ss: SumStat, key: Key, result: Result)
{
if ( check_thresholds(ss, key, result, 1.0) )
threshold_crossed(ss, key, result);
}
function request(ss_name: string): ResultTable
{
# This only needs to be implemented this way for cluster compatibility.
return when ( T )
{
if ( ss_name in result_store )
return result_store[ss_name];
else
return table();
}
}
function request_key(ss_name: string, key: Key): Result
{
# This only needs to be implemented this way for cluster compatibility.
return when ( T )
{
if ( ss_name in result_store && key in result_store[ss_name] )
return result_store[ss_name][key];
else
return table();
}
}
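# A hypothetical sketch (not part of this change) of calling the asynchronous
# request_key() API: it only works inside a "when" statement, and the sumstat
# and stream names below ("conn-attempts", "conn.attempt") are made up.
event connection_established(c: connection)
	{
	local host = c$id$orig_h;
	when ( local result = SumStats::request_key("conn-attempts", [$host=host]) )
		{
		if ( "conn.attempt" in result )
			print fmt("%s has made %.0f attempts so far", host,
			          result["conn.attempt"]$sum);
		}
	timeout 30secs
		{
		print fmt("SumStats lookup for %s timed out", host);
		}
	}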

View file

@ -0,0 +1,10 @@
@load ./average
@load ./last
@load ./max
@load ./min
@load ./sample
@load ./std-dev
@load ./sum
@load ./topk
@load ./unique
@load ./variance

View file

@ -0,0 +1,37 @@
@load ../main
module SumStats;
export {
redef enum Calculation += {
## Calculate the average of the values.
AVERAGE
};
redef record ResultVal += {
## For numeric data, this calculates the average of all values.
average: double &optional;
};
}
hook register_observe_plugins()
{
register_observe_plugin(AVERAGE, function(r: Reducer, val: double, obs: Observation, rv: ResultVal)
{
if ( ! rv?$average )
rv$average = val;
else
rv$average += (val - rv$average) / rv$num;
});
}
hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal)
{
if ( rv1?$average && rv2?$average )
result$average = ((rv1$average*rv1$num) + (rv2$average*rv2$num))/(rv1$num+rv2$num);
else if ( rv1?$average )
result$average = rv1$average;
else if ( rv2?$average )
result$average = rv2$average;
}

View file

@ -0,0 +1,59 @@
@load base/frameworks/sumstats
@load base/utils/queue
module SumStats;
export {
redef enum Calculation += {
## Keep last X observations in a queue
LAST
};
redef record Reducer += {
## Number of elements to keep.
num_last_elements: count &default=0;
};
redef record ResultVal += {
## This is the queue where elements are maintained. Use the
## :bro:see:`SumStats::get_last` function to get a vector of
## the current element values.
last_elements: Queue::Queue &optional;
};
## Get a vector of element values from a ResultVal.
global get_last: function(rv: ResultVal): vector of Observation;
}
function get_last(rv: ResultVal): vector of Observation
{
local s: vector of Observation = vector();
if ( rv?$last_elements )
Queue::get_vector(rv$last_elements, s);
return s;
}
hook register_observe_plugins()
{
register_observe_plugin(LAST, function(r: Reducer, val: double, obs: Observation, rv: ResultVal)
{
if ( r$num_last_elements > 0 )
{
if ( ! rv?$last_elements )
rv$last_elements = Queue::init([$max_len=r$num_last_elements]);
Queue::put(rv$last_elements, obs);
}
});
}
hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal)
{
# Merge $last_elements
if ( rv1?$last_elements && rv2?$last_elements )
result$last_elements = Queue::merge(rv1$last_elements, rv2$last_elements);
else if ( rv1?$last_elements )
result$last_elements = rv1$last_elements;
else if ( rv2?$last_elements )
result$last_elements = rv2$last_elements;
}
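# A hypothetical usage sketch (not part of this change): keep the last few
# observations per key with the LAST calculation and read them back with
# get_last() in the epoch callback. Stream and sumstat names are illustrative.
event bro_init()
	{
	local r1: SumStats::Reducer = [$stream="test.stream",
	                               $apply=set(SumStats::LAST),
	                               $num_last_elements=5];
	SumStats::create([$name="last-observations",
	                  $epoch=1min,
	                  $reducers=set(r1),
	                  $epoch_result(ts: time, key: SumStats::Key, result: SumStats::Result) =
	                  	{
	                  	local obs = SumStats::get_last(result["test.stream"]);
	                  	for ( i in obs )
	                  		print key, obs[i];
	                  	}]);
	}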

View file

@ -0,0 +1,38 @@
@load ../main
module SumStats;
export {
redef enum Calculation += {
## Find the maximum value.
MAX
};
redef record ResultVal += {
## For numeric data, this tracks the maximum value given.
max: double &optional;
};
}
hook register_observe_plugins()
{
register_observe_plugin(MAX, function(r: Reducer, val: double, obs: Observation, rv: ResultVal)
{
if ( ! rv?$max )
rv$max = val;
else if ( val > rv$max )
rv$max = val;
});
}
hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal)
{
if ( rv1?$max && rv2?$max )
result$max = (rv1$max > rv2$max) ? rv1$max : rv2$max;
else if ( rv1?$max )
result$max = rv1$max;
else if ( rv2?$max )
result$max = rv2$max;
}

View file

@ -0,0 +1,37 @@
@load ../main
module SumStats;
export {
redef enum Calculation += {
## Find the minimum value.
MIN
};
redef record ResultVal += {
## For numeric data, this tracks the minimum value given.
min: double &optional;
};
}
hook register_observe_plugins()
{
register_observe_plugin(MIN, function(r: Reducer, val: double, obs: Observation, rv: ResultVal)
{
if ( ! rv?$min )
rv$min = val;
else if ( val < rv$min )
rv$min = val;
});
}
hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal)
{
if ( rv1?$min && rv2?$min )
result$min = (rv1$min < rv2$min) ? rv1$min : rv2$min;
else if ( rv1?$min )
result$min = rv1$min;
else if ( rv2?$min )
result$min = rv2$min;
}

View file

@ -0,0 +1,118 @@
@load base/frameworks/sumstats/main
module SumStats;
export {
redef enum Calculation += {
## Collect uniformly distributed random samples from the observation stream.
SAMPLE
};
redef record Reducer += {
## A number of sample Observations to collect.
num_samples: count &default=0;
};
redef record ResultVal += {
## This is the vector in which the samples are maintained.
samples: vector of Observation &default=vector();
## Number of total observed elements.
sample_elements: count &default=0;
};
}
redef record ResultVal += {
# Internal use only. This is not meant to be publicly available;
# it is just a copy of num_samples from the Reducer, needed for
# availability in the compose hook.
num_samples: count &default=0;
};
hook init_resultval_hook(r: Reducer, rv: ResultVal)
{
if ( SAMPLE in r$apply )
rv$num_samples = r$num_samples;
}
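# sample_add_sample() below is a reservoir-style sampler: the first
# num_samples observations fill the reservoir directly, and each later
# observation replaces a random slot with probability roughly num_samples/N
# (N = observations seen so far), so every observation is about equally
# likely to end up in the sample.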
function sample_add_sample(obs:Observation, rv: ResultVal)
{
++rv$sample_elements;
if ( |rv$samples| < rv$num_samples )
rv$samples[|rv$samples|] = obs;
else
{
local ra = rand(rv$sample_elements);
if ( ra < rv$num_samples )
rv$samples[ra] = obs;
}
}
hook register_observe_plugins()
{
register_observe_plugin(SAMPLE, function(r: Reducer, val: double, obs: Observation, rv: ResultVal)
{
sample_add_sample(obs, rv);
});
}
hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal)
{
if ( rv1$num_samples != rv2$num_samples )
{
Reporter::error("Merging sample sets with differing sizes is not supported");
return;
}
local num_samples = rv1$num_samples;
result$num_samples = num_samples;
if ( |rv1$samples| > num_samples || |rv2$samples| > num_samples )
{
Reporter::error("Sample vector with too many elements. Aborting.");
return;
}
if ( |rv1$samples| != num_samples && |rv2$samples| < num_samples )
{
if ( |rv1$samples| != rv1$sample_elements || |rv2$samples| < rv2$sample_elements )
{
Reporter::error("Mismatch in sample element size and tracking. Aborting merge");
return;
}
for ( i in rv1$samples )
sample_add_sample(rv1$samples[i], result);
for ( i in rv2$samples)
sample_add_sample(rv2$samples[i], result);
}
else
{
local other_vector: vector of Observation;
local othercount: count;
if ( rv1$sample_elements > rv2$sample_elements )
{
result$samples = copy(rv1$samples);
other_vector = rv2$samples;
othercount = rv2$sample_elements;
}
else
{
result$samples = copy(rv2$samples);
other_vector = rv1$samples;
othercount = rv1$sample_elements;
}
local totalcount = rv1$sample_elements + rv2$sample_elements;
result$sample_elements = totalcount;
for ( i in other_vector )
{
if ( rand(totalcount) <= othercount )
result$samples[i] = other_vector[i];
}
}
}

View file

@ -0,0 +1,41 @@
@load ./variance
@load ../main
module SumStats;
export {
redef enum Calculation += {
## Find the standard deviation of the values.
STD_DEV
};
redef record ResultVal += {
## For numeric data, this calculates the standard deviation.
std_dev: double &default=0.0;
};
}
function calc_std_dev(rv: ResultVal)
{
if ( rv?$variance )
rv$std_dev = sqrt(rv$variance);
}
hook std_dev_hook(r: Reducer, val: double, obs: Observation, rv: ResultVal)
{
calc_std_dev(rv);
}
hook register_observe_plugins() &priority=-10
{
register_observe_plugin(STD_DEV, function(r: Reducer, val: double, obs: Observation, rv: ResultVal)
{
calc_std_dev(rv);
});
add_observe_plugin_dependency(STD_DEV, VARIANCE);
}
hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal) &priority=-10
{
calc_std_dev(result);
}

View file

@ -0,0 +1,53 @@
@load ../main
module SumStats;
export {
redef enum Calculation += {
## Sums the values given. For string values,
## this will be the number of strings given.
SUM
};
redef record ResultVal += {
## For numeric data, this tracks the sum of all values.
sum: double &default=0.0;
};
#type threshold_function: function(key: SumStats::Key, result: SumStats::Result): count;
#global sum_threshold: function(data_id: string): threshold_function;
}
#function sum_threshold(data_id: string): threshold_function
# {
# return function(key: SumStats::Key, result: SumStats::Result): count
# {
# print fmt("data_id: %s", data_id);
# print result;
# return double_to_count(result[data_id]$sum);
# };
# }
hook init_resultval_hook(r: Reducer, rv: ResultVal)
{
if ( SUM in r$apply && ! rv?$sum )
rv$sum = 0;
}
hook register_observe_plugins()
{
register_observe_plugin(SUM, function(r: Reducer, val: double, obs: Observation, rv: ResultVal)
{
rv$sum += val;
});
}
hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal)
{
if ( rv1?$sum || rv2?$sum )
{
result$sum = rv1?$sum ? rv1$sum : 0;
if ( rv2?$sum )
result$sum += rv2$sum;
}
}

View file

@ -0,0 +1,52 @@
@load base/frameworks/sumstats
module SumStats;
export {
redef record Reducer += {
## Number of elements to keep in the top-k list.
topk_size: count &default=500;
};
redef enum Calculation += {
TOPK
};
redef record ResultVal += {
topk: opaque of topk &optional;
};
}
hook register_observe_plugins()
{
register_observe_plugin(TOPK, function(r: Reducer, val: double, obs: Observation, rv: ResultVal)
{
topk_add(rv$topk, obs);
});
}
hook init_resultval_hook(r: Reducer, rv: ResultVal)
{
if ( TOPK in r$apply && ! rv?$topk )
rv$topk = topk_init(r$topk_size);
}
hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal)
{
if ( rv1?$topk )
{
result$topk = topk_init(topk_size(rv1$topk));
topk_merge(result$topk, rv1$topk);
if ( rv2?$topk )
topk_merge(result$topk, rv2$topk);
}
else if ( rv2?$topk )
{
result$topk = topk_init(topk_size(rv2$topk));
topk_merge(result$topk, rv2$topk);
}
}
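# A hypothetical usage sketch (not part of this change): the top-k state is
# kept in an opaque handle, so the current top entries are read back with the
# topk_get_top() BiF (here they are Observation records). The stream and
# sumstat names below are illustrative only.
event bro_init()
	{
	local r1: SumStats::Reducer = [$stream="test.stream",
	                               $apply=set(SumStats::TOPK),
	                               $topk_size=100];
	SumStats::create([$name="top-observations",
	                  $epoch=1min,
	                  $reducers=set(r1),
	                  $epoch_result(ts: time, key: SumStats::Key, result: SumStats::Result) =
	                  	{
	                  	local top5 = topk_get_top(result["test.stream"]$topk, 5);
	                  	for ( i in top5 )
	                  		print key, top5[i];
	                  	}]);
	}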

View file

@ -0,0 +1,53 @@
@load ../main
module SumStats;
export {
redef enum Calculation += {
## Calculate the number of unique values.
UNIQUE
};
redef record ResultVal += {
## If cardinality is being tracked, the number of unique
## items is tracked here.
unique: count &default=0;
};
}
redef record ResultVal += {
# Internal use only. This is not meant to be publicly available
# because we don't want callers to rely on being able to inspect the
# values, since we will likely move to a probabilistic data structure
# in the future.
# TODO: in the future this will optionally be a HyperLogLog structure.
unique_vals: set[Observation] &optional;
};
hook register_observe_plugins()
{
register_observe_plugin(UNIQUE, function(r: Reducer, val: double, obs: Observation, rv: ResultVal)
{
if ( ! rv?$unique_vals )
rv$unique_vals=set();
add rv$unique_vals[obs];
rv$unique = |rv$unique_vals|;
});
}
hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal)
{
if ( rv1?$unique_vals || rv2?$unique_vals )
{
if ( rv1?$unique_vals )
result$unique_vals = copy(rv1$unique_vals);
if ( rv2?$unique_vals )
if ( ! result?$unique_vals )
result$unique_vals = copy(rv2$unique_vals);
else
for ( val2 in rv2$unique_vals )
add result$unique_vals[copy(val2)];
result$unique = |result$unique_vals|;
}
}

View file

@ -0,0 +1,69 @@
@load ./average
@load ../main
module SumStats;
export {
redef enum Calculation += {
## Find the variance of the values.
VARIANCE
};
redef record ResultVal += {
## For numeric data, this calculates the variance.
variance: double &optional;
};
}
redef record ResultVal += {
# Internal use only. Used for incrementally calculating variance.
prev_avg: double &optional;
# Internal use only. For calculating incremental variance.
var_s: double &default=0.0;
};
function calc_variance(rv: ResultVal)
{
rv$variance = (rv$num > 1) ? rv$var_s/(rv$num-1) : 0.0;
}
hook register_observe_plugins() &priority=-5
{
register_observe_plugin(VARIANCE, function(r: Reducer, val: double, obs: Observation, rv: ResultVal)
{
if ( rv$num > 1 )
rv$var_s += ((val - rv$prev_avg) * (val - rv$average));
calc_variance(rv);
rv$prev_avg = rv$average;
});
add_observe_plugin_dependency(VARIANCE, AVERAGE);
}
# Reduced priority since this depends on the average
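# The composition below uses the standard formula for merging
# per-partition sums of squared deviations:
#   var_s = n1*(var_s1/n1 + (m1 - m)^2) + n2*(var_s2/n2 + (m2 - m)^2),
# where m is the overall average already composed by the AVERAGE plugin.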
hook compose_resultvals_hook(result: ResultVal, rv1: ResultVal, rv2: ResultVal) &priority=-5
{
if ( rv1?$var_s && rv1?$average &&
rv2?$var_s && rv2?$average )
{
local rv1_avg_sq = (rv1$average - result$average);
rv1_avg_sq = rv1_avg_sq*rv1_avg_sq;
local rv2_avg_sq = (rv2$average - result$average);
rv2_avg_sq = rv2_avg_sq*rv2_avg_sq;
result$var_s = rv1$num*(rv1$var_s/rv1$num + rv1_avg_sq) + rv2$num*(rv2$var_s/rv2$num + rv2_avg_sq);
}
else if ( rv1?$var_s )
result$var_s = rv1$var_s;
else if ( rv2?$var_s )
result$var_s = rv2$var_s;
if ( rv1?$prev_avg && rv2?$prev_avg )
result$prev_avg = ((rv1$prev_avg*rv1$num) + (rv2$prev_avg*rv2$num))/(rv1$num+rv2$num);
else if ( rv1?$prev_avg )
result$prev_avg = rv1$prev_avg;
else if ( rv2?$prev_avg )
result$prev_avg = rv2$prev_avg;
calc_variance(result);
}

View file

@ -83,19 +83,17 @@ export {
} }
const ayiya_ports = { 5072/udp }; const ayiya_ports = { 5072/udp };
redef dpd_config += { [ANALYZER_AYIYA] = [$ports = ayiya_ports] };
const teredo_ports = { 3544/udp }; const teredo_ports = { 3544/udp };
redef dpd_config += { [ANALYZER_TEREDO] = [$ports = teredo_ports] };
const gtpv1_ports = { 2152/udp, 2123/udp }; const gtpv1_ports = { 2152/udp, 2123/udp };
redef dpd_config += { [ANALYZER_GTPV1] = [$ports = gtpv1_ports] };
redef likely_server_ports += { ayiya_ports, teredo_ports, gtpv1_ports }; redef likely_server_ports += { ayiya_ports, teredo_ports, gtpv1_ports };
event bro_init() &priority=5 event bro_init() &priority=5
{ {
Log::create_stream(Tunnel::LOG, [$columns=Info]); Log::create_stream(Tunnel::LOG, [$columns=Info]);
Analyzer::register_for_ports(Analyzer::ANALYZER_AYIYA, ayiya_ports);
Analyzer::register_for_ports(Analyzer::ANALYZER_TEREDO, teredo_ports);
Analyzer::register_for_ports(Analyzer::ANALYZER_GTPV1, gtpv1_ports);
} }
function register_all(ecv: EncapsulatingConnVector) function register_all(ecv: EncapsulatingConnVector)

View file

@ -1,5 +1,5 @@
@load base/const.bif @load base/bif/const.bif.bro
@load base/types.bif @load base/bif/types.bif
# Type declarations # Type declarations
@ -222,17 +222,6 @@ type endpoint_stats: record {
endian_type: count; endian_type: count;
}; };
## A unique analyzer instance ID. Each time instantiates a protocol analyzers
## for a connection, it assigns it a unique ID that can be used to reference
## that instance.
##
## .. bro:see:: analyzer_name disable_analyzer protocol_confirmation
## protocol_violation
##
## .. todo::While we declare an alias for the type here, the events/functions still
## use ``count``. That should be changed.
type AnalyzerID: count;
module Tunnel; module Tunnel;
export { export {
## Records the identity of an encapsulating parent of a tunneled connection. ## Records the identity of an encapsulating parent of a tunneled connection.
@ -300,7 +289,7 @@ type connection: record {
## one protocol analyzer is able to parse the same data. If so, all will ## one protocol analyzer is able to parse the same data. If so, all will
## be recorded. Also note that the recorced services are independent of any ## be recorded. Also note that the recorced services are independent of any
## transport-level protocols. ## transport-level protocols.
service: set[string]; service: set[string];
addl: string; ##< Deprecated. addl: string; ##< Deprecated.
hot: count; ##< Deprecated. hot: count; ##< Deprecated.
history: string; ##< State history of connections. See *history* in :bro:see:`Conn::Info`. history: string; ##< State history of connections. See *history* in :bro:see:`Conn::Info`.
@ -316,6 +305,73 @@ type connection: record {
tunnel: EncapsulatingConnVector &optional; tunnel: EncapsulatingConnVector &optional;
}; };
## Default amount of time a file can be inactive before the file analysis
## gives up and discards any internal state related to the file.
const default_file_timeout_interval: interval = 2 mins &redef;
## Default amount of bytes that file analysis will buffer before raising
## :bro:see:`file_new`.
const default_file_bof_buffer_size: count = 1024 &redef;
## A file that Bro is analyzing. This is Bro's type for describing the basic
## internal metadata collected about a "file", which is essentially just a
## byte stream that is e.g. pulled from a network connection or possibly
## some other input source.
type fa_file: record {
## An identifier associated with a single file.
id: string;
## Identifier associated with a container file from which this one was
## extracted as part of the file analysis.
parent_id: string &optional;
## An identification of the source of the file data. E.g. it may be
## a network protocol over which it was transferred, or a local file
## path which was read, or some other input source.
source: string;
## If the source of this file is a network connection, this field
## may be set to indicate the directionality.
is_orig: bool &optional;
## The set of connections over which the file was transferred.
conns: table[conn_id] of connection &optional;
## The time at which the last activity for the file was seen.
last_active: time;
## Number of bytes provided to the file analysis engine for the file.
seen_bytes: count &default=0;
## Total number of bytes that are supposed to comprise the full file.
total_bytes: count &optional;
## The number of bytes in the file stream that were completely missed
## during the process of analysis e.g. due to dropped packets.
missing_bytes: count &default=0;
## The number of bytes in the file stream that were delivered to file
## analyzers out of sequence due to reassembly buffer overflow.
overflow_bytes: count &default=0;
## The amount of time the analysis engine will wait for new data for
## this file before giving up on it.
timeout_interval: interval &default=default_file_timeout_interval;
## The number of bytes at the beginning of a file to save for later
## inspection in the *bof_buffer* field.
bof_buffer_size: count &default=default_file_bof_buffer_size;
## The content of the beginning of a file up to *bof_buffer_size* bytes.
## This is also the buffer that's used for file/mime type detection.
bof_buffer: string &optional;
## A mime type provided by libmagic against the *bof_buffer*, or
## in the cases where no buffering of the beginning of file occurs,
## an initial guess of the mime type based on the first data seen.
mime_type: string &optional;
} &redef;
## Fields of a SYN packet. ## Fields of a SYN packet.
## ##
## .. bro:see:: connection_SYN_packet ## .. bro:see:: connection_SYN_packet
@ -475,22 +531,19 @@ type record_field_table: table[string] of record_field;
# dependent on the names remaining as they are now. # dependent on the names remaining as they are now.
## Set of BPF capture filters to use for capturing, indexed by a user-definable ## Set of BPF capture filters to use for capturing, indexed by a user-definable
## ID (which must be unique). If Bro is *not* configured to examine ## ID (which must be unique). If Bro is *not* configured with
## :bro:id:`PacketFilter::all_packets`, all packets matching at least ## :bro:id:`PacketFilter::enable_auto_protocol_capture_filters`,
## one of the filters in this table (and all in :bro:id:`restrict_filters`) ## all packets matching at least one of the filters in this table (and all in
## will be analyzed. ## :bro:id:`restrict_filters`) will be analyzed.
## ##
## .. bro:see:: PacketFilter PacketFilter::all_packets ## .. bro:see:: PacketFilter PacketFilter::enable_auto_protocol_capture_filters
## PacketFilter::unrestricted_filter restrict_filters ## PacketFilter::unrestricted_filter restrict_filters
global capture_filters: table[string] of string &redef; global capture_filters: table[string] of string &redef;
## Set of BPF filters to restrict capturing, indexed by a user-definable ID (which ## Set of BPF filters to restrict capturing, indexed by a user-definable ID (which
## must be unique). If Bro is *not* configured to examine ## must be unique).
## :bro:id:`PacketFilter::all_packets`, only packets matching *all* of the
## filters in this table (and any in :bro:id:`capture_filters`) will be
## analyzed.
## ##
## .. bro:see:: PacketFilter PacketFilter::all_packets ## .. bro:see:: PacketFilter PacketFilter::enable_auto_protocol_capture_filters
## PacketFilter::unrestricted_filter capture_filters ## PacketFilter::unrestricted_filter capture_filters
global restrict_filters: table[string] of string &redef; global restrict_filters: table[string] of string &redef;
@ -646,9 +699,10 @@ type entropy_test_result: record {
}; };
# Prototypes of Bro built-in functions. # Prototypes of Bro built-in functions.
@load base/strings.bif @load base/bif/strings.bif
@load base/bro.bif @load base/bif/bro.bif
@load base/reporter.bif @load base/bif/reporter.bif
@load base/bif/bloom-filter.bif
## Deprecated. This is superseded by the new logging framework. ## Deprecated. This is superseded by the new logging framework.
global log_file_name: function(tag: string): string &redef; global log_file_name: function(tag: string): string &redef;
@ -710,19 +764,6 @@ global signature_files = "" &add_func = add_signature_file;
## ``p0f`` fingerprint file to use. Will be searched relative to ``BROPATH``. ## ``p0f`` fingerprint file to use. Will be searched relative to ``BROPATH``.
const passive_fingerprint_file = "base/misc/p0f.fp" &redef; const passive_fingerprint_file = "base/misc/p0f.fp" &redef;
# todo::testing to see if I can remove these without causing problems.
#const ftp = 21/tcp;
#const ssh = 22/tcp;
#const telnet = 23/tcp;
#const smtp = 25/tcp;
#const domain = 53/tcp; # note, doesn't include UDP version
#const gopher = 70/tcp;
#const finger = 79/tcp;
#const http = 80/tcp;
#const ident = 113/tcp;
#const bgp = 179/tcp;
#const rlogin = 513/tcp;
# TCP values for :bro:see:`endpoint` *state* field. # TCP values for :bro:see:`endpoint` *state* field.
# todo::these should go into an enum to make them autodoc'able. # todo::these should go into an enum to make them autodoc'able.
const TCP_INACTIVE = 0; ##< Endpoint is still inactive. const TCP_INACTIVE = 0; ##< Endpoint is still inactive.
@ -2656,7 +2697,7 @@ export {
} }
module GLOBAL; module GLOBAL;
@load base/event.bif @load base/bif/event.bif
## BPF filter the user has set via the -f command line options. Empty if none. ## BPF filter the user has set via the -f command line options. Empty if none.
const cmd_line_bpf_filter = "" &redef; const cmd_line_bpf_filter = "" &redef;
@ -2846,34 +2887,11 @@ const remote_trace_sync_peers = 0 &redef;
## consistency check. ## consistency check.
const remote_check_sync_consistency = F &redef; const remote_check_sync_consistency = F &redef;
## Analyzer tags. The core automatically defines constants
## ``ANALYZER_<analyzer-name>*``, e.g., ``ANALYZER_HTTP``.
##
## .. bro:see:: dpd_config
##
## .. todo::We should autodoc these automaticallty generated constants.
type AnalyzerTag: count;
## Set of ports activating a particular protocol analysis.
##
## .. bro:see:: dpd_config
type dpd_protocol_config: record {
ports: set[port] &optional; ##< Set of ports.
};
## Port configuration for Bro's "dynamic protocol detection". Protocol
## analyzers can be activated via either well-known ports or content analysis.
## This table defines the ports.
##
## .. bro:see:: dpd_reassemble_first_packets dpd_buffer_size
## dpd_match_only_beginning dpd_ignore_ports
const dpd_config: table[AnalyzerTag] of dpd_protocol_config = {} &redef;
## Reassemble the beginning of all TCP connections before doing ## Reassemble the beginning of all TCP connections before doing
## signature-matching. Enabling this provides more accurate matching at the ## signature-matching. Enabling this provides more accurate matching at the
## expense of CPU cycles. ## expense of CPU cycles.
## ##
## .. bro:see:: dpd_config dpd_buffer_size ## .. bro:see:: dpd_buffer_size
## dpd_match_only_beginning dpd_ignore_ports ## dpd_match_only_beginning dpd_ignore_ports
## ##
## .. note:: Despite the name, this option affects *all* signature matching, not ## .. note:: Despite the name, this option affects *all* signature matching, not
@ -2888,24 +2906,24 @@ const dpd_reassemble_first_packets = T &redef;
## activated afterwards. Then only analyzers that can deal with partial ## activated afterwards. Then only analyzers that can deal with partial
## connections will be able to analyze the session. ## connections will be able to analyze the session.
## ##
## .. bro:see:: dpd_reassemble_first_packets dpd_config dpd_match_only_beginning ## .. bro:see:: dpd_reassemble_first_packets dpd_match_only_beginning
## dpd_ignore_ports ## dpd_ignore_ports
const dpd_buffer_size = 1024 &redef; const dpd_buffer_size = 1024 &redef;
## If true, stops signature matching if dpd_buffer_size has been reached. ## If true, stops signature matching if dpd_buffer_size has been reached.
## ##
## .. bro:see:: dpd_reassemble_first_packets dpd_buffer_size ## .. bro:see:: dpd_reassemble_first_packets dpd_buffer_size
## dpd_config dpd_ignore_ports ## dpd_ignore_ports
## ##
## .. note:: Despite the name, this option affects *all* signature matching, not ## .. note:: Despite the name, this option affects *all* signature matching, not
## only signatures used for dynamic protocol detection. ## only signatures used for dynamic protocol detection.
const dpd_match_only_beginning = T &redef; const dpd_match_only_beginning = T &redef;
## If true, don't consider any ports for deciding which protocol analyzer to ## If true, don't consider any ports for deciding which protocol analyzer to
## use. If so, the value of :bro:see:`dpd_config` is ignored. ## use.
## ##
## .. bro:see:: dpd_reassemble_first_packets dpd_buffer_size ## .. bro:see:: dpd_reassemble_first_packets dpd_buffer_size
## dpd_match_only_beginning dpd_config ## dpd_match_only_beginning
const dpd_ignore_ports = F &redef; const dpd_ignore_ports = F &redef;
## Ports which the core considers being likely used by servers. For ports in ## Ports which the core considers being likely used by servers. For ports in
@ -2913,13 +2931,6 @@ const dpd_ignore_ports = F &redef;
## connection if it misses the initial handshake. ## connection if it misses the initial handshake.
const likely_server_ports: set[port] &redef; const likely_server_ports: set[port] &redef;
## Deprated. Set of all ports for which we know an analyzer, built by
## :doc:`/scripts/base/frameworks/dpd/main`.
##
## .. todo::This should be defined by :doc:`/scripts/base/frameworks/dpd/main`
## itself we still need it.
global dpd_analyzer_ports: table[port] of set[AnalyzerTag];
## Per-incident timer managers are drained after this amount of inactivity. ## Per-incident timer managers are drained after this amount of inactivity.
const timer_mgr_inactivity_timeout = 1 min &redef; const timer_mgr_inactivity_timeout = 1 min &redef;
@ -3028,9 +3039,19 @@ module GLOBAL;
## Number of bytes per packet to capture from live interfaces. ## Number of bytes per packet to capture from live interfaces.
const snaplen = 8192 &redef; const snaplen = 8192 &redef;
# Load the logging framework here because it uses fairly deep integration with ## Seed for hashes computed internally for probabilistic data structures. Using
## the same value here will make the hashes compatible between independent Bro
## instances. If left unset, Bro will use a temporary local seed.
const global_hash_seed: string = "" &redef;
# Load BiFs defined by plugins.
@load base/bif/plugins
# Load these frameworks here because they use fairly deep integration with
# BiFs and script-land defined types. # BiFs and script-land defined types.
@load base/frameworks/logging @load base/frameworks/logging
@load base/frameworks/input @load base/frameworks/input
@load base/frameworks/analyzer
@load base/frameworks/files
@load base/bif

View file

@ -5,21 +5,27 @@
##! you actually want. ##! you actually want.
@load base/utils/site @load base/utils/site
@load base/utils/active-http
@load base/utils/addrs @load base/utils/addrs
@load base/utils/conn-ids @load base/utils/conn-ids
@load base/utils/dir
@load base/utils/directions-and-hosts @load base/utils/directions-and-hosts
@load base/utils/exec
@load base/utils/files @load base/utils/files
@load base/utils/numbers @load base/utils/numbers
@load base/utils/paths @load base/utils/paths
@load base/utils/patterns @load base/utils/patterns
@load base/utils/queue
@load base/utils/strings @load base/utils/strings
@load base/utils/thresholds @load base/utils/thresholds
@load base/utils/time
@load base/utils/urls @load base/utils/urls
# This has some deep interplay between types and BiFs so it's # This has some deep interplay between types and BiFs so it's
# loaded in base/init-bare.bro # loaded in base/init-bare.bro
#@load base/frameworks/logging #@load base/frameworks/logging
@load base/frameworks/notice @load base/frameworks/notice
@load base/frameworks/analyzer
@load base/frameworks/dpd @load base/frameworks/dpd
@load base/frameworks/signatures @load base/frameworks/signatures
@load base/frameworks/packet-filter @load base/frameworks/packet-filter
@ -27,21 +33,28 @@
@load base/frameworks/communication @load base/frameworks/communication
@load base/frameworks/control @load base/frameworks/control
@load base/frameworks/cluster @load base/frameworks/cluster
@load base/frameworks/metrics
@load base/frameworks/intel @load base/frameworks/intel
@load base/frameworks/reporter @load base/frameworks/reporter
@load base/frameworks/sumstats
@load base/frameworks/tunnels @load base/frameworks/tunnels
@load base/protocols/conn @load base/protocols/conn
@load base/protocols/dhcp
@load base/protocols/dnp3
@load base/protocols/dns @load base/protocols/dns
@load base/protocols/ftp @load base/protocols/ftp
@load base/protocols/http @load base/protocols/http
@load base/protocols/irc @load base/protocols/irc
@load base/protocols/modbus @load base/protocols/modbus
@load base/protocols/pop3
@load base/protocols/smtp @load base/protocols/smtp
@load base/protocols/socks @load base/protocols/socks
@load base/protocols/ssh @load base/protocols/ssh
@load base/protocols/ssl @load base/protocols/ssl
@load base/protocols/syslog @load base/protocols/syslog
@load base/protocols/tunnels
@load base/files/hash
@load base/files/extract
@load base/misc/find-checksum-offloading @load base/misc/find-checksum-offloading

View file

@ -6,9 +6,9 @@ module Conn;
export { export {
## Define inactivity timeouts by the service detected being used over ## Define inactivity timeouts by the service detected being used over
## the connection. ## the connection.
const analyzer_inactivity_timeouts: table[AnalyzerTag] of interval = { const analyzer_inactivity_timeouts: table[Analyzer::Tag] of interval = {
# For interactive services, allow longer periods of inactivity. # For interactive services, allow longer periods of inactivity.
[[ANALYZER_SSH, ANALYZER_FTP]] = 1 hrs, [[Analyzer::ANALYZER_SSH, Analyzer::ANALYZER_FTP]] = 1 hrs,
} &redef; } &redef;
## Define inactivity timeouts based on common protocol ports. ## Define inactivity timeouts based on common protocol ports.
@ -18,7 +18,7 @@ export {
} }
event protocol_confirmation(c: connection, atype: count, aid: count) event protocol_confirmation(c: connection, atype: Analyzer::Tag, aid: count)
{ {
if ( atype in analyzer_inactivity_timeouts ) if ( atype in analyzer_inactivity_timeouts )
set_inactivity_timeout(c$id, analyzer_inactivity_timeouts[atype]); set_inactivity_timeout(c$id, analyzer_inactivity_timeouts[atype]);

View file

@ -0,0 +1,4 @@
@load ./consts
@load ./main
@load-sigs ./dpd.sig

View file

@ -0,0 +1,20 @@
##! Types, errors, and fields for analyzing DHCP data. A helper file
##! for DHCP analysis scripts.
module DHCP;
export {
## Types of DHCP messages. See RFC 1533.
const message_types = {
[1] = "DHCP_DISCOVER",
[2] = "DHCP_OFFER",
[3] = "DHCP_REQUEST",
[4] = "DHCP_DECLINE",
[5] = "DHCP_ACK",
[6] = "DHCP_NAK",
[7] = "DHCP_RELEASE",
[8] = "DHCP_INFORM",
} &default = function(n: count): string { return fmt("unknown-message-type-%d", n); };
}
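# A small illustration (not part of this file) of the &default function above:
# known codes map to their names, and unknown codes fall back to the generated
# string instead of triggering a missing-index error.
event bro_init()
	{
	print DHCP::message_types[3];   # DHCP_REQUEST
	print DHCP::message_types[99];  # unknown-message-type-99
	}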

View file

@ -0,0 +1,5 @@
signature dhcp_cookie {
ip-proto == udp
payload /^.*\x63\x82\x53\x63/
enable "dhcp"
}

View file

@ -0,0 +1,75 @@
##! Analyzes DHCP traffic in order to log DHCP leases given to clients.
##! This script ignores large swaths of the protocol, since it is rather
##! noisy on most networks, and focuses on the end-result: assigned leases.
##!
##! If you'd like to track known DHCP devices and to log the hostname
##! supplied by the client, see policy/protocols/dhcp/known-devices.bro
@load ./utils.bro
module DHCP;
export {
redef enum Log::ID += { LOG };
## The record type which contains the column fields of the DHCP log.
type Info: record {
## The earliest time at which a DHCP message over the
## associated connection is observed.
ts: time &log;
## A unique identifier of the connection over which DHCP is
## occurring.
uid: string &log;
## The connection's 4-tuple of endpoint addresses/ports.
id: conn_id &log;
## Client's hardware address.
mac: string &log &optional;
## Client's actual assigned IP address.
assigned_ip: addr &log &optional;
## IP address lease interval.
lease_time: interval &log &optional;
## A random number chosen by the client for this transaction.
trans_id: count &log;
};
## Event that can be handled to access the DHCP
## record as it is sent on to the logging framework.
global log_dhcp: event(rec: Info);
}
# Add the dhcp info to the connection record
redef record connection += {
dhcp: Info &optional;
};
# 67/udp is the server's port, 68/udp the client.
const ports = { 67/udp, 68/udp };
redef likely_server_ports += { 67/udp };
event bro_init()
{
Log::create_stream(DHCP::LOG, [$columns=Info, $ev=log_dhcp]);
Analyzer::register_for_ports(Analyzer::ANALYZER_DHCP, ports);
}
event dhcp_ack(c: connection, msg: dhcp_msg, mask: addr, router: dhcp_router_list, lease: interval, serv_addr: addr, host_name: string)
{
local info: Info;
info$ts = network_time();
info$id = c$id;
info$uid = c$uid;
info$lease_time = lease;
info$trans_id = msg$xid;
if ( msg$h_addr != "" )
info$mac = msg$h_addr;
if ( reverse_ip(msg$yiaddr) != 0.0.0.0 )
info$assigned_ip = reverse_ip(msg$yiaddr);
else
info$assigned_ip = c$id$orig_h;
c$dhcp = info;
Log::write(DHCP::LOG, c$dhcp);
}

View file

@ -0,0 +1,21 @@
##! Utilities specific for DHCP processing.
@load ./main
module DHCP;
export {
## Reverse the octets of an IPv4 IP.
##
## ip: An :bro:type:`addr` IPv4 address.
##
## Returns: A reversed addr.
global reverse_ip: function(ip: addr): addr;
}
function reverse_ip(ip: addr): addr
{
local octets = split(cat(ip), /\./);
return to_addr(cat(octets[4], ".", octets[3], ".", octets[2], ".", octets[1]));
}
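# A quick usage sketch (hypothetical, not part of this file); main.bro uses
# this helper to turn the byte-reversed yiaddr reported by the DHCP analyzer
# back into a normal address:
event bro_init()
	{
	print DHCP::reverse_ip(4.3.2.1);  # prints 1.2.3.4
	}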

View file

@ -0,0 +1,3 @@
@load ./main
@load-sigs ./dpd.sig

View file

@ -0,0 +1,49 @@
module DNP3;
export {
## Standard-defined DNP3 function codes.
const function_codes = {
# Requests.
[0x00] = "CONFIRM",
[0x01] = "READ",
[0x02] = "WRITE",
[0x03] = "SELECT",
[0x04] = "OPERATE",
[0x05] = "DIRECT_OPERATE",
[0x06] = "DIRECT_OPERATE_NR",
[0x07] = "IMMED_FREEZE",
[0x08] = "IMMED_FREEZE_NR",
[0x09] = "FREEZE_CLEAR",
[0x0a] = "FREEZE_CLEAR_NR",
[0x0b] = "FREEZE_AT_TIME",
[0x0c] = "FREEZE_AT_TIME_NR",
[0x0d] = "COLD_RESTART",
[0x0e] = "WARM_RESTART",
[0x0f] = "INITIALIZE_DATA",
[0x10] = "INITIALIZE_APPL",
[0x11] = "START_APPL",
[0x12] = "STOP_APPL",
[0x13] = "SAVE_CONFIG",
[0x14] = "ENABLE_UNSOLICITED",
[0x15] = "DISABLE_UNSOLICITED",
[0x16] = "ASSIGN_CLASS",
[0x17] = "DELAY_MEASURE",
[0x18] = "RECORD_CURRENT_TIME",
[0x19] = "OPEN_FILE",
[0x1a] = "CLOSE_FILE",
[0x1b] = "DELETE_FILE",
[0x1c] = "GET_FILE_INFO",
[0x1d] = "AUTHENTICATE_FILE",
[0x1e] = "ABORT_FILE",
[0x1f] = "ACTIVATE_CONFIG",
[0x20] = "AUTHENTICATE_REQ",
[0x21] = "AUTHENTICATE_ERR",
# Responses.
[0x81] = "RESPONSE",
[0x82] = "UNSOLICITED_RESPONSE",
[0x83] = "AUTHENTICATE_RESP",
} &default=function(i: count):string { return fmt("unknown-%d", i); } &redef;
}

View file

@ -0,0 +1,9 @@
# DNP3 packets always start with 0x05 0x64.
signature dpd_dnp3_server {
ip-proto == tcp
payload /\x05\x64/
tcp-state responder
enable "dnp3"
}

View file

@ -0,0 +1,73 @@
##! A very basic DNP3 analysis script that just logs requests and replies.
module DNP3;
@load ./consts
export {
redef enum Log::ID += { LOG };
type Info: record {
## Time of the request.
ts: time &log;
## Unique identifier for the connection.
uid: string &log;
## Identifier for the connection.
id: conn_id &log;
## The name of the function message in the request.
fc_request: string &log &optional;
## The name of the function message in the reply.
fc_reply: string &log &optional;
## The response's "internal indication number".
iin: count &log &optional;
};
## Event that can be handled to access the DNP3 record as it is sent on
## to the logging framework.
global log_dnp3: event(rec: Info);
}
redef record connection += {
dnp3: Info &optional;
};
const ports = { 20000/tcp };
redef likely_server_ports += { ports };
event bro_init() &priority=5
{
Log::create_stream(DNP3::LOG, [$columns=Info, $ev=log_dnp3]);
Analyzer::register_for_ports(Analyzer::ANALYZER_DNP3, ports);
}
event dnp3_application_request_header(c: connection, is_orig: bool, fc: count)
{
if ( ! c?$dnp3 )
c$dnp3 = [$ts=network_time(), $uid=c$uid, $id=c$id];
c$dnp3$ts = network_time();
c$dnp3$fc_request = function_codes[fc];
}
event dnp3_application_response_header(c: connection, is_orig: bool, fc: count, iin: count)
{
if ( ! c?$dnp3 )
c$dnp3 = [$ts=network_time(), $uid=c$uid, $id=c$id];
c$dnp3$ts = network_time();
c$dnp3$fc_reply = function_codes[fc];
c$dnp3$iin = iin;
Log::write(LOG, c$dnp3);
delete c$dnp3;
}
event connection_state_remove(c: connection) &priority=-5
{
if ( ! c?$dnp3 )
return;
Log::write(LOG, c$dnp3);
delete c$dnp3;
}

View file

@ -1,6 +1,7 @@
##! Base DNS analysis script which tracks and logs DNS queries along with ##! Base DNS analysis script which tracks and logs DNS queries along with
##! their responses. ##! their responses.
@load base/utils/queue
@load ./consts @load ./consts
module DNS; module DNS;
@ -73,19 +74,6 @@ export {
total_replies: count &optional; total_replies: count &optional;
}; };
## A record type which tracks the status of DNS queries for a given
## :bro:type:`connection`.
type State: record {
## Indexed by query id, returns Info record corresponding to
## query/response which haven't completed yet.
pending: table[count] of Info &optional;
## This is the list of DNS responses that have completed based on the
-	## number of responses declared and the number received. The contents
-	## of the set are transaction IDs.
-	finished_answers: set[count] &optional;
-	};

 	## An event that can be handled to access the :bro:type:`DNS::Info`
 	## record as it is sent to the logging framework.
 	global log_dns: event(rec: Info);

@@ -102,46 +90,49 @@ export {
 	##
 	## reply: The specific response information according to RR type/class.
 	global do_reply: event(c: connection, msg: dns_msg, ans: dns_answer, reply: string);
+
+	## A hook that is called whenever a session is being set.
+	## This can be used if additional initialization logic needs to happen
+	## when creating a new session value.
+	##
+	## c: The connection involved in the new session
+	##
+	## msg: The DNS message header information.
+	##
+	## is_query: Indicator for if this is being called for a query or a response.
+	global set_session: hook(c: connection, msg: dns_msg, is_query: bool);
+
+	## A record type which tracks the status of DNS queries for a given
+	## :bro:type:`connection`.
+	type State: record {
+		## Indexed by query id, returns Info record corresponding to
+		## query/response which haven't completed yet.
+		pending: table[count] of Queue::Queue;
+
+		## This is the list of DNS responses that have completed based on the
+		## number of responses declared and the number received. The contents
+		## of the set are transaction IDs.
+		finished_answers: set[count];
+	};
 }

 redef record connection += {
 	dns: Info &optional;
 	dns_state: State &optional;
 };

-# DPD configuration.
-redef capture_filters += {
-	["dns"] = "port 53",
-	["mdns"] = "udp and port 5353",
-	["llmns"] = "udp and port 5355",
-	["netbios-ns"] = "udp port 137",
-};
-
-const dns_ports = { 53/udp, 53/tcp, 137/udp, 5353/udp, 5355/udp };
-redef dpd_config += { [ANALYZER_DNS] = [$ports = dns_ports] };
-
-const dns_udp_ports = { 53/udp, 137/udp, 5353/udp, 5355/udp };
-const dns_tcp_ports = { 53/tcp };
-redef dpd_config += { [ANALYZER_DNS_UDP_BINPAC] = [$ports = dns_udp_ports] };
-redef dpd_config += { [ANALYZER_DNS_TCP_BINPAC] = [$ports = dns_tcp_ports] };
-
-redef likely_server_ports += { 53/udp, 53/tcp, 137/udp, 5353/udp, 5355/udp };
+const ports = { 53/udp, 53/tcp, 137/udp, 5353/udp, 5355/udp };
+redef likely_server_ports += { ports };

 event bro_init() &priority=5
 	{
 	Log::create_stream(DNS::LOG, [$columns=Info, $ev=log_dns]);
+	Analyzer::register_for_ports(Analyzer::ANALYZER_DNS, ports);
 	}

 function new_session(c: connection, trans_id: count): Info
 	{
-	if ( ! c?$dns_state )
-		{
-		local state: State;
-		state$pending=table();
-		state$finished_answers=set();
-		c$dns_state = state;
-		}
-
 	local info: Info;
 	info$ts = network_time();
 	info$id = c$id;
@@ -151,18 +142,37 @@ function new_session(c: connection, trans_id: count): Info
 	return info;
 	}

-function set_session(c: connection, msg: dns_msg, is_query: bool)
+hook set_session(c: connection, msg: dns_msg, is_query: bool) &priority=5
 	{
-	if ( ! c?$dns_state || msg$id !in c$dns_state$pending )
+	if ( ! c?$dns_state )
 		{
-		c$dns_state$pending[msg$id] = new_session(c, msg$id);
-		# Try deleting this transaction id from the set of finished answers.
-		# Sometimes hosts will reuse ports and transaction ids and this should
-		# be considered to be a legit scenario (although bad practice).
-		delete c$dns_state$finished_answers[msg$id];
+		local state: State;
+		c$dns_state = state;
 		}

-	c$dns = c$dns_state$pending[msg$id];
+	if ( msg$id !in c$dns_state$pending )
+		c$dns_state$pending[msg$id] = Queue::init();
+
+	local info: Info;
+	# If this is either a query or this is the reply but
+	# no Info records are in the queue (we missed the query?)
+	# we need to create an Info record and put it in the queue.
+	if ( is_query ||
+	     Queue::len(c$dns_state$pending[msg$id]) == 0 )
+		{
+		info = new_session(c, msg$id);
+		Queue::put(c$dns_state$pending[msg$id], info);
+		}
+
+	if ( is_query )
+		# If this is a query, assign the newly created info variable
+		# so that the world looks correct to anything else handling
+		# this query.
+		c$dns = info;
+	else
+		# Peek at the next item in the queue for this trans_id and
+		# assign it to c$dns since this is a response.
+		c$dns = Queue::peek(c$dns_state$pending[msg$id]);

 	if ( ! is_query )
 		{
@@ -190,19 +200,21 @@ function set_session(c: connection, msg: dns_msg, is_query: bool)

 event dns_message(c: connection, is_orig: bool, msg: dns_msg, len: count) &priority=5
 	{
-	set_session(c, msg, is_orig);
+	hook set_session(c, msg, is_orig);
 	}

 event DNS::do_reply(c: connection, msg: dns_msg, ans: dns_answer, reply: string) &priority=5
 	{
 	if ( ans$answer_type == DNS_ANS )
 		{
+		if ( ! c?$dns )
+			{
+			event conn_weird("dns_unmatched_reply", c, "");
+			hook set_session(c, msg, F);
+			}
+
 		c$dns$AA = msg$AA;
 		c$dns$RA = msg$RA;

-		if ( msg$id in c$dns_state$finished_answers )
-			event conn_weird("dns_reply_seen_after_done", c, "");
-
 		if ( reply != "" )
 			{
 			if ( ! c$dns?$answers )
@@ -217,7 +229,6 @@ event DNS::do_reply(c: connection, msg: dns_msg, ans: dns_answer, reply: string)
 		if ( c$dns?$answers && c$dns?$total_answers &&
 		     |c$dns$answers| == c$dns$total_answers )
 			{
-			add c$dns_state$finished_answers[c$dns$trans_id];
 			# Indicate this request/reply pair is ready to be logged.
 			c$dns$ready = T;
 			}
@@ -230,7 +241,7 @@ event DNS::do_reply(c: connection, msg: dns_msg, ans: dns_answer, reply: string)
 		{
 		Log::write(DNS::LOG, c$dns);
 		# This record is logged and no longer pending.
-		delete c$dns_state$pending[c$dns$trans_id];
+		Queue::get(c$dns_state$pending[c$dns$trans_id]);
 		delete c$dns;
 		}
 	}
@@ -243,15 +254,14 @@ event dns_request(c: connection, msg: dns_msg, query: string, qtype: count, qcla
 	c$dns$qclass_name = classes[qclass];
 	c$dns$qtype = qtype;
 	c$dns$qtype_name = query_types[qtype];
-	c$dns$Z = msg$Z;

 	# Decode netbios name queries
 	# Note: I'm ignoring the name type for now. Not sure if this should be
 	# worked into the query/response in some fashion.
 	if ( c$id$resp_p == 137/udp )
 		query = decode_netbios_name(query);
 	c$dns$query = query;
+	c$dns$Z = msg$Z;
 	}

 event dns_A_reply(c: connection, msg: dns_msg, ans: dns_answer, a: addr) &priority=5
@@ -339,6 +349,13 @@ event connection_state_remove(c: connection) &priority=-5
 	# If Bro is expiring state, we should go ahead and log all unlogged
 	# request/response pairs now.
 	for ( trans_id in c$dns_state$pending )
-		Log::write(DNS::LOG, c$dns_state$pending[trans_id]);
+		{
+		local infos: vector of Info;
+		Queue::get_vector(c$dns_state$pending[trans_id], infos);
+		for ( i in infos )
+			{
+			Log::write(DNS::LOG, infos[i]);
+			}
+		}
 	}
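A hedged sketch, not part of the commit above: because set_session is now a hook rather than a private function, a separate site script can extend the DNS log and fill in its own fields as each session is set up. The tagged_by_site field and the handler body below are purely illustrative; the hook name, dns_msg type, and priority mechanics are as declared in the script above.

@load base/protocols/dns

redef record DNS::Info += {
	## Hypothetical extra column filled in by the hook handler below.
	tagged_by_site: bool &log &default=F;
};

# Runs after the base handler (priority 5), so c$dns is already assigned.
hook DNS::set_session(c: connection, msg: dns_msg, is_query: bool) &priority=-3
	{
	if ( c?$dns )
		c$dns$tagged_by_site = T;
	}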
@@ -1,4 +1,8 @@
 @load ./utils-commands
+@load ./info
 @load ./main
-@load ./file-extract
+@load ./utils
+@load ./files
 @load ./gridftp
+@load-sigs ./dpd.sig
@@ -0,0 +1,15 @@
signature dpd_ftp_client {
ip-proto == tcp
payload /(|.*[\n\r]) *[uU][sS][eE][rR] /
tcp-state originator
}
# Match for server greeting (220, 120) and for login or passwd
# required (230, 331).
signature dpd_ftp_server {
ip-proto == tcp
payload /[\n\r ]*(120|220)[^0-9].*[\n\r] *(230|331)[^0-9]/
tcp-state responder
requires-reverse-signature dpd_ftp_client
enable "ftp"
}
@@ -1,70 +0,0 @@
##! File extraction support for FTP.
@load ./main
@load base/utils/files
module FTP;
export {
## Pattern of file mime types to extract from FTP transfers.
const extract_file_types = /NO_DEFAULT/ &redef;
## The on-disk prefix for files to be extracted from FTP-data transfers.
const extraction_prefix = "ftp-item" &redef;
}
redef record Info += {
## On disk file where it was extracted to.
extraction_file: file &log &optional;
## Indicates if the current command/response pair should attempt to
## extract the file if a file was transferred.
extract_file: bool &default=F;
## Internal tracking of the total number of files extracted during this
## session.
num_extracted_files: count &default=0;
};
event file_transferred(c: connection, prefix: string, descr: string,
mime_type: string) &priority=3
{
local id = c$id;
if ( [id$resp_h, id$resp_p] !in ftp_data_expected )
return;
local s = ftp_data_expected[id$resp_h, id$resp_p];
if ( extract_file_types in s$mime_type )
{
s$extract_file = T;
++s$num_extracted_files;
}
}
event file_transferred(c: connection, prefix: string, descr: string,
mime_type: string) &priority=-4
{
local id = c$id;
if ( [id$resp_h, id$resp_p] !in ftp_data_expected )
return;
local s = ftp_data_expected[id$resp_h, id$resp_p];
if ( s$extract_file )
{
local suffix = fmt("%d.dat", s$num_extracted_files);
local fname = generate_extraction_filename(extraction_prefix, c, suffix);
s$extraction_file = open(fname);
if ( s$passive )
set_contents_file(id, CONTENTS_RESP, s$extraction_file);
else
set_contents_file(id, CONTENTS_ORIG, s$extraction_file);
}
}
event log_ftp(rec: Info) &priority=-10
{
delete rec$extraction_file;
delete rec$extract_file;
}
@@ -0,0 +1,61 @@
@load ./info
@load ./main
@load ./utils
@load base/utils/conn-ids
@load base/frameworks/files
module FTP;
export {
redef record Info += {
## File unique ID.
fuid: string &optional &log;
};
## Default file handle provider for FTP.
global get_file_handle: function(c: connection, is_orig: bool): string;
## Describe the file being transferred.
global describe_file: function(f: fa_file): string;
}
function get_file_handle(c: connection, is_orig: bool): string
{
if ( [c$id$resp_h, c$id$resp_p] !in ftp_data_expected )
return "";
return cat(Analyzer::ANALYZER_FTP_DATA, c$start_time, c$id, is_orig);
}
function describe_file(f: fa_file): string
{
# This shouldn't be needed, but just in case...
if ( f$source != "FTP" )
return "";
for ( cid in f$conns )
{
if ( f$conns[cid]?$ftp )
return FTP::describe(f$conns[cid]$ftp);
}
return "";
}
event bro_init() &priority=5
{
Files::register_protocol(Analyzer::ANALYZER_FTP_DATA,
[$get_file_handle = FTP::get_file_handle,
$describe = FTP::describe_file]);
}
event file_over_new_connection(f: fa_file, c: connection, is_orig: bool) &priority=5
{
if ( [c$id$resp_h, c$id$resp_p] !in ftp_data_expected )
return;
local ftp = ftp_data_expected[c$id$resp_h, c$id$resp_p];
ftp$fuid = f$id;
if ( f?$mime_type )
ftp$mime_type = f$mime_type;
}
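With FTP data transfers now handled by the files framework, generic file policies apply to them as well. A hedged sketch of attaching an MD5 hash analyzer to every file carried over FTP: file_over_new_connection and Files::add_analyzer are used as in the script above, while Files::ANALYZER_MD5 is assumed to be the hash analyzer tag provided by the files framework in this release.

@load base/frameworks/files
@load base/protocols/ftp

event file_over_new_connection(f: fa_file, c: connection, is_orig: bool)
	{
	# Only hash files seen on FTP data channels; other sources are ignored.
	if ( f$source == "FTP" )
		Files::add_analyzer(f, Files::ANALYZER_MD5);
	}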
@@ -19,6 +19,7 @@
 ##! sizes are not logged, but at the benefit of saving CPU cycles that
 ##! otherwise go to analyzing the large (and likely benign) connections.

+@load ./info
 @load ./main
 @load base/protocols/conn
 @load base/protocols/ssl
@@ -0,0 +1,72 @@
##! Defines data structures for tracking and logging FTP sessions.
module FTP;
@load ./utils-commands
export {
## This setting changes if passwords used in FTP sessions are
## captured or not.
const default_capture_password = F &redef;
## The expected endpoints of an FTP data channel.
type ExpectedDataChannel: record {
## Whether PASV mode is toggled for control channel.
passive: bool &log;
## The host that will be initiating the data connection.
orig_h: addr &log;
## The host that will be accepting the data connection.
resp_h: addr &log;
## The port at which the acceptor is listening for the data connection.
resp_p: port &log;
};
type Info: record {
## Time when the command was sent.
ts: time &log;
## Unique ID for the connection.
uid: string &log;
## The connection's 4-tuple of endpoint addresses/ports.
id: conn_id &log;
## User name for the current FTP session.
user: string &log &default="<unknown>";
## Password for the current FTP session if captured.
password: string &log &optional;
## Command given by the client.
command: string &log &optional;
## Argument for the command if one is given.
arg: string &log &optional;
## Libmagic "sniffed" file type if the command indicates a file transfer.
mime_type: string &log &optional;
## Size of the file if the command indicates a file transfer.
file_size: count &log &optional;
## Reply code from the server in response to the command.
reply_code: count &log &optional;
## Reply message from the server in response to the command.
reply_msg: string &log &optional;
## Expected FTP data channel.
data_channel: ExpectedDataChannel &log &optional;
## Current working directory that this session is in. By making
## the default value '.', we can indicate that unless something
## more concrete is discovered that the existing but unknown
## directory is ok to use.
cwd: string &default=".";
## Command that is currently waiting for a response.
cmdarg: CmdArg &optional;
## Queue for commands that have been sent but not yet responded to
## are tracked here.
pending_commands: PendingCmds;
## Indicates if the session is in active or passive mode.
passive: bool &default=F;
## Determines if the password will be captured for this request.
capture_password: bool &default=default_capture_password;
};
}
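Since default_capture_password now lives in this record-definition script, enabling password logging site-wide remains a one-line redef. A minimal sketch, assuming the default of F declared above:

@load base/protocols/ftp

# Log cleartext FTP passwords for all users, not only the guest IDs.
redef FTP::default_capture_password = T;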
@@ -3,6 +3,8 @@
 ##! will take on the full path that the client is at along with the requested
 ##! file name.

+@load ./info
+@load ./utils
 @load ./utils-commands
 @load base/utils/paths
 @load base/utils/numbers
@@ -16,64 +18,13 @@ export {
 	## List of commands that should have their command/response pairs logged.
 	const logged_commands = {
-		"APPE", "DELE", "RETR", "STOR", "STOU", "ACCT"
+		"APPE", "DELE", "RETR", "STOR", "STOU", "ACCT", "PORT", "PASV", "EPRT",
+		"EPSV"
 	} &redef;

-	## This setting changes if passwords used in FTP sessions are captured or not.
-	const default_capture_password = F &redef;
-
 	## User IDs that can be considered "anonymous".
 	const guest_ids = { "anonymous", "ftp", "ftpuser", "guest" } &redef;

-	type Info: record {
-		## Time when the command was sent.
-		ts: time &log;
-		## Unique ID for the connection.
-		uid: string &log;
-		## The connection's 4-tuple of endpoint addresses/ports.
-		id: conn_id &log;
-		## User name for the current FTP session.
-		user: string &log &default="<unknown>";
-		## Password for the current FTP session if captured.
-		password: string &log &optional;
-		## Command given by the client.
-		command: string &log &optional;
-		## Argument for the command if one is given.
-		arg: string &log &optional;
-		## Libmagic "sniffed" file type if the command indicates a file transfer.
-		mime_type: string &log &optional;
-		## Libmagic "sniffed" file description if the command indicates a file transfer.
-		mime_desc: string &log &optional;
-		## Size of the file if the command indicates a file transfer.
-		file_size: count &log &optional;
-		## Reply code from the server in response to the command.
-		reply_code: count &log &optional;
-		## Reply message from the server in response to the command.
-		reply_msg: string &log &optional;
-		## Arbitrary tags that may indicate a particular attribute of this command.
-		tags: set[string] &log &default=set();
-		## Current working directory that this session is in. By making
-		## the default value '/.', we can indicate that unless something
-		## more concrete is discovered that the existing but unknown
-		## directory is ok to use.
-		cwd: string &default="/.";
-		## Command that is currently waiting for a response.
-		cmdarg: CmdArg &optional;
-		## Queue for commands that have been sent but not yet responded to
-		## are tracked here.
-		pending_commands: PendingCmds;
-		## Indicates if the session is in active or passive mode.
-		passive: bool &default=F;
-		## Determines if the password will be captured for this request.
-		capture_password: bool &default=default_capture_password;
-	};
-
 	## This record is to hold a parsed FTP reply code. For example, for the
 	## 201 status code, the digits would be parsed as: x->2, y->0, z=>1.
 	type ReplyCode: record {
@@ -93,23 +44,21 @@ export {
 # Add the state tracking information variable to the connection record
 redef record connection += {
 	ftp: Info &optional;
+	ftp_data_reuse: bool &default=F;
 };

-# Configure DPD
-const ports = { 21/tcp, 2811/tcp } &redef; # 2811/tcp is GridFTP.
-redef capture_filters += { ["ftp"] = "port 21 and port 2811" };
-redef dpd_config += { [ANALYZER_FTP] = [$ports = ports] };
-redef likely_server_ports += { 21/tcp, 2811/tcp };
-
-# Establish the variable for tracking expected connections.
-global ftp_data_expected: table[addr, port] of Info &create_expire=5mins;
+const ports = { 21/tcp, 2811/tcp };
+redef likely_server_ports += { ports };

 event bro_init() &priority=5
 	{
 	Log::create_stream(FTP::LOG, [$columns=Info, $ev=log_ftp]);
+	Analyzer::register_for_ports(Analyzer::ANALYZER_FTP, ports);
 	}

+# Establish the variable for tracking expected connections.
+global ftp_data_expected: table[addr, port] of Info &read_expire=5mins;
+
 ## A set of commands where the argument can be expected to refer
 ## to a file or directory.
 const file_cmds = {
@@ -159,39 +108,43 @@ function set_ftp_session(c: connection)

 function ftp_message(s: Info)
 	{
-	# If it either has a tag associated with it (something detected)
-	# or it's a deliberately logged command.
-	if ( |s$tags| > 0 || (s?$cmdarg && s$cmdarg$cmd in logged_commands) )
+	s$ts=s$cmdarg$ts;
+	s$command=s$cmdarg$cmd;
+	s$arg = s$cmdarg$arg;
+
+	if ( s$cmdarg$cmd in file_cmds )
+		s$arg = build_url_ftp(s);
+
+	if ( s$arg == "" )
+		delete s$arg;
+
+	if ( s?$password &&
+	     ! s$capture_password &&
+	     to_lower(s$user) !in guest_ids )
 		{
-		if ( s?$password &&
-		     ! s$capture_password &&
-		     to_lower(s$user) !in guest_ids )
-			{
-			s$password = "<hidden>";
-			}
-
-		local arg = s$cmdarg$arg;
-		if ( s$cmdarg$cmd in file_cmds )
-			arg = fmt("ftp://%s%s", addr_to_uri(s$id$resp_h), build_path_compressed(s$cwd, arg));
-
-		s$ts=s$cmdarg$ts;
-		s$command=s$cmdarg$cmd;
-		if ( arg == "" )
-			delete s$arg;
-		else
-			s$arg=arg;
-
-		Log::write(FTP::LOG, s);
+		s$password = "<hidden>";
 		}

+	if ( s?$cmdarg && s$command in logged_commands)
+		Log::write(FTP::LOG, s);
+
 	# The MIME and file_size fields are specific to file transfer commands
 	# and may not be used in all commands so they need reset to "blank"
 	# values after logging.
 	delete s$mime_type;
-	delete s$mime_desc;
 	delete s$file_size;
-	# Tags are cleared everytime too.
-	delete s$tags;
+	# Same with data channel.
+	delete s$data_channel;
 	}

+function add_expected_data_channel(s: Info, chan: ExpectedDataChannel)
+	{
+	s$passive = chan$passive;
+	s$data_channel = chan;
+	ftp_data_expected[chan$resp_h, chan$resp_p] = s;
+	Analyzer::schedule_analyzer(chan$orig_h, chan$resp_h, chan$resp_p,
+	                            Analyzer::ANALYZER_FTP_DATA,
+	                            5mins);
+	}
+
 event ftp_request(c: connection, command: string, arg: string) &priority=5
@@ -226,9 +179,8 @@ event ftp_request(c: connection, command: string, arg: string) &priority=5
 		if ( data$valid )
 			{
-			c$ftp$passive=F;
-			ftp_data_expected[data$h, data$p] = c$ftp;
-			expect_connection(id$resp_h, data$h, data$p, ANALYZER_FILE, 5mins);
+			add_expected_data_channel(c$ftp, [$passive=F, $orig_h=id$resp_h,
+			                                  $resp_h=data$h, $resp_p=data$p]);
 			}
 		else
 			{
@@ -240,17 +192,14 @@ event ftp_request(c: connection, command: string, arg: string) &priority=5

 event ftp_reply(c: connection, code: count, msg: string, cont_resp: bool) &priority=5
 	{
-	# TODO: figure out what to do with continued FTP response (not used much)
-	#if ( cont_resp ) return;
-
-	local id = c$id;
-
 	set_ftp_session(c);

 	c$ftp$cmdarg = get_pending_cmd(c$ftp$pending_commands, code, msg);
 	c$ftp$reply_code = code;
 	c$ftp$reply_msg = msg;

+	# TODO: figure out what to do with continued FTP response (not used much)
+	if ( cont_resp ) return;
+
 	# TODO: do some sort of generic clear text login processing here.
 	local response_xyz = parse_ftp_reply_code(code);
 	#if ( response_xyz$x == 2 && # successful
@@ -278,10 +227,10 @@ event ftp_reply(c: connection, code: count, msg: string, cont_resp: bool) &prior
 			c$ftp$passive=T;

 			if ( code == 229 && data$h == [::] )
-				data$h = id$resp_h;
+				data$h = c$id$resp_h;

-			ftp_data_expected[data$h, data$p] = c$ftp;
-			expect_connection(id$orig_h, data$h, data$p, ANALYZER_FILE, 5mins);
+			add_expected_data_channel(c$ftp, [$passive=T, $orig_h=c$id$orig_h,
+			                                  $resp_h=data$h, $resp_p=data$p]);
 			}
 		else
 			{
@@ -311,8 +260,7 @@ event ftp_reply(c: connection, code: count, msg: string, cont_resp: bool) &prior
 		}
 	}

-event expected_connection_seen(c: connection, a: count) &priority=10
+event scheduled_analyzer_applied(c: connection, a: Analyzer::Tag) &priority=10
 	{
 	local id = c$id;
 	if ( [id$resp_h, id$resp_p] in ftp_data_expected )
@@ -327,16 +275,19 @@ event file_transferred(c: connection, prefix: string, descr: string,
 		{
 		local s = ftp_data_expected[id$resp_h, id$resp_p];
 		s$mime_type = split1(mime_type, /;/)[1];
-		s$mime_desc = descr;
 		}
 	}

-event file_transferred(c: connection, prefix: string, descr: string,
-	mime_type: string) &priority=-5
+event connection_reused(c: connection) &priority=5
 	{
-	local id = c$id;
-	if ( [id$resp_h, id$resp_p] in ftp_data_expected )
-		delete ftp_data_expected[id$resp_h, id$resp_p];
+	if ( "ftp-data" in c$service )
+		c$ftp_data_reuse = T;
 	}

+# Use state remove event to cover connections terminated by RST.
+event connection_state_remove(c: connection) &priority=-5
+	{
+	if ( c$ftp_data_reuse ) return;
+	delete ftp_data_expected[c$id$resp_h, c$id$resp_p];
+	}
@@ -0,0 +1,48 @@
##! Utilities specific for FTP processing.
@load ./info
@load base/utils/addrs
@load base/utils/paths
module FTP;
export {
## Creates a URL from an :bro:type:`FTP::Info` record.
##
## rec: An :bro:type:`FTP::Info` record.
##
## Returns: A URL, not prefixed by "ftp://".
global build_url: function(rec: Info): string;
## Creates a URL from an :bro:type:`FTP::Info` record.
##
## rec: An :bro:type:`FTP::Info` record.
##
## Returns: A URL prefixed with "ftp://".
global build_url_ftp: function(rec: Info): string;
## Create an extremely shortened representation of a log line.
global describe: function(rec: Info): string;
}
function build_url(rec: Info): string
{
if ( !rec?$arg )
return "";
local comp_path = build_path_compressed(rec$cwd, rec$arg);
if ( comp_path[0] != "/" )
comp_path = cat("/", comp_path);
return fmt("%s%s", addr_to_uri(rec$id$resp_h), comp_path);
}
function build_url_ftp(rec: Info): string
{
return fmt("ftp://%s", build_url(rec));
}
function describe(rec: Info): string
{
return build_url_ftp(rec);
}
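A small usage sketch for the helpers above: printing the URL form of every logged FTP command from the existing FTP::log_ftp event. The handler itself is illustrative, not part of this change.

@load base/protocols/ftp

event FTP::log_ftp(rec: FTP::Info)
	{
	# describe() is the one-line summary; for FTP it is simply the ftp:// URL.
	print FTP::describe(rec);
	print FTP::build_url_ftp(rec);
	}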
@@ -1,5 +1,6 @@
 @load ./main
+@load ./entities
 @load ./utils
-@load ./file-ident
-@load ./file-hash
-@load ./file-extract
+@load ./files
+@load-sigs ./dpd.sig
@@ -0,0 +1,13 @@
signature dpd_http_client {
ip-proto == tcp
payload /^[[:space:]]*(GET|HEAD|POST)[[:space:]]*/
tcp-state originator
}
signature dpd_http_server {
ip-proto == tcp
payload /^HTTP\/[0-9]/
tcp-state responder
requires-reverse-signature dpd_http_client
enable "http"
}
@@ -0,0 +1,109 @@
##! Analysis and logging for MIME entities found in HTTP sessions.
@load base/frameworks/files
@load base/utils/strings
@load base/utils/files
@load ./main
module HTTP;
export {
type Entity: record {
## Filename for the entity if discovered from a header.
filename: string &optional;
};
redef record Info += {
## An ordered vector of file unique IDs.
orig_fuids: vector of string &log &optional;
## An ordered vector of mime types.
orig_mime_types: vector of string &log &optional;
## An ordered vector of file unique IDs.
resp_fuids: vector of string &log &optional;
## An ordered vector of mime types.
resp_mime_types: vector of string &log &optional;
## The current entity.
current_entity: Entity &optional;
## Current number of MIME entities in the HTTP request message body.
orig_mime_depth: count &default=0;
## Current number of MIME entities in the HTTP response message body.
resp_mime_depth: count &default=0;
};
}
event http_begin_entity(c: connection, is_orig: bool) &priority=10
{
set_state(c, F, is_orig);
if ( is_orig )
++c$http$orig_mime_depth;
else
++c$http$resp_mime_depth;
c$http$current_entity = Entity();
}
event http_header(c: connection, is_orig: bool, name: string, value: string) &priority=3
{
if ( name == "CONTENT-DISPOSITION" &&
/[fF][iI][lL][eE][nN][aA][mM][eE]/ in value )
{
c$http$current_entity$filename = extract_filename_from_content_disposition(value);
}
else if ( name == "CONTENT-TYPE" &&
/[nN][aA][mM][eE][:blank:]*=/ in value )
{
c$http$current_entity$filename = extract_filename_from_content_disposition(value);
}
}
event file_over_new_connection(f: fa_file, c: connection, is_orig: bool) &priority=5
{
if ( f$source == "HTTP" && c?$http )
{
if ( c$http?$current_entity && c$http$current_entity?$filename )
f$info$filename = c$http$current_entity$filename;
if ( f$is_orig )
{
if ( ! c$http?$orig_mime_types )
c$http$orig_fuids = string_vec(f$id);
else
c$http$orig_fuids[|c$http$orig_fuids|] = f$id;
if ( f?$mime_type )
{
if ( ! c$http?$orig_mime_types )
c$http$orig_mime_types = string_vec(f$mime_type);
else
c$http$orig_mime_types[|c$http$orig_mime_types|] = f$mime_type;
}
}
else
{
if ( ! c$http?$resp_mime_types )
c$http$resp_fuids = string_vec(f$id);
else
c$http$resp_fuids[|c$http$resp_fuids|] = f$id;
if ( f?$mime_type )
{
if ( ! c$http?$resp_mime_types )
c$http$resp_mime_types = string_vec(f$mime_type);
else
c$http$resp_mime_types[|c$http$resp_mime_types|] = f$mime_type;
}
}
}
}
event http_end_entity(c: connection, is_orig: bool) &priority=5
{
if ( c?$http && c$http?$current_entity )
delete c$http$current_entity;
}
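The parallel fuid/mime-type vectors can be consumed from the HTTP log stream; a hedged sketch that pairs each response file ID with its sniffed type (the length guard is there because the two vectors are not guaranteed to line up):

@load base/protocols/http

event HTTP::log_http(rec: HTTP::Info)
	{
	if ( ! rec?$resp_fuids )
		return;

	for ( i in rec$resp_fuids )
		{
		local mt = "-";
		if ( rec?$resp_mime_types && i < |rec$resp_mime_types| )
			mt = rec$resp_mime_types[i];
		print fmt("%s %s %s", rec$uid, rec$resp_fuids[i], mt);
		}
	}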
@@ -1,60 +0,0 @@
##! Extracts the items from HTTP traffic, one per file. At this time only
##! the message body from the server can be extracted with this script.
@load ./main
@load ./file-ident
@load base/utils/files
module HTTP;
export {
## Pattern of file mime types to extract from HTTP response entity bodies.
const extract_file_types = /NO_DEFAULT/ &redef;
## The on-disk prefix for files to be extracted from HTTP entity bodies.
const extraction_prefix = "http-item" &redef;
redef record Info += {
## On-disk file where the response body was extracted to.
extraction_file: file &log &optional;
## Indicates if the response body is to be extracted or not. Must be
## set before or by the first :bro:id:`http_entity_data` event for the
## content.
extract_file: bool &default=F;
};
}
event http_entity_data(c: connection, is_orig: bool, length: count, data: string) &priority=-5
{
# Client body extraction is not currently supported in this script.
if ( is_orig )
return;
if ( c$http$first_chunk )
{
if ( c$http?$mime_type &&
extract_file_types in c$http$mime_type )
{
c$http$extract_file = T;
}
if ( c$http$extract_file )
{
local suffix = fmt("%s_%d.dat", is_orig ? "orig" : "resp", c$http_state$current_response);
local fname = generate_extraction_filename(extraction_prefix, c, suffix);
c$http$extraction_file = open(fname);
enable_raw_output(c$http$extraction_file);
}
}
if ( c$http?$extraction_file )
print c$http$extraction_file, data;
}
event http_end_entity(c: connection, is_orig: bool)
{
if ( c$http?$extraction_file )
close(c$http$extraction_file);
}
@@ -1,92 +0,0 @@
##! Calculate hashes for HTTP body transfers.
@load ./file-ident
module HTTP;
export {
redef enum Notice::Type += {
## Indicates that an MD5 sum was calculated for an HTTP response body.
MD5,
};
redef record Info += {
## MD5 sum for a file transferred over HTTP calculated from the
## response body.
md5: string &log &optional;
## This value can be set per-transfer to determine per request
## if a file should have an MD5 sum generated. It must be
## set to T at the time of or before the first chunk of body data.
calc_md5: bool &default=F;
## Indicates if an MD5 sum is being calculated for the current
## request/response pair.
md5_handle: opaque of md5 &optional;
};
## Generate MD5 sums for these filetypes.
const generate_md5 = /application\/x-dosexec/ # Windows and DOS executables
| /application\/x-executable/ # *NIX executable binary
&redef;
}
## Initialize and calculate the hash.
event http_entity_data(c: connection, is_orig: bool, length: count, data: string) &priority=5
{
if ( is_orig || ! c?$http ) return;
if ( c$http$first_chunk )
{
if ( c$http$calc_md5 ||
(c$http?$mime_type && generate_md5 in c$http$mime_type) )
{
c$http$md5_handle = md5_hash_init();
}
}
if ( c$http?$md5_handle )
md5_hash_update(c$http$md5_handle, data);
}
## In the event of a content gap during a file transfer, detect the state for
## the MD5 sum calculation and stop calculating the MD5 since it would be
## incorrect anyway.
event content_gap(c: connection, is_orig: bool, seq: count, length: count) &priority=5
{
if ( is_orig || ! c?$http || ! c$http?$md5_handle ) return;
set_state(c, F, is_orig);
md5_hash_finish(c$http$md5_handle); # Ignore return value.
delete c$http$md5_handle;
}
## When the file finishes downloading, finish the hash and generate a notice.
event http_message_done(c: connection, is_orig: bool, stat: http_message_stat) &priority=-3
{
if ( is_orig || ! c?$http ) return;
if ( c$http?$md5_handle )
{
local url = build_url_http(c$http);
c$http$md5 = md5_hash_finish(c$http$md5_handle);
delete c$http$md5_handle;
NOTICE([$note=MD5, $msg=fmt("%s %s %s", c$id$orig_h, c$http$md5, url),
$sub=c$http$md5, $conn=c]);
}
}
event connection_state_remove(c: connection) &priority=-5
{
if ( c?$http_state &&
c$http_state$current_response in c$http_state$pending &&
c$http_state$pending[c$http_state$current_response]?$md5_handle )
{
# The MD5 sum isn't going to be saved anywhere since the entire
# body wouldn't have been seen anyway and we'd just be giving an
# incorrect MD5 sum.
md5_hash_finish(c$http$md5_handle);
delete c$http$md5_handle;
}
}
@@ -1,85 +0,0 @@
##! Identification of file types in HTTP response bodies with file content sniffing.
@load base/frameworks/signatures
@load base/frameworks/notice
@load ./main
@load ./utils
# Add the magic number signatures to the core signature set.
@load-sigs ./file-ident.sig
# Ignore the signatures used to match files
redef Signatures::ignored_ids += /^matchfile-/;
module HTTP;
export {
redef enum Notice::Type += {
## Indicates when the file extension doesn't seem to match the file contents.
Incorrect_File_Type,
};
redef record Info += {
## Mime type of response body identified by content sniffing.
mime_type: string &log &optional;
## Indicates that no data of the current file transfer has been
## seen yet. After the first :bro:id:`http_entity_data` event, it
## will be set to F.
first_chunk: bool &default=T;
};
## Mapping between mime types and regular expressions for URLs
## The :bro:enum:`HTTP::Incorrect_File_Type` notice is generated if the pattern
## doesn't match the mime type that was discovered.
const mime_types_extensions: table[string] of pattern = {
["application/x-dosexec"] = /\.([eE][xX][eE]|[dD][lL][lL])/,
} &redef;
## A pattern for filtering out :bro:enum:`HTTP::Incorrect_File_Type` urls
## that are not noteworthy before a notice is created. Each
## pattern added should match the complete URL (the matched URLs include
## "http://" at the beginning).
const ignored_incorrect_file_type_urls = /^$/ &redef;
}
event signature_match(state: signature_state, msg: string, data: string) &priority=5
{
# Only signatures matching file types are dealt with here.
if ( /^matchfile-/ !in state$sig_id ) return;
local c = state$conn;
set_state(c, F, F);
# Not much point in any of this if we don't know about the HTTP session.
if ( ! c?$http ) return;
# Set the mime type that was detected.
c$http$mime_type = msg;
if ( msg in mime_types_extensions &&
c$http?$uri && mime_types_extensions[msg] !in c$http$uri )
{
local url = build_url_http(c$http);
if ( url == ignored_incorrect_file_type_urls )
return;
local message = fmt("%s %s %s", msg, c$http$method, url);
NOTICE([$note=Incorrect_File_Type,
$msg=message,
$conn=c]);
}
}
event http_entity_data(c: connection, is_orig: bool, length: count, data: string) &priority=5
{
if ( c$http$first_chunk && ! c$http?$mime_type )
c$http$mime_type = split1(identify_data(data, T), /;/)[1];
}
event http_entity_data(c: connection, is_orig: bool, length: count, data: string) &priority=-10
{
if ( c$http$first_chunk )
c$http$first_chunk=F;
}
Some files were not shown because too many files have changed in this diff.