Change doc/ subdir into a git submodule

The docs now live at https://github.com/zeek/zeek-docs
Jon Siwek 2019-01-17 14:09:29 -06:00
parent 0d685efbf5
commit 2ff746fea7
693 changed files with 26 additions and 105609 deletions

3
.gitmodules vendored
View file

@@ -28,3 +28,6 @@
[submodule "aux/bifcl"]
path = aux/bifcl
url = https://github.com/zeek/bifcl
[submodule "doc"]
path = doc
url = https://github.com/zeek/zeek-docs

View file

@@ -1,4 +0,0 @@
formats: []
python:
  version: 3

View file

@@ -1,4 +1,10 @@
2.6-82 | 2019-01-17 14:09:29 -0600

  * Change doc/ subdir into a git submodule (Jon Siwek, Corelight)

    The docs now live at https://github.com/zeek/zeek-docs

2.6-81 | 2019-01-16 19:03:07 -0600

  * Add Broker::peer_counts_as_iosource option (Jon Siwek, Corelight)

View file

@@ -307,7 +307,6 @@ include_directories(BEFORE ${CAF_INCLUDE_DIR_OPENSSL})
add_subdirectory(src)
add_subdirectory(scripts)
add_subdirectory(doc)
add_subdirectory(man)
include(CheckOptionalBuildSources)

View file

@@ -23,15 +23,14 @@ install-aux: configured
clean: configured docclean
	$(MAKE) -C $(BUILD) $@

doc: configured
	$(MAKE) -C $(BUILD) $@
doc:
	$(MAKE) -C doc $@

docclean: configured
	$(MAKE) -C $(BUILD) $@
docclean:
	(cd doc && make clean)

livehtml:
	@mkdir -p build/doc/html
	sphinx-autobuild --ignore "testing/*" --ignore "*.git/*" --ignore "*.lock" --ignore "*.pyc" --ignore "*.swp" --ignore "*.swpx" --ignore "*.swx" -b html ./doc ./build/doc/html
	$(MAKE) -C doc $@

dist:
	@test -e ../$(VERSION_FULL) && rm -ri ../$(VERSION_FULL) || true

2371
NEWS

File diff suppressed because it is too large

1
NEWS Symbolic link
View file

@@ -0,0 +1 @@
doc/install/NEWS.rst

View file

@@ -1 +1 @@
2.6-81
2.6-82

1
doc Submodule

@@ -0,0 +1 @@
Subproject commit c0092fab7b28c029eddb6b9b654f6096d8e4456a

2
doc/.gitignore vendored
View file

@@ -1,2 +0,0 @@
html
*.pyc

View file

@@ -1,20 +0,0 @@
set(html_output_dir ${CMAKE_CURRENT_BINARY_DIR}/html)

add_custom_target(zeek-doc-html
                  COMMAND sphinx-build
                          -b html
                          -c ${CMAKE_CURRENT_SOURCE_DIR}
                          ${CMAKE_CURRENT_SOURCE_DIR}
                          ${html_output_dir}
                  # Create symlink to the html output directory for convenience.
                  COMMAND "${CMAKE_COMMAND}" -E create_symlink
                          ${html_output_dir}
                          ${CMAKE_BINARY_DIR}/html
                  WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
                  COMMENT "[Sphinx] Generate Bro HTML documentation in ${html_output_dir}")

if (NOT TARGET doc)
    add_custom_target(doc)
endif ()

add_dependencies(doc zeek-doc-html)

View file

@@ -1,5 +0,0 @@
This work is licensed under the Creative Commons
Attribution 4.0 International License. To view a copy of this
license, visit https://creativecommons.org/licenses/by/4.0/ or send
a letter to Creative Commons, 444 Castro Street, Suite 900, Mountain
View, California, 94041, USA.

View file

@@ -1,28 +0,0 @@
Documentation
=============
This directory contains documentation in reStructuredText format
(see http://docutils.sourceforge.net/rst.html).
It is the root of a Sphinx source tree and can be modified to add more
documentation, style sheets, JavaScript, etc. The Sphinx config file
is ``conf.py``.
There is also a custom Sphinx domain implemented in ``ext/bro.py``
which adds some reST directives and roles that aid in generating useful
index entries and cross-references. Other extensions can be added in
a similar fashion.
The ``make doc`` target in the top-level Makefile can be used to locally
render the reST files into HTML. That target depends on:
* Python interpreter >= 2.7
* `Sphinx <http://sphinx-doc.org/>`_
* `Read the Docs Sphinx Theme <https://github.com/rtfd/sphinx_rtd_theme>`_
After the build completes, HTML documentation is symlinked in ``build/html``.
There's also a ``make livehtml`` target in the top-level Makefile that
is useful for editing the reST files and seeing changes rendered out live
to a separate HTML browser.

View file

@@ -1,15 +0,0 @@
{% extends "!breadcrumbs.html" %}
{% block breadcrumbs_aside %}
<li class="wy-breadcrumbs-aside">
{% if pagename != "search" %}
{% if display_github %}
{% if github_version == "master" %}
<a href="https://{{ github_host|default("github.com") }}/{{ github_user }}/{{ github_repo }}/blob/{{ github_version }}{{ conf_py_path }}{{ pagename }}{{ suffix }}" class="fa fa-github"> {{ _('Edit on GitHub') }}</a>
{% endif %}
{% elif show_source and has_source and sourcename %}
<a href="{{ pathto('_sources/' + sourcename, true)|e }}" rel="nofollow"> {{ _('View page source') }}</a>
{% endif %}
{% endif %}
</li>
{% endblock %}

View file

@@ -1,7 +0,0 @@
{% extends "!layout.html" %}
{% if READTHEDOCS and current_version %}
{% if current_version == "latest" or current_version == "stable" %}
{% set current_version = current_version ~ " (" ~ version ~ ")" %}
{% endif %}
{% endif %}

View file

@@ -1,188 +0,0 @@
====================
Cluster Architecture
====================
Bro is not multithreaded, so once the limitations of a single processor core
are reached the only option currently is to spread the workload across many
cores, or even many physical computers. The cluster deployment scenario for
Bro is the current solution to build these larger systems. The tools and
scripts that accompany Bro provide the structure to easily manage many Bro
processes that examine packets and perform correlation activities while acting as
a singular, cohesive entity. This document describes the Bro cluster
architecture. For information on how to configure a Bro cluster,
see the documentation for
:doc:`BroControl <../components/broctl/README>`.
Architecture
---------------
The figure below illustrates the main components of a Bro cluster.
.. image:: /images/deployment.png
Tap
***
The tap is a mechanism that splits the packet stream in order to make a copy
available for inspection. Examples include the monitoring port on a switch
and an optical splitter on fiber networks.
Frontend
********
The frontend is a discrete hardware device or on-host technique that splits
traffic into many streams or flows. The Bro binary does not do this job.
There are numerous ways to accomplish this task, some of which are described
below in `Frontend Options`_.
Manager
*******
The manager is a Bro process that has two primary jobs. It receives log
messages and notices from the rest of the nodes in the cluster using the Bro
communications protocol (note that if you are using a logger, then the
logger receives all logs instead of the manager). The result
is a single log instead of many discrete logs that you have to
combine in some manner with post-processing. The manager also takes
the opportunity to de-duplicate notices, and it has the
ability to do so since it's acting as the choke point for notices and how
notices might be processed into actions (e.g., emailing, paging, or blocking).
The manager process is started first by BroControl; it only opens its
designated port and waits for connections, and it doesn't initiate any
connections to the rest of the cluster. Once the workers are started and
connect to the manager, logs and notices will start arriving at the manager
process from the workers.
Logger
******
The logger is an optional Bro process that receives log messages from the
rest of the nodes in the cluster using the Bro communications protocol.
The purpose of having a logger receive logs instead of the manager is
to reduce the load on the manager. If no logger is needed, then the
manager will receive logs instead.
The logger process is started first by BroControl; it only opens its
designated port and waits for connections, and it doesn't initiate any
connections to the rest of the cluster. Once the rest of the cluster is
started and connects to the logger, logs will start arriving at the logger
process.
Proxy
*****
The proxy is a Bro process that manages synchronized state. Variables can
be synchronized across connected Bro processes automatically. Proxies help
the workers by alleviating the need for all of the workers to connect
directly to each other.
Examples of synchronized state from the scripts that ship with Bro include
the full list of "known" hosts and services (hosts or services identified
as performing full TCP handshakes) and the protocols that have been found
running on each connection. If worker A detects host 1.2.3.4 as an active
host, it would be beneficial for worker B to know that as well. So worker A
shares that information as an insertion to a set; the insertion travels to
the cluster's proxy, and the proxy sends that same set insertion to worker
B. The result is that worker A and worker B have shared knowledge about the
hosts and services that are active on the network being monitored.
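For example, a script-level variable marked with the ``&synchronized``
attribute (a minimal sketch; the variable name here is illustrative) is
automatically kept in sync across the cluster through the proxies::

    global known_active_hosts: set[addr] &synchronized;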
The proxy model extends to having multiple proxies when necessary for
performance reasons. It only adds one additional step for the Bro processes.
Each proxy connects to another proxy in a ring and the workers are shared
between them as evenly as possible. When a proxy receives some new bit of
state, it shares that with the next proxy in the ring; the state then
travels around the ring of proxies and down to all of the workers. From a
practical standpoint,
there are no rules of thumb established for the number of proxies
necessary for the number of workers they are serving. It is best to start
with a single proxy and add more if communication performance problems are
found.
Bro processes acting as proxies don't tend to be extremely hard on CPU
or memory and users frequently run proxy processes on the same physical
host as the manager.
Worker
******
The worker is the Bro process that sniffs network traffic and does protocol
analysis on the reassembled traffic streams. Most of the work of an active
cluster takes place on the workers and as such, the workers typically
represent the bulk of the Bro processes that are running in a cluster.
The fastest memory and CPU core speed you can afford is recommended
since all of the protocol parsing and most analysis will take place here.
There are no particular requirements for the disks in workers since almost all
logging is done remotely to the manager, and normally very little is written
to disk.
The rule of thumb we have followed recently is to allocate approximately 1
core for every 250Mbps of traffic that is being analyzed. However, this
estimate could be extremely traffic mix-specific. It has generally worked
for mixed traffic with many users and servers. For example, if your traffic
peaks around 2Gbps (combined) and you want to handle traffic at peak load,
you may want to have 8 cores available (2048 / 250 == 8.2). If the 250Mbps
estimate works for your traffic, this could be handled by 2 physical hosts
dedicated to being workers with each one containing a quad-core processor.
Once a flow-based load balancer is put into place this model is extremely
easy to scale. It is recommended that you estimate the amount of
hardware you will need to fully analyze your traffic. If more is needed it's
relatively easy to increase the size of the cluster in most cases.
Frontend Options
----------------
There are many options for setting up a frontend flow distributor. In many
cases it is beneficial to do multiple stages of flow distribution
on the network and on the host.
Discrete hardware flow balancers
********************************
cPacket
^^^^^^^
If you are monitoring one or more 10G physical interfaces, the recommended
solution is to use either a cFlow or cVu device from cPacket because they
are used successfully at a number of sites. These devices will perform
layer-2 load balancing by rewriting the destination Ethernet MAC address
to cause each packet associated with a particular flow to have the same
destination MAC. The packets can then be passed directly to a monitoring
host where each worker has a BPF filter to limit its visibility to only that
stream of flows, or onward to a commodity switch to split the traffic out to
multiple 1G interfaces for the workers. This greatly reduces
costs since workers can use relatively inexpensive 1G interfaces.
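As a sketch of the BPF-filter approach (the MAC address below is a
placeholder for whatever address the frontend assigns to this worker's
stream), each worker could restrict its visibility like this::

    redef PacketFilter::restrict_filters += {
        ["cpacket-stream"] = "ether dst 00:1b:21:00:00:01"
    };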
OpenFlow Switches
^^^^^^^^^^^^^^^^^
We are currently exploring the use of OpenFlow based switches to do flow-based
load balancing directly on the switch, which greatly reduces frontend
costs for many users. This document will be updated when we have more
information.
On host flow balancing
**********************
PF_RING
^^^^^^^
The PF_RING software for Linux has a "clustering" feature which will do
flow-based load balancing across a number of processes that are sniffing the
same interface. This allows you to easily take advantage of multiple
cores in a single physical host because Bro's main event loop is single
threaded and can't natively utilize all of the cores. If you want to use
PF_RING, see the documentation on `how to configure Bro with PF_RING
<https://www.zeek.org/documentation/load-balancing.html>`_.
Netmap
^^^^^^
FreeBSD has an in-progress project named Netmap which will enable flow-based
load balancing as well. When it becomes viable for real world use, this
document will be updated.
Click! Software Router
^^^^^^^^^^^^^^^^^^^^^^
Click! can be used for flow-based load balancing with a simple configuration.
This solution is not recommended on Linux, where Bro's PF_RING support is the
better choice, and should be considered only a last resort on other operating
systems, since it causes a lot of overhead due to context switching back and
forth between kernel and userland several times per packet.

View file

@@ -1 +0,0 @@
../../../aux/binpac/README

View file

@@ -1 +0,0 @@
../../../aux/bro-aux/README

View file

@@ -1 +0,0 @@
../../../aux/broctl/doc/broctl.rst

View file

@@ -1 +0,0 @@
../../../aux/broker/README

View file

@@ -1 +0,0 @@
../../../aux/btest/README

View file

@@ -1 +0,0 @@
../../../aux/broctl/aux/capstats/README

View file

@@ -1,22 +0,0 @@
=============
Subcomponents
=============
The following are snapshots of documentation for components that come
with this version of Bro (|version|). Since they can also be used
independently, see the `download page
<http://www.zeek.org/download/index.html>`_ for documentation of any
current, independent component releases.
.. toctree::
:maxdepth: 1
BinPAC - A protocol parser generator <binpac/README>
Broker - Bro's (New) Messaging Library <broker/README>
BroControl - Interactive Bro management shell <broctl/README>
Bro-Aux - Small auxiliary tools for Bro <bro-aux/README>
BTest - A unit testing framework <btest/README>
Capstats - Command-line packet statistic tool <capstats/README>
PySubnetTree - Python module for CIDR lookups <pysubnettree/README>
trace-summary - Script for generating break-downs of network traffic <trace-summary/README>

View file

@@ -1 +0,0 @@
../../../aux/broctl/aux/pysubnettree/README

View file

@@ -1 +0,0 @@
../../../aux/broctl/aux/trace-summary/README

View file

@@ -1,235 +0,0 @@
# -*- coding: utf-8 -*-
#
# Zeek documentation build configuration file, created by sphinx-quickstart
#
# This file is execfile()d with the current directory set to its containing dir.
#
# Note that not all possible configuration values are present in this
# autogenerated file.
#
# All configuration values have a default; values that are commented out
# serve to show the default.
import sys, os
extensions = []
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
sys.path.insert(0, os.path.abspath('ext'))
# -- General configuration -----------------------------------------------------
# If your documentation needs a minimal Sphinx version, state it here.
#needs_sphinx = '1.0'
# Add any Sphinx extension module names here, as strings. They can be extensions
# coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
extensions += ['bro', 'sphinx.ext.todo']
# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
# The suffix of source filenames.
source_suffix = '.rst'
# The encoding of source files.
#source_encoding = 'utf-8-sig'
# The master toctree document.
master_doc = 'index'
# General information about the project.
project = u'Zeek'
copyright = u'2018, The Zeek Project'
# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# The short X.Y version.
with open('../VERSION', 'r') as f:
    version = f.readline().strip()
# The full version, including alpha/beta/rc tags.
release = version
# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#language = None
# There are two options for replacing |today|: either, you set today to some
# non-false value, then it is used:
#today = ''
# Else, today_fmt is used as the format for a strftime call.
today_fmt = '%B %d, %Y'
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
exclude_patterns = [".#*"]
# The reST default role (used for this markup: `text`) to use for all documents.
#default_role = None
# If true, '()' will be appended to :func: etc. cross-reference text.
#add_function_parentheses = True
# If true, the current module name will be prepended to all description
# unit titles (such as .. function::).
#add_module_names = True
# If true, sectionauthor and moduleauthor directives will be shown in the
# output. They are ignored by default.
show_authors = True
# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'sphinx'
highlight_language = 'none'
# A list of ignored prefixes for module index sorting.
#modindex_common_prefix = []
# -- Options for HTML output ---------------------------------------------------
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
on_rtd = os.environ.get('READTHEDOCS', None) == 'True'
if not on_rtd:
    # only import and set the theme if we're building docs locally
    import sphinx_rtd_theme
    html_theme = 'sphinx_rtd_theme'
    html_theme_path = [sphinx_rtd_theme.get_html_theme_path()]
html_last_updated_fmt = '%B %d, %Y'
# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
# documentation.
html_theme_options = {
'collapse_navigation': False,
'display_version': True,
}
# Add any paths that contain custom themes here, relative to this directory.
#html_theme_path = []
# The name for this set of Sphinx documents. If None, it defaults to
# "<project> v<release> Documentation".
html_title = u'Zeek User Manual v' + release
# A shorter title for the navigation bar. Default is the same as html_title.
#html_short_title = None
# The name of an image file (relative to this directory) to place at the top
# of the sidebar.
#html_logo = None
# The name of an image file (within the static path) to use as favicon of the
# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
# pixels large.
#html_favicon = None
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
#html_static_path = ['_static']
# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
# using the given strftime format.
#html_last_updated_fmt = '%b %d, %Y'
# If true, SmartyPants will be used to convert quotes and dashes to
# typographically correct entities.
#html_use_smartypants = True
# Custom sidebar templates, maps document names to template names.
#html_sidebars = {
#'**': ['localtoc.html', 'sourcelink.html', 'searchbox.html'],
#}
# Additional templates that should be rendered to pages, maps page names to
# template names.
#html_additional_pages = {}
# If false, no module index is generated.
#html_domain_indices = True
# If false, no index is generated.
#html_use_index = True
# If true, the index is split into individual pages for each letter.
#html_split_index = False
# If true, links to the reST sources are added to the pages.
#html_show_sourcelink = True
# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
#html_show_sphinx = True
# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
#html_show_copyright = True
# If true, an OpenSearch description file will be output, and all pages will
# contain a <link> tag referring to it. The value of this option must be the
# base URL from which the finished HTML is served.
#html_use_opensearch = ''
# This is the file name suffix for HTML files (e.g. ".xhtml").
#html_file_suffix = None
# Output file base name for HTML help builder.
htmlhelp_basename = 'zeek-docs'
# -- Options for LaTeX output --------------------------------------------------
# The paper size ('letter' or 'a4').
#latex_paper_size = 'letter'
# The font size ('10pt', '11pt' or '12pt').
#latex_font_size = '10pt'
# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title, author, documentclass [howto/manual]).
latex_documents = [
('index', 'Zeek.tex', u'Zeek Documentation',
u'The Zeek Project', 'manual'),
]
# The name of an image file (relative to this directory) to place at the top of
# the title page.
#latex_logo = None
# For "manual" documents, if this is true, then toplevel headings are parts,
# not chapters.
#latex_use_parts = False
# If true, show page references after internal links.
#latex_show_pagerefs = False
# If true, show URL addresses after external links.
#latex_show_urls = False
# Additional stuff for the LaTeX preamble.
#latex_preamble = ''
# Documents to append as an appendix to all manuals.
#latex_appendices = []
# If false, no module index is generated.
#latex_domain_indices = True
# -- Options for manual page output --------------------------------------------
# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
('index', 'bro', u'Zeek Documentation',
[u'The Zeek Project'], 1)
]
# -- Options for todo plugin --------------------------------------------
todo_include_todos=True

View file

@@ -1,253 +0,0 @@
.. _configuration:
=====================
Cluster Configuration
=====================
A *Bro Cluster* is a set of systems jointly analyzing the traffic of
a network link in a coordinated fashion. You can operate such a setup from
a central manager system easily using BroControl because BroControl
hides much of the complexity of the multi-machine installation.
This section gives examples of how to setup common cluster configurations
using BroControl. For a full reference on BroControl, see the
:doc:`BroControl <../components/broctl/README>` documentation.
Preparing to Setup a Cluster
============================
In this document we refer to the user account used to set up the cluster
as the "Bro user". When setting up a cluster the Bro user must be set up
on all hosts, and this user must have ssh access from the manager to all
machines in the cluster, and it must work without being prompted for a
password/passphrase (for example, using ssh public key authentication).
Also, on the worker nodes this user must have access to the target
network interface in promiscuous mode.
Additional storage must be available on all hosts under the same path,
which we will call the cluster's prefix path. We refer to this directory
as ``<prefix>``. If you build Bro from source, then ``<prefix>`` is
the directory specified with the ``--prefix`` configure option,
or ``/usr/local/bro`` by default. The Bro user must be able to either
create this directory or, where it already exists, must have write
permission inside this directory on all hosts.
When trying to decide how to configure the Bro nodes, keep in mind that
there can be multiple Bro instances running on the same host. For example,
it's possible to run a proxy and the manager on the same host. However, it is
recommended to run workers on a different machine than the manager because
workers can consume a lot of CPU resources. The maximum recommended
number of workers to run on a machine should be one or two less than
the number of CPU cores available on that machine. Using a load-balancing
method (such as PF_RING) along with CPU pinning can decrease the load on
the worker machines. Also, in order to reduce the load on the manager
process, it is recommended to have a logger in your configuration. If a
logger is defined in your cluster configuration, then it will receive logs
instead of the manager process.
Basic Cluster Configuration
===========================
With all prerequisites in place, perform the following steps to setup
a Bro cluster (do this as the Bro user on the manager host only):
- Edit the BroControl configuration file, ``<prefix>/etc/broctl.cfg``,
and change the value of any BroControl options to be more suitable for
your environment. You will most likely want to change the value of
the ``MailTo`` and ``LogRotationInterval`` options. A complete
reference of all BroControl options can be found in the
:doc:`BroControl <../components/broctl/README>` documentation.
- Edit the BroControl node configuration file, ``<prefix>/etc/node.cfg``,
to define where logger, manager, proxies, and workers are to run. For a
cluster configuration, you must comment-out (or remove) the standalone node
in that file, and either uncomment or add node entries for each node
in your cluster (logger, manager, proxy, and workers). For example, if you
wanted to run five Bro nodes (two workers, one proxy, a logger, and a
manager) on a cluster consisting of three machines, your cluster
configuration would look like this::
[logger]
type=logger
host=10.0.0.10
[manager]
type=manager
host=10.0.0.10
[proxy-1]
type=proxy
host=10.0.0.10
[worker-1]
type=worker
host=10.0.0.11
interface=eth0
[worker-2]
type=worker
host=10.0.0.12
interface=eth0
For a complete reference of all options that are allowed in the ``node.cfg``
file, see the :doc:`BroControl <../components/broctl/README>` documentation.
- Edit the network configuration file ``<prefix>/etc/networks.cfg``. This
file lists all of the networks which the cluster should consider as local
to the monitored environment.
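For example, a minimal ``networks.cfg`` could look like this (the address
blocks and descriptions below are placeholders for your own local ranges)::

    10.0.0.0/8          Private IP space
    192.168.0.0/16      Private IP space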
- Install Bro on all machines in the cluster using BroControl::
> broctl install
- See the :doc:`BroControl <../components/broctl/README>` documentation
for information on setting up a cron job on the manager host that can
monitor the cluster.
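A typical crontab entry for this (the path assumes the default installation
prefix) runs ``broctl cron`` every five minutes::

    */5 * * * * /usr/local/bro/bin/broctl cron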
PF_RING Cluster Configuration
=============================
`PF_RING <http://www.ntop.org/products/pf_ring/>`_ allows speeding up the
packet capture process by installing a new type of socket in Linux systems.
It supports 10Gbit hardware packet filtering using standard network adapters,
and user-space DNA (Direct NIC Access) for fast packet capture/transmission.
Installing PF_RING
^^^^^^^^^^^^^^^^^^
1. Download and install PF_RING for your system following the instructions
`here <http://www.ntop.org/get-started/download/#PF_RING>`_. The following
commands will install the PF_RING libraries and kernel module (replace
the version number 5.6.2 in this example with the version that you
downloaded)::
cd /usr/src
tar xvzf PF_RING-5.6.2.tar.gz
cd PF_RING-5.6.2/userland/lib
./configure --prefix=/opt/pfring
make install
cd ../libpcap
./configure --prefix=/opt/pfring
make install
cd ../tcpdump-4.1.1
./configure --prefix=/opt/pfring
make install
cd ../../kernel
make install
modprobe pf_ring enable_tx_capture=0 min_num_slots=32768
Refer to the documentation for your Linux distribution on how to load the
pf_ring module at boot time. You will need to install the PF_RING
library files and kernel module on all of the workers in your cluster.
2. Download the Bro source code.
3. Configure and install Bro using the following commands::
./configure --with-pcap=/opt/pfring
make
make install
4. Make sure Bro is correctly linked to the PF_RING libpcap libraries::
ldd /usr/local/bro/bin/bro | grep pcap
libpcap.so.1 => /opt/pfring/lib/libpcap.so.1 (0x00007fa6d7d24000)
5. Configure BroControl to use PF_RING (explained below).
6. Run "broctl install" on the manager. This command will install Bro and
required scripts to all machines in your cluster.
Using PF_RING
^^^^^^^^^^^^^
In order to use PF_RING, you need to specify the correct configuration
options for your worker nodes in BroControl's node configuration file.
Edit the ``node.cfg`` file and specify ``lb_method=pf_ring`` for each of
your worker nodes. Next, use the ``lb_procs`` node option to specify how
many Bro processes you'd like that worker node to run, and optionally pin
those processes to certain CPU cores with the ``pin_cpus`` option (CPU
numbering starts at zero). The correct ``pin_cpus`` setting to use is
dependent on your CPU architecture (Intel and AMD systems enumerate
processors in different ways). Using the wrong ``pin_cpus`` setting
can cause poor performance. Here is what a worker node entry should
look like when using PF_RING and CPU pinning::
[worker-1]
type=worker
host=10.0.0.50
interface=eth0
lb_method=pf_ring
lb_procs=10
pin_cpus=2,3,4,5,6,7,8,9,10,11
Using PF_RING+DNA with symmetric RSS
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
You must have a PF_RING+DNA license in order to do this. You can sniff
each packet only once.
1. Load the DNA NIC driver (i.e. ixgbe) on each worker host.
2. Run "ethtool -L dna0 combined 10" (this will establish 10 RSS queues
on your NIC) on each worker host. You must make sure that you set the
number of RSS queues to the same as the number you specify for the
lb_procs option in the node.cfg file.
3. On the manager, configure your worker(s) in node.cfg::
[worker-1]
type=worker
host=10.0.0.50
interface=dna0
lb_method=pf_ring
lb_procs=10
Using PF_RING+DNA with pfdnacluster_master
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
You must have a PF_RING+DNA license and a libzero license in order to do
this. You can load balance between multiple applications and sniff the
same packets multiple times with different tools.
1. Load the DNA NIC driver (i.e. ixgbe) on each worker host.
2. Run "ethtool -L dna0 1" (this will establish 1 RSS queues on your NIC)
on each worker host.
3. Run the pfdnacluster_master command on each worker host. For example::
pfdnacluster_master -c 21 -i dna0 -n 10
Make sure that your cluster ID (21 in this example) matches the interface
name you specify in the node.cfg file. Also make sure that the number
of processes you're balancing across (10 in this example) matches
the lb_procs option in the node.cfg file.
4. If you are load balancing to other processes, you can use the
pfringfirstappinstance variable in broctl.cfg to set the first
application instance that Bro should use. For example, if you are running
pfdnacluster_master with "-n 10,4" you would set
pfringfirstappinstance=4. Unfortunately that's still a global setting
in broctl.cfg at the moment but we may change that to something you can
set in node.cfg eventually.
5. On the manager, configure your worker(s) in node.cfg::
[worker-1]
type=worker
host=10.0.0.50
interface=dnacluster:21
lb_method=pf_ring
lb_procs=10

View file

@@ -1,488 +0,0 @@
===============
Writing Plugins
===============
Bro internally provides a plugin API that enables extending
the system dynamically, without modifying the core code base. That way
custom code remains self-contained and can be maintained, compiled,
and installed independently. Currently, plugins can add the following
functionality to Bro:
- Bro scripts.
- Builtin functions/events/types for the scripting language.
- Protocol analyzers.
- File analyzers.
- Packet sources and packet dumpers.
- Logging framework backends.
- Input framework readers.
A plugin's functionality is available to the user just as if Bro had
the corresponding code built-in. Indeed, internally many of Bro's
pieces are structured as plugins as well; they are just statically
compiled into the binary rather than loaded dynamically at runtime.
Quick Start
===========
Writing a basic plugin is quite straightforward as long as one
follows a few conventions. In the following we create a simple example
plugin that adds a new built-in function (bif) to Bro: we'll add
``rot13(s: string) : string``, a function that rotates every letter
in a string by 13 places.
Generally, a plugin comes in the form of a directory following a
certain structure. To get started, Bro's distribution provides a
helper script ``aux/bro-aux/plugin-support/init-plugin`` that creates
a skeleton plugin that can then be customized. Let's use that::
# init-plugin ./rot13-plugin Demo Rot13
As you can see, the script takes three arguments. The first is a
directory inside which the plugin skeleton will be created. The second
is the namespace the plugin will live in, and the third is a descriptive
name for the plugin itself relative to the namespace. Bro uses the
combination of namespace and name to identify a plugin. The namespace
serves to avoid naming conflicts between plugins written by independent
developers; pick, e.g., the name of your organisation. The namespace
``Bro`` is reserved for functionality distributed by the Bro Project. In
our example, the plugin will be called ``Demo::Rot13``.
The ``init-plugin`` script puts a number of files in place. The full
layout is described later. For now, all we need is
``src/rot13.bif``. It's initially empty, but we'll add our new bif
there as follows::
# cat src/rot13.bif
module Demo;

function rot13%(s: string%) : string
    %{
    char* rot13 = copy_string(s->CheckString());

    for ( char* p = rot13; *p; p++ )
        {
        if ( ! isalpha(*p) )
            // Skip non-letters so they pass through unchanged.
            continue;

        char b = islower(*p) ? 'a' : 'A';
        *p = (*p - b + 13) % 26 + b;
        }

    BroString* bs = new BroString(1, reinterpret_cast<byte_vec>(rot13),
                                  strlen(rot13));
    return new StringVal(bs);
    %}
The syntax of this file is just like any other ``*.bif`` file; we
won't go into it here.
Now we can already compile our plugin; we just need to tell the
configure script (that ``init-plugin`` created) where the Bro
source tree is located (Bro needs to have been built there first)::
# cd rot13-plugin
# ./configure --bro-dist=/path/to/bro/dist && make
[... cmake output ...]
This builds the plugin in a subdirectory ``build/``. In fact, that
subdirectory *becomes* the plugin: when ``make`` finishes, ``build/``
has everything it needs for Bro to recognize it as a dynamic plugin.
Let's try that. Once we point Bro to the ``build/`` directory, it will
pull in our new plugin automatically, as we can check with the ``-N``
option::
# export BRO_PLUGIN_PATH=/path/to/rot13-plugin/build
# bro -N
[...]
Demo::Rot13 - <Insert description> (dynamic, version 0.1.0)
[...]
That looks quite good, except for the dummy description that we should
replace with something nicer so that users will know what our plugin
is about. We do this by editing the ``config.description`` line in
``src/Plugin.cc``, like this::
[...]
plugin::Configuration Plugin::Configure()
    {
    plugin::Configuration config;
    config.name = "Demo::Rot13";
    config.description = "Caesar cipher rotating a string's characters by 13 places.";
    config.version.major = 0;
    config.version.minor = 1;
    config.version.patch = 0;
    return config;
    }
[...]
Now rebuild and verify that the description is visible::
# make
[...]
# bro -N | grep Rot13
Demo::Rot13 - Caesar cipher rotating a string's characters by 13 places. (dynamic, version 0.1.0)
Bro can also show us what exactly the plugin provides with the
more verbose option ``-NN``::
# bro -NN
[...]
Demo::Rot13 - Caesar cipher rotating a string's characters by 13 places. (dynamic, version 0.1.0)
[Function] Demo::rot13
[...]
There's our function. Now let's use it::
# bro -e 'print Demo::rot13("Hello")'
Uryyb
It works. We next install the plugin along with Bro itself, so that it
will find it directly without needing the ``BRO_PLUGIN_PATH``
environment variable. If we first unset the variable, the function
will no longer be available::
# unset BRO_PLUGIN_PATH
# bro -e 'print Demo::rot13("Hello")'
error in <command line>, line 1: unknown identifier Demo::rot13, at or near "Demo::rot13"
Once we install it, it works again::
# make install
# bro -e 'print Demo::rot13("Hello")'
Uryyb
The installed version went into
``<bro-install-prefix>/lib/bro/plugins/Demo_Rot13``.
One can distribute the plugin independently of Bro for others to use.
To distribute in source form, just remove the ``build/`` directory
(``make distclean`` does that) and then tar up the whole ``rot13-plugin/``
directory. Others then follow the same process as above after
unpacking.
To distribute the plugin in binary form, the build process
conveniently creates a corresponding tarball in ``build/dist/``. In
this case, it's called ``Demo_Rot13-0.1.0.tar.gz``, with the version
number coming out of the ``VERSION`` file that ``init-plugin`` put
into place. The binary tarball has everything needed to run the
plugin, but no further source files. Optionally, one can include
further files by specifying them in the plugin's ``CMakeLists.txt``
through the ``bro_plugin_dist_files`` macro; the skeleton does that
for ``README``, ``VERSION``, ``CHANGES``, and ``COPYING``. To use the
plugin through the binary tarball, just unpack it into
``<bro-install-prefix>/lib/bro/plugins/``. Alternatively, if you unpack
it in another location, then you need to point ``BRO_PLUGIN_PATH`` there.
Before distributing your plugin, you should edit some of the meta
files that ``init-plugin`` puts in place. Edit ``README`` and
``VERSION``, and update ``CHANGES`` when you make changes. Also put a
license file in place as ``COPYING``; if BSD is fine, you will find a
template in ``COPYING.edit-me``.
Plugin Directory Layout
=======================
A plugin's directory needs to follow a set of conventions so that Bro
(1) recognizes it as a plugin, and (2) knows what to load. While
``init-plugin`` takes care of most of this, the following is the full
story. We'll use ``<base>`` to represent a plugin's top-level
directory. With the skeleton, ``<base>`` corresponds to ``build/``.
``<base>/__bro_plugin__``
A file that marks a directory as containing a Bro plugin. The file
must exist, and its content must consist of a single line with the
qualified name of the plugin (e.g., "Demo::Rot13").
``<base>/lib/<plugin-name>.<os>-<arch>.so``
The shared library containing the plugin's compiled code. Bro will
load this in dynamically at run-time if OS and architecture match
the current platform.
``scripts/``
A directory with the plugin's custom Bro scripts. When the plugin
gets activated, this directory will be automatically added to
``BROPATH``, so that any scripts/modules inside can be
"@load"ed.
``scripts``/__load__.bro
A Bro script that will be loaded when the plugin gets activated.
When this script executes, any BiF elements that the plugin
defines will already be available. See below for more information
on activating plugins.
``scripts``/__preload__.bro
A Bro script that will be loaded when the plugin gets activated,
but before any BiF elements become available. See below for more
information on activating plugins.
``lib/bif/``
Directory with auto-generated Bro scripts that declare the plugin's
bif elements. The files here are produced by ``bifcl``.
Any other files in ``<base>`` are ignored by Bro.
By convention, a plugin should put its custom scripts into subfolders of
``scripts/``, i.e., ``scripts/<plugin-namespace>/<plugin-name>/<script>.bro``,
to avoid conflicts. As usual, you can then put a ``__load__.bro`` in there
as well so that, e.g., ``@load Demo/Rot13`` could load a whole module in
the form of multiple individual scripts.
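For example, a hypothetical layout for our ``Demo::Rot13`` plugin's scripts
could look like::

    scripts/Demo/Rot13/__load__.bro
    scripts/Demo/Rot13/main.bro

where ``scripts/Demo/Rot13/__load__.bro`` contains just a single line::

    @load ./main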
Note that in addition to the paths above, the ``init-plugin`` helper
puts some more files and directories in place that help with
development and installation (e.g., ``CMakeLists.txt``, ``Makefile``,
and source code in ``src/``). However, all these do not have a special
meaning for Bro at runtime and aren't necessary for a plugin to
function.
``init-plugin``
===============
``init-plugin`` puts a basic plugin structure in place that follows
the above layout and augments it with a CMake build and installation
system. Plugins with this structure can be used both directly out of
their source directory (after ``make`` and setting Bro's
``BRO_PLUGIN_PATH``), and when installed alongside Bro (after ``make
install``).
``make install`` copies over the ``lib`` and ``scripts`` directories,
as well as the ``__bro_plugin__`` magic file and any further
distribution files specified in ``CMakeLists.txt`` (e.g., README,
VERSION). You can find a full list of files installed in
``build/MANIFEST``. Behind the scenes, ``make install`` really just
unpacks the binary tarball from ``build/dist`` into the destination
directory.
``init-plugin`` will never overwrite existing files. If its target
directory already exists, it will by default decline to do anything.
You can run it with ``-u`` instead to update an existing plugin,
however it will never overwrite any existing files; it will only put
in place files it doesn't find yet. To revert a file back to what
``init-plugin`` created originally, delete it first and then rerun
with ``-u``.
``init-plugin`` puts a ``configure`` script in place that wraps
``cmake`` with a more familiar configure-style configuration. By
default, the script provides two options for specifying paths to the
Bro source (``--bro-dist``) and to the plugin's installation directory
(``--install-root``). To extend ``configure`` with plugin-specific
options (such as search paths for its dependencies) don't edit the
script directly but instead extend ``configure.plugin``, which
``configure`` includes. That way you will be able to more easily
update ``configure`` in the future when the distribution version
changes. In ``configure.plugin`` you can use the predefined shell
function ``append_cache_entry`` to seed values into the CMake cache;
see the installed skeleton version and existing plugins for examples.
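As an illustration, a hypothetical ``configure.plugin`` handler for a
``--with-rot13-tables=DIR`` option (both the option and the CMake variable
name are made up here) could seed the cache like this::

    append_cache_entry ROT13_TABLE_DIR PATH $optarg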
Activating a Plugin
===================
A plugin needs to be *activated* to make it available to the user.
Activating a plugin will:
1. Load the dynamic module
2. Make any bif items available
3. Add the ``scripts/`` directory to ``BROPATH``
4. Load ``scripts/__preload__.bro``
5. Make BiF elements available to scripts.
6. Load ``scripts/__load__.bro``
By default, Bro will automatically activate all dynamic plugins found
in its search path ``BRO_PLUGIN_PATH``. However, in bare mode (``bro
-b``), no dynamic plugins will be activated by default; instead the
user can selectively enable individual plugins in scriptland using the
``@load-plugin <qualified-plugin-name>`` directive (e.g.,
``@load-plugin Demo::Rot13``). Alternatively, one can activate a
plugin from the command-line by specifying its full name
(``Demo::Rot13``), or set the environment variable
``BRO_PLUGIN_ACTIVATE`` to a list of comma(!)-separated names of
plugins to unconditionally activate, even in bare mode.
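For example, to force our example plugin on even in bare mode (a sketch,
assuming the plugin's ``build/`` directory is in ``BRO_PLUGIN_PATH``)::

    # export BRO_PLUGIN_ACTIVATE="Demo::Rot13"
    # bro -b -e 'print Demo::rot13("Hello")'
    Uryyb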
``bro -N`` shows activated plugins separately from found but not yet
activated plugins. Note that plugins compiled statically into Bro are
always activated, and hence show up as such even in bare mode.
Plugin Components
=================
The following subsections detail providing individual types of
functionality via plugins. Note that a single plugin can provide more
than one component type. For example, a plugin could provide multiple
protocol analyzers at once; or both a logging backend and input reader
at the same time.
.. todo::
These subsections are mostly missing right now, as much of their
content isn't actually plugin-specific, but concerns generally
writing such functionality for Bro. The best way to get started
right now is to look at existing code implementing similar
functionality, either as a plugin or inside Bro proper. Also, for
each component type there's a unit test in
``testing/btest/plugins`` creating a basic plugin skeleton with a
corresponding component.
Bro Scripts
-----------
Scripts are easy: just put them into ``scripts/``, as described above.
The CMake infrastructure will automatically install them, as well
include them into the source and binary plugin distributions.
Builtin Language Elements
-------------------------
Functions
TODO
Events
TODO
Types
TODO
Protocol Analyzers
------------------
TODO.
File Analyzers
--------------
TODO.
Logging Writer
--------------
TODO.
Input Reader
------------
TODO.
Packet Sources
--------------
TODO.
Packet Dumpers
--------------
TODO.
Hooks
=====
TODO.
Testing Plugins
===============
A plugin should come with a test suite to exercise its functionality.
The ``init-plugin`` script puts in place a basic
:doc:`BTest <../../components/btest/README>` setup
to start with. Initially, it comes with a single test that just checks
that Bro loads the plugin correctly. It won't have a baseline yet, so
let's get that in place::
# cd tests
# btest -d
[ 0%] rot13.show-plugin ... failed
% 'btest-diff output' failed unexpectedly (exit code 100)
% cat .diag
== File ===============================
Demo::Rot13 - Caesar cipher rotating a string's characters by 13 places. (dynamic, version 0.1.0)
[Function] Demo::rot13
== Error ===============================
test-diff: no baseline found.
=======================================
# btest -U
all 1 tests successful
# cd ..
# make test
make -C tests
make[1]: Entering directory `tests'
all 1 tests successful
make[1]: Leaving directory `tests'
Now let's add a custom test that ensures that our bif works
correctly::
# cd tests
# cat >rot13/bif-rot13.bro
# @TEST-EXEC: bro %INPUT >output
# @TEST-EXEC: btest-diff output
event bro_init()
    {
    print Demo::rot13("Hello");
    }
Check the output::
# btest -d rot13/bif-rot13.bro
[ 0%] rot13.bif-rot13 ... failed
% 'btest-diff output' failed unexpectedly (exit code 100)
% cat .diag
== File ===============================
Uryyb
== Error ===============================
test-diff: no baseline found.
=======================================
% cat .stderr
1 of 1 test failed
Install the baseline::
# btest -U rot13/bif-rot13.bro
all 1 tests successful
Run the test-suite::
# btest
all 2 tests successful
Debugging Plugins
=================
If your plugin isn't loading as expected, Bro's debugging facilities
can help illuminate what's going on. To enable, recompile Bro
with debugging support (``./configure --enable-debug``), and
afterwards rebuild your plugin as well. If you then run Bro with ``-B
plugins``, it will produce a file ``debug.log`` that records details
about the process for searching, loading, and activating plugins.
To generate your own debugging output from inside your plugin, you can
add a custom debug stream by using the ``PLUGIN_DBG_LOG(<plugin>,
<args>)`` macro (defined in ``DebugLogger.h``), where ``<plugin>`` is
the ``Plugin`` instance and ``<args>`` are printf-style arguments,
just as with Bro's standard debugging macros (grep for ``DBG_LOG`` in
Bro's ``src/`` to see examples). At runtime, you can then activate
your plugin's debugging output with ``-B plugin-<name>``, where
``<name>`` is the name of the plugin as returned by its
``Configure()`` method, yet with the namespace-separator ``::``
replaced with a simple dash. Example: If the plugin is called
``Demo::Rot13``, use ``-B plugin-Demo-Rot13``. As usual, the debugging
output will be recorded to ``debug.log`` if Bro's compiled in debug
mode.
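As an illustration, a hypothetical debug statement inside our example
plugin's code (``my_plugin`` and ``len`` are placeholder names) could look
like this::

    PLUGIN_DBG_LOG(my_plugin, "rotating string of length %d", len);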
Documenting Plugins
===================
.. todo::
Integrate all this with Broxygen.

View file

@@ -1,24 +0,0 @@
global mime_to_ext: table[string] of string = {
    ["application/x-dosexec"] = "exe",
    ["text/plain"] = "txt",
    ["image/jpeg"] = "jpg",
    ["image/png"] = "png",
    ["text/html"] = "html",
};

event file_sniff(f: fa_file, meta: fa_metadata)
    {
    if ( f$source != "HTTP" )
        return;

    if ( ! meta?$mime_type )
        return;

    if ( meta$mime_type !in mime_to_ext )
        return;

    local fname = fmt("%s-%s.%s", f$source, f$id, mime_to_ext[meta$mime_type]);
    print fmt("Extracting file %s", fname);
    Files::add_analyzer(f, Files::ANALYZER_EXTRACT, [$extract_filename=fname]);
    }

View file

@@ -1,5 +0,0 @@
event http_reply(c: connection, version: string, code: count, reason: string)
    {
    if ( /^[hH][tT][tT][pP]:/ in c$http$uri && c$http$status_code == 200 )
        print fmt("A local server is acting as an open proxy: %s", c$id$resp_h);
    }

View file

@@ -1,26 +0,0 @@
module HTTP;

export {
    global success_status_codes: set[count] = {
        200,
        201,
        202,
        203,
        204,
        205,
        206,
        207,
        208,
        226,
        304
    };
}

event http_reply(c: connection, version: string, code: count, reason: string)
    {
    if ( /^[hH][tT][tT][pP]:/ in c$http$uri &&
         c$http$status_code in HTTP::success_status_codes )
        print fmt("A local server is acting as an open proxy: %s", c$id$resp_h);
    }

View file

@@ -1,31 +0,0 @@
@load base/utils/site

redef Site::local_nets += { 192.168.0.0/16 };

module HTTP;

export {
    global success_status_codes: set[count] = {
        200,
        201,
        202,
        203,
        204,
        205,
        206,
        207,
        208,
        226,
        304
    };
}

event http_reply(c: connection, version: string, code: count, reason: string)
    {
    if ( Site::is_local_addr(c$id$resp_h) &&
         /^[hH][tT][tT][pP]:/ in c$http$uri &&
         c$http$status_code in HTTP::success_status_codes )
        print fmt("A local server is acting as an open proxy: %s", c$id$resp_h);
    }

View file

@@ -1,40 +0,0 @@
@load base/utils/site
@load base/frameworks/notice

redef Site::local_nets += { 192.168.0.0/16 };

module HTTP;

export {
    redef enum Notice::Type += {
        Open_Proxy
    };

    global success_status_codes: set[count] = {
        200,
        201,
        202,
        203,
        204,
        205,
        206,
        207,
        208,
        226,
        304
    };
}

event http_reply(c: connection, version: string, code: count, reason: string)
    {
    if ( Site::is_local_addr(c$id$resp_h) &&
         /^[hH][tT][tT][pP]:/ in c$http$uri &&
         c$http$status_code in HTTP::success_status_codes )
        NOTICE([$note=HTTP::Open_Proxy,
                $msg=fmt("A local server is acting as an open proxy: %s",
                         c$id$resp_h),
                $conn=c,
                $identifier=cat(c$id$resp_h),
                $suppress_for=1day]);
    }

View file

@@ -1,196 +0,0 @@
.. _http-monitor:
=======================
Monitoring HTTP Traffic
=======================
Bro can be used to log the entire HTTP traffic from your network to the
http.log file. This file can then be used for analysis and auditing
purposes.
In the sections below we briefly explain the structure of the http.log
file, then we show you how to perform basic HTTP traffic monitoring and
analysis tasks with Bro. Some of these ideas and techniques can later be
applied to monitor different protocols in a similar way.
----------------------------
Introduction to the HTTP log
----------------------------
The http.log file contains a summary of all HTTP requests and responses
sent over a Bro-monitored network. Here are the first few columns of
``http.log``::
# ts uid orig_h orig_p resp_h resp_p
1311627961.8 HSH4uV8KVJg 192.168.1.100 52303 192.150.187.43 80
Every single line in this log starts with a timestamp, a unique
connection identifier (UID), and a connection 4-tuple (originator
host/port and responder host/port). The UID can be used to identify all
logged activity (possibly across multiple log files) associated with a
given connection 4-tuple over its lifetime.
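For example, because the UID is carried across log files, a quick way to
pull together everything Bro recorded about a connection is to search all
logs for it (using the sample UID from above)::

    $ grep HSH4uV8KVJg *.log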
The remaining columns detail the activity that's occurring. For
example, the columns on the line below (shortened for brevity) show a
request to the root of the Bro website::
# method host uri referrer user_agent
GET bro.org / - <...>Chrome/12.0.742.122<...>
Network administrators and security engineers, for instance, can use the
information in this log to understand the HTTP activity on the network
and troubleshoot network problems or search for anomalous activities. We must
stress that there is no single right way to perform an analysis. It will
depend on the expertise of the person performing the analysis and the
specific details of the task.
For more information about how to handle the HTTP protocol in Bro,
including a complete list of the fields available in http.log, go to
Bro's :doc:`HTTP script reference
</scripts/base/protocols/http/main.bro>`.
------------------------
Detecting a Proxy Server
------------------------
A proxy server is a device on your network configured to request a
service on behalf of a third system; one of the most common examples is
a Web proxy server. A client without Internet access connects to the
proxy and requests a web page; the proxy sends the request to the web
server, receives the response, and passes it back to the original
client.
Proxies were conceived to help manage a network and provide better
encapsulation. Proxies by themselves are not a security threat, but a
misconfigured or unauthorized proxy can allow others, either inside or
outside the network, to access any web site and even conduct malicious
activities anonymously using the network's resources.
What Proxy Server traffic looks like
-------------------------------------
In general, when a client starts talking with a proxy server, the
traffic consists of two parts: (i) a GET request, and (ii) an HTTP
reply::
Request: GET http://www.bro.org/ HTTP/1.1
Reply: HTTP/1.0 200 OK
This differs from traffic between a client and a normal Web server, where
GET requests should not include the "http:" scheme in the URI. We can use
this to identify a proxy server.
We can write a basic script in Bro to handle the http_reply event and
detect a reply for a ``GET http://`` request.
.. literalinclude:: http_proxy_01.bro
:caption:
:language: bro
:linenos:
.. sourcecode:: console
$ bro -r http/proxy.pcap http_proxy_01.bro
A local server is acting as an open proxy: 192.168.56.101
Basically, the script is checking for a "200 OK" status code on a reply
for a request that includes "http:" (case insensitive). In reality, the
HTTP protocol defines several success status codes other than 200, so we
will extend our basic script to also consider the additional codes.
.. literalinclude:: http_proxy_02.bro
:caption:
:language: bro
:linenos:
.. sourcecode:: console
$ bro -r http/proxy.pcap http_proxy_02.bro
A local server is acting as an open proxy: 192.168.56.101
Next, we will make sure that the responding proxy is part of our local
network.
.. literalinclude:: http_proxy_03.bro
:caption:
:language: bro
:linenos:
.. sourcecode:: console
$ bro -r http/proxy.pcap http_proxy_03.bro
A local server is acting as an open proxy: 192.168.56.101
.. note::
The redefinition of :bro:see:`Site::local_nets` is only done inside
this script to make it a self-contained example. It's typically
redefined somewhere else.
Finally, our goal should be to generate an alert when a proxy has been
detected instead of printing a message on the console output. For that,
we will tag the traffic accordingly and define a new ``Open_Proxy``
``Notice`` type to alert of all tagged communications. Once a
notification has been fired, we will further suppress it for one day.
Below is the complete script.
.. literalinclude:: http_proxy_04.bro
:caption:
:language: bro
:linenos:
.. sourcecode:: console
$ bro -r http/proxy.pcap http_proxy_04.bro
$ cat notice.log
#separator \x09
#set_separator ,
#empty_field (empty)
#unset_field -
#path notice
#open 2018-12-13-22-56-39
#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p fuid file_mime_type file_desc proto note msg sub src dst p n peer_descr actions suppress_for dropped remote_location.country_code remote_location.region remote_location.city remote_location.latitude remote_location.longitude
#types time string addr port addr port string string string enum enum string string addr addr port count string set[enum] interval bool string string string double double
1389654450.449603 CHhAvVGS1DHFjwGM9 192.168.56.1 52679 192.168.56.101 80 - - - tcp HTTP::Open_Proxy A local server is acting as an open proxy: 192.168.56.101 - 192.168.56.1 192.168.56.101 80 - - Notice::ACTION_LOG 86400.000000 F - - - - -
#close 2018-12-13-22-56-40
Note that this script only logs the presence of the proxy to
``notice.log``, but if an additional email is desired (and email
functionality is enabled), then that's done simply by redefining
:bro:see:`Notice::emailed_types` to add the ``Open_Proxy`` notice type
to it.
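For example, a minimal sketch (assuming mail settings such as
``Notice::mail_dest`` are configured elsewhere)::

    redef Notice::emailed_types += { HTTP::Open_Proxy };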
----------------
Inspecting Files
----------------
Files are often transmitted on regular HTTP conversations between a
client and a server. Most of the time these files are harmless, just
images and some other multimedia content, but there are also types of
files, especially executable files, that can damage your system. We can
instruct Bro to create a copy of all files of certain types that it sees
using the :ref:`File Analysis Framework <file-analysis-framework>`
(introduced with Bro 2.2):
.. literalinclude:: file_extraction.bro
:caption:
:language: bro
:linenos:
.. sourcecode:: console
$ bro -r bro.org.pcap file_extraction.bro
Extracting file HTTP-FiIpIB2hRQSDBOSJRg.html
Extracting file HTTP-FMG4bMmVV64eOsCb.txt
Extracting file HTTP-FnaT2a3UDd093opCB9.txt
Extracting file HTTP-FfQGqj4Fhh3pH7nVQj.txt
Extracting file HTTP-FsvATF146kf1Emc21j.txt
[...]
Here, the ``mime_to_ext`` table serves two purposes. It defines which
mime types to extract and also the file suffix of the extracted files.
Extracted files are written to a new ``extract_files`` subdirectory.
Also note that the first conditional in the :bro:see:`file_new` event
handler can be removed to make this behavior generic to other protocols
besides HTTP.
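As a reference for that pattern, here is a minimal sketch (the MIME
types and file suffixes in ``mime_to_ext`` are illustrative
assumptions, and the detected MIME type is taken from the metadata of
the :bro:see:`file_sniff` event):

.. sourcecode:: bro

   @load base/files/extract

   # Illustrative mapping: which MIME types to extract, and with which
   # file suffix to write them out.
   global mime_to_ext: table[string] of string = {
       ["application/x-dosexec"] = "exe",
       ["text/plain"] = "txt",
       ["text/html"] = "html",
   };

   event file_sniff(f: fa_file, meta: fa_metadata)
       {
       # Removing this check makes the extraction generic to all protocols.
       if ( f$source != "HTTP" )
           return;

       if ( ! meta?$mime_type || meta$mime_type !in mime_to_ext )
           return;

       local fname = fmt("%s-%s.%s", f$source, f$id, mime_to_ext[meta$mime_type]);
       print fmt("Extracting file %s", fname);
       Files::add_analyzer(f, Files::ANALYZER_EXTRACT, [$extract_filename=fname]);
       }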

View file

@ -1,203 +0,0 @@
.. _bro-ids:
===
IDS
===
An Intrusion Detection System (IDS) allows you to detect suspicious
activities happening on your network as a result of a past or active
attack. Because of its programming capabilities, Bro can easily be
configured to behave like a traditional IDS and detect common attacks
with well-known patterns, or you can create your own scripts to detect
conditions specific to your particular environment.
In the following sections, we present a few examples of common uses of
Bro as an IDS.
-------------------------------------------------
Detecting an FTP Brute-force Attack and Notifying
-------------------------------------------------
For the purpose of this exercise, we define FTP brute-forcing as too many
rejected usernames and passwords occurring from a single address. We
start by defining a threshold for the number of attempts, a monitoring
interval (in minutes), and a new notice type.
.. sourcecode:: bro
:caption: detect-bruteforcing.bro
module FTP;
export {
redef enum Notice::Type += {
## Indicates a host bruteforcing FTP logins by watching for too
## many rejected usernames or failed passwords.
Bruteforcing
};
## How many rejected usernames or passwords are required before a host
## is considered to be bruteforcing.
const bruteforce_threshold: double = 20 &redef;
## The time period in which the threshold needs to be crossed before
## being reset.
const bruteforce_measurement_interval = 15mins &redef;
}
Using the ftp_reply event, we check for error codes from the `500
series <http://en.wikipedia.org/wiki/List_of_FTP_server_return_codes>`_
for the "USER" and "PASS" commands, representing rejected usernames or
passwords. For this, we can use the :bro:see:`FTP::parse_ftp_reply_code`
function to break down the reply code and check if the first digit is a
"5" or not. If true, we then use the :ref:`Summary Statistics Framework
<sumstats-framework>` to keep track of the number of failed attempts.
.. sourcecode:: bro
:caption: detect-bruteforcing.bro
event ftp_reply(c: connection, code: count, msg: string, cont_resp: bool)
{
local cmd = c$ftp$cmdarg$cmd;
if ( cmd == "USER" || cmd == "PASS" )
{
if ( FTP::parse_ftp_reply_code(code)$x == 5 )
SumStats::observe("ftp.failed_auth", [$host=c$id$orig_h], [$str=cat(c$id$resp_h)]);
}
}
Next, we use the SumStats framework to raise a notice of the attack when
the number of failed attempts exceeds the specified threshold during the
measuring interval.
.. sourcecode:: bro
:caption: detect-bruteforcing.bro
event bro_init()
{
local r1: SumStats::Reducer = [$stream="ftp.failed_auth", $apply=set(SumStats::UNIQUE), $unique_max=double_to_count(bruteforce_threshold+2)];
SumStats::create([$name="ftp-detect-bruteforcing",
$epoch=bruteforce_measurement_interval,
$reducers=set(r1),
$threshold_val(key: SumStats::Key, result: SumStats::Result) =
{
return result["ftp.failed_auth"]$num+0.0;
},
$threshold=bruteforce_threshold,
$threshold_crossed(key: SumStats::Key, result: SumStats::Result) =
{
local r = result["ftp.failed_auth"];
local dur = duration_to_mins_secs(r$end-r$begin);
local plural = r$unique>1 ? "s" : "";
local message = fmt("%s had %d failed logins on %d FTP server%s in %s", key$host, r$num, r$unique, plural, dur);
NOTICE([$note=FTP::Bruteforcing,
$src=key$host,
$msg=message,
$identifier=cat(key$host)]);
}]);
}
Below is the final code for our script.
.. sourcecode:: bro
:caption: detect-bruteforcing.bro
##! FTP brute-forcing detector, triggering when too many rejected usernames or
##! failed passwords have occurred from a single address.
@load base/protocols/ftp
@load base/frameworks/sumstats
@load base/utils/time
module FTP;
export {
redef enum Notice::Type += {
## Indicates a host bruteforcing FTP logins by watching for too
## many rejected usernames or failed passwords.
Bruteforcing
};
## How many rejected usernames or passwords are required before a host
## is considered to be bruteforcing.
const bruteforce_threshold: double = 20 &redef;
## The time period in which the threshold needs to be crossed before
## being reset.
const bruteforce_measurement_interval = 15mins &redef;
}
event bro_init()
{
local r1: SumStats::Reducer = [$stream="ftp.failed_auth", $apply=set(SumStats::UNIQUE), $unique_max=double_to_count(bruteforce_threshold+2)];
SumStats::create([$name="ftp-detect-bruteforcing",
$epoch=bruteforce_measurement_interval,
$reducers=set(r1),
$threshold_val(key: SumStats::Key, result: SumStats::Result) =
{
return result["ftp.failed_auth"]$num+0.0;
},
$threshold=bruteforce_threshold,
$threshold_crossed(key: SumStats::Key, result: SumStats::Result) =
{
local r = result["ftp.failed_auth"];
local dur = duration_to_mins_secs(r$end-r$begin);
local plural = r$unique>1 ? "s" : "";
local message = fmt("%s had %d failed logins on %d FTP server%s in %s", key$host, r$num, r$unique, plural, dur);
NOTICE([$note=FTP::Bruteforcing,
$src=key$host,
$msg=message,
$identifier=cat(key$host)]);
}]);
}
event ftp_reply(c: connection, code: count, msg: string, cont_resp: bool)
{
local cmd = c$ftp$cmdarg$cmd;
if ( cmd == "USER" || cmd == "PASS" )
{
if ( FTP::parse_ftp_reply_code(code)$x == 5 )
SumStats::observe("ftp.failed_auth", [$host=c$id$orig_h], [$str=cat(c$id$resp_h)]);
}
}
.. sourcecode:: console
$ bro -r ftp/bruteforce.pcap protocols/ftp/detect-bruteforcing.bro
$ cat notice.log
#separator \x09
#set_separator ,
#empty_field (empty)
#unset_field -
#path notice
#open 2018-12-13-22-56-21
#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p fuid file_mime_type file_desc proto note msg sub src dst p n peer_descr actions suppress_for dropped remote_location.country_code remote_location.region remote_location.city remote_location.latitude remote_location.longitude
#types time string addr port addr port string string string enum enum string string addr addr port count string set[enum] interval bool string string string double double
1389721084.522861 - - - - - - - - - FTP::Bruteforcing 192.168.56.1 had 20 failed logins on 1 FTP server in 0m37s - 192.168.56.1 - - - - Notice::ACTION_LOG 3600.000000 F - - - - -
#close 2018-12-13-22-56-21
As a final note, the :doc:`detect-bruteforcing.bro
</scripts/policy/protocols/ftp/detect-bruteforcing.bro>` script above is
included with Bro out of the box. Use this feature by loading this script
during startup.
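For example, adding the following line to your ``local.bro`` enables
the detection:

.. sourcecode:: bro

   @load policy/protocols/ftp/detect-bruteforcing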
-------------
Other Attacks
-------------
Detecting SQL Injection Attacks
-------------------------------
Checking files against known malware hashes
-------------------------------------------
Files transmitted on your network could either be completely harmless or
contain viruses and other threats. One possible action against this
threat is to compute the hashes of the files and compare them against a
list of known malware hashes. Bro simplifies this task by offering a
:doc:`detect-MHR.bro </scripts/policy/frameworks/files/detect-MHR.bro>`
script that creates and compares hashes against the `Malware Hash
Registry <https://www.team-cymru.org/Services/MHR/>`_ maintained by Team
Cymru. Use this feature by loading this script during startup.
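As before, a single ``@load`` line in ``local.bro`` is enough:

.. sourcecode:: bro

   @load policy/frameworks/files/detect-MHR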

View file

@ -1,13 +0,0 @@
======================
Examples and Use Cases
======================
.. toctree::
:maxdepth: 2
logs/index.rst
httpmonitor/index.rst
ids/index.rst
mimestats/index.rst
scripting/index.rst

View file

@ -1,308 +0,0 @@
.. _bro-logging:
=======
Logging
=======
Once Bro has been deployed in an environment and monitoring live
traffic, it will, in its default configuration, begin to produce
human-readable ASCII logs. Each log file, produced by Bro's
:ref:`framework-logging`, is populated with organized, mostly
connection-oriented data. As the standard log files are simple ASCII
data, working with the data contained in them can be done from a
command line terminal once you have been familiarized with the types
of data that can be found in each file. In the following, we work
through the logs' general structure and then examine some standard ways
of working with them.
----------------------
Working with Log Files
----------------------
Generally, all of Bro's log files are produced by a corresponding
script that defines their individual structure. However, as each log
file flows through the Logging Framework, they share a set of
structural similarities. Without breaking into the scripting aspect of
Bro here, a bird's eye view of how the log files are produced
progresses as follows. The script's author defines the kinds of data,
such as the originating IP address or the duration of a connection,
which will make up the fields (i.e., columns) of the log file. The
author then decides what network activity should generate a single log
file entry (i.e., one line). For example, this could be a connection
having been completed or an HTTP ``GET`` request being issued by an
originator. When these behaviors are observed during operation, the
data is passed to the Logging Framework which adds the entry
to the appropriate log file.
As the fields of the log entries can be further customized by the
user, the Logging Framework makes use of a header block to ensure that
it remains self-describing. Here are the first few lines of a ``conn.log``.
.. sourcecode:: console
$ cat conn.log
#separator \x09
#set_separator ,
#empty_field (empty)
#unset_field -
#path conn
#open 2018-12-10-22-18-00
#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p proto service duration orig_bytes resp_bytes conn_state local_orig local_resp missed_bytes history orig_pkts orig_ip_bytes resp_pkts resp_ip_bytes tunnel_parents
#types time string addr port addr port enum string interval count count string bool bool count string count count count count set[string]
1300475167.096535 CHhAvVGS1DHFjwGM9 141.142.220.202 5353 224.0.0.251 5353 udp dns - - - S0 - - 0 D 1 73 0 0 -
1300475167.097012 ClEkJM2Vm5giqnMf4h fe80::217:f2ff:fed7:cf65 5353 ff02::fb 5353 udp dns - - - S0 - - 0 D 1 199 0 0 -
1300475167.099816 C4J4Th3PJpwUYZZ6gc 141.142.220.50 5353 224.0.0.251 5353 udp dns - - - S0 - - 0 D 1 179 0 0 -
[...]
As you can see, the header consists of lines prefixed by ``#`` and
includes information such as what separators are being used for
various types of data, what an empty field looks like and what an
unset field looks like. In this example, the default TAB separator is
being used as the delimiter between fields (``\x09`` is the tab
character in hex). It also lists the comma as the separator for set
data, the string ``(empty)`` as the indicator for an empty field and
the ``-`` character as the indicator for a field that hasn't been set.
The timestamp for when the file was created is included under
``#open``. The header then goes on to detail the fields being listed
in the file and the data types of those fields, in ``#fields`` and
``#types``, respectively. These two entries are often the two most
significant points of interest as they detail not only the field names
but the data types used. When navigating through the different log
files with tools like ``sed``, ``awk``, or ``grep``, having the field
definitions readily available saves the user some mental leg work. The
field names are also a key resource for using the :ref:`bro-cut
<bro-cut>` utility included with Bro, see below.
The header is followed by the main content. In this example we see a
number of connections with their key properties, such as originator and
responder IP addresses (note how Bro transparently handles both IPv4 and
IPv6), transport-layer ports, application-layer services (the
``service`` field is filled in as Bro determines a specific protocol to
be in use, independent of the connection's ports), payload size, and
more. See :bro:type:`Conn::Info` for a description of all fields.
In addition to ``conn.log``, Bro generates many further logs by
default, including:
``dpd.log``
A summary of protocols encountered on non-standard ports.
``dns.log``
All DNS activity.
``ftp.log``
A log of FTP session-level activity.
``files.log``
Summaries of files transferred over the network. This information
is aggregated from different protocols, including HTTP, FTP, and
SMTP.
``http.log``
A summary of all HTTP requests with their replies.
``known_certs.log``
SSL certificates seen in use.
``smtp.log``
A summary of SMTP activity.
``ssl.log``
A record of SSL sessions, including certificates being used.
``weird.log``
A log of unexpected protocol-level activity. Whenever Bro's
protocol analysis encounters a situation it would not expect
(e.g., an RFC violation) it logs it in this file. Note that in
practice, real-world networks tend to exhibit a large number of
such "crud" that is usually not worth following up on.
As you can see, some log files are specific to a particular protocol,
while others aggregate information across different types of activity.
For a complete list of log files and a description of each one's
purpose, see :doc:`Log Files </script-reference/log-files>`.
.. _bro-cut:
Using ``bro-cut``
-----------------
The ``bro-cut`` utility can be used in place of other tools to build
terminal commands that remain flexible and accurate independent of
possible changes to the log file itself. It accomplishes this by parsing
the header in each file and allowing the user to refer to the specific
columnar data available (in contrast to tools like ``awk`` that
require the user to refer to fields referenced by their position).
For example, the following command extracts just the given columns
from a ``conn.log``:
.. sourcecode:: console
$ cat conn.log | bro-cut id.orig_h id.orig_p id.resp_h duration
141.142.220.202 5353 224.0.0.251 -
fe80::217:f2ff:fed7:cf65 5353 ff02::fb -
141.142.220.50 5353 224.0.0.251 -
141.142.220.118 43927 141.142.2.2 0.000435
141.142.220.118 37676 141.142.2.2 0.000420
141.142.220.118 40526 141.142.2.2 0.000392
141.142.220.118 32902 141.142.2.2 0.000317
141.142.220.118 59816 141.142.2.2 0.000343
141.142.220.118 59714 141.142.2.2 0.000375
141.142.220.118 58206 141.142.2.2 0.000339
[...]
The corresponding ``awk`` command will look like this:
.. sourcecode:: console
$ awk '/^[^#]/ {print $3, $4, $5, $6, $9}' conn.log
141.142.220.202 5353 224.0.0.251 5353 -
fe80::217:f2ff:fed7:cf65 5353 ff02::fb 5353 -
141.142.220.50 5353 224.0.0.251 5353 -
141.142.220.118 43927 141.142.2.2 53 0.000435
141.142.220.118 37676 141.142.2.2 53 0.000420
141.142.220.118 40526 141.142.2.2 53 0.000392
141.142.220.118 32902 141.142.2.2 53 0.000317
141.142.220.118 59816 141.142.2.2 53 0.000343
141.142.220.118 59714 141.142.2.2 53 0.000375
141.142.220.118 58206 141.142.2.2 53 0.000339
[...]
While the output is similar, the advantage of using ``bro-cut`` over
``awk`` lies in the fact that, while ``awk`` is flexible and powerful,
``bro-cut`` was specifically designed to work with Bro's log files. Firstly, the
``bro-cut`` output includes only the log file entries, while the
``awk`` solution needs to skip the header manually. Secondly, since
``bro-cut`` uses the field descriptors to identify and extract data,
it allows for flexibility independent of the format and contents of
the log file. It's not uncommon for a Bro configuration to add extra
fields to various log files as required by the environment. In this
case, the fields in the ``awk`` command would have to be altered to
compensate for the new position whereas the ``bro-cut`` output would
not change.
.. note::
The sequence of field names given to ``bro-cut`` determines the
output order, which means you can also use ``bro-cut`` to reorder
fields. That can be helpful when piping into, e.g., ``sort``.
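For example, putting ``id.resp_h`` first makes the output easy to sort
by responder address:

.. sourcecode:: console

   $ cat conn.log | bro-cut id.resp_h id.orig_h ts | sort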
As you may have noticed, the ``bro-cut`` command receives its input
through the ``cat`` command and the ``|`` operator. Whereas tools like
``awk`` allow you to indicate the log file as a command-line option,
``bro-cut`` only takes input through redirection such as ``|`` and
``<``. There are a couple of ways to direct log file data
into ``bro-cut``, each dependent upon the type of log file you're
processing. A caveat of its use, however, is that all of the
header lines must be present.
.. note::
``bro-cut`` provides an option ``-c`` to include a corresponding
format header in the output, which allows chaining multiple
``bro-cut`` instances or performing further post-processing that
evaluates the header information.
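For instance, with ``-c`` the header survives the first stage, so a
second ``bro-cut`` can still select fields by name:

.. sourcecode:: console

   $ cat conn.log | bro-cut -c ts id.orig_h id.resp_h | bro-cut -d ts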
In its default setup, Bro will rotate log files on an hourly basis,
moving the current log file into a directory named with the format
``YYYY-MM-DD`` and gzip-compressing it under a file name that includes
the log file type and the time range of the file. When processing a
compressed log file, you simply adjust your command-line tools to use
the complementary ``z*`` versions of commands such as ``cat``
(``zcat``) or ``grep`` (``zgrep``).
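For example, assuming an archived, rotated log (the path below is
hypothetical):

.. sourcecode:: console

   $ zcat 2018-12-10/conn.00:00:00-01:00:00.log.gz | bro-cut id.orig_h id.resp_h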
Working with Timestamps
-----------------------
``bro-cut`` accepts the flag ``-d`` to convert the epoch time values
in the log files to human-readable format. The following command
includes the human readable time stamp, the unique identifier, the
HTTP ``Host``, and HTTP ``URI`` as extracted from the ``http.log``
file:
.. sourcecode:: console
$ bro-cut -d ts uid host uri < http.log
2011-03-18T19:06:08+0000 CUM0KZ3MLUfNB0cl11 bits.wikimedia.org /skins-1.5/monobook/main.css
2011-03-18T19:06:08+0000 CwjjYJ2WqgTbAqiHl6 upload.wikimedia.org /wikipedia/commons/6/63/Wikipedia-logo.png
2011-03-18T19:06:08+0000 C3eiCBGOLw3VtHfOj upload.wikimedia.org /wikipedia/commons/thumb/b/bb/Wikipedia_wordmark.svg/174px-Wikipedia_wordmark.svg.png
2011-03-18T19:06:08+0000 Ck51lg1bScffFj34Ri upload.wikimedia.org /wikipedia/commons/b/bd/Bookshelf-40x201_6.png
2011-03-18T19:06:08+0000 CtxTCR2Yer0FR1tIBg upload.wikimedia.org /wikipedia/commons/thumb/8/8a/Wikinews-logo.png/35px-Wikinews-logo.png
[...]
Oftentimes log files from multiple sources are stored in UTC time to
allow easy correlation. Converting the timestamps from a log file to
UTC can be accomplished with the ``-u`` option:
.. sourcecode:: console
$ bro-cut -u ts uid host uri < http.log
2011-03-18T19:06:08+0000 CUM0KZ3MLUfNB0cl11 bits.wikimedia.org /skins-1.5/monobook/main.css
2011-03-18T19:06:08+0000 CwjjYJ2WqgTbAqiHl6 upload.wikimedia.org /wikipedia/commons/6/63/Wikipedia-logo.png
2011-03-18T19:06:08+0000 C3eiCBGOLw3VtHfOj upload.wikimedia.org /wikipedia/commons/thumb/b/bb/Wikipedia_wordmark.svg/174px-Wikipedia_wordmark.svg.png
2011-03-18T19:06:08+0000 Ck51lg1bScffFj34Ri upload.wikimedia.org /wikipedia/commons/b/bd/Bookshelf-40x201_6.png
2011-03-18T19:06:08+0000 CtxTCR2Yer0FR1tIBg upload.wikimedia.org /wikipedia/commons/thumb/8/8a/Wikinews-logo.png/35px-Wikinews-logo.png
[...]
The default time format when using ``-d`` or ``-u`` is the
``strftime`` format string ``%Y-%m-%dT%H:%M:%S%z``, which results in a
string with year, month, day of month, followed by hour, minutes,
seconds, and the timezone offset. The default format can be altered
with the ``-D`` and ``-U`` flags, which take standard ``strftime``
syntax. For example, to format the date in day-month-year order, you
could use a format string of ``%d-%m-%YT%H:%M:%S%z``:
.. sourcecode:: console
$ bro-cut -D %d-%m-%YT%H:%M:%S%z ts uid host uri < http.log
18-03-2011T19:06:08+0000 CUM0KZ3MLUfNB0cl11 bits.wikimedia.org /skins-1.5/monobook/main.css
18-03-2011T19:06:08+0000 CwjjYJ2WqgTbAqiHl6 upload.wikimedia.org /wikipedia/commons/6/63/Wikipedia-logo.png
18-03-2011T19:06:08+0000 C3eiCBGOLw3VtHfOj upload.wikimedia.org /wikipedia/commons/thumb/b/bb/Wikipedia_wordmark.svg/174px-Wikipedia_wordmark.svg.png
18-03-2011T19:06:08+0000 Ck51lg1bScffFj34Ri upload.wikimedia.org /wikipedia/commons/b/bd/Bookshelf-40x201_6.png
18-03-2011T19:06:08+0000 CtxTCR2Yer0FR1tIBg upload.wikimedia.org /wikipedia/commons/thumb/8/8a/Wikinews-logo.png/35px-Wikinews-logo.png
[...]
See ``man strftime`` for more options for the format string.
Using UIDs
----------
While Bro can do signature-based analysis, its primary focus is on
behavioral detection which alters the practice of log review from
"reactionary review" to a process a little more akin to a hunting
trip. A common progression of review includes correlating a session
across multiple log files. As a connection is processed by Bro, a
unique identifier is assigned to each session. This unique identifier
is generally included in any log file entry associated with that
connection and can be used to cross-reference different log files.
A simple example would be to cross-reference a UID seen in a
``conn.log`` file. Here, we're looking for the connection with the
largest number of bytes from the responder by redirecting the output
of ``cat conn.log`` into ``bro-cut`` to extract the UID and
``resp_bytes`` fields, then sorting that output on ``resp_bytes``.
.. sourcecode:: console
$ cat conn.log | bro-cut uid resp_bytes | sort -nrk2 | head -5
CwjjYJ2WqgTbAqiHl6 734
CtxTCR2Yer0FR1tIBg 734
Ck51lg1bScffFj34Ri 734
CLNN1k2QMum1aexUK7 734
CykQaM33ztNt0csB9a 733
Taking the UID of the first of the top responses, we can now
cross-reference that with the UIDs in the ``http.log`` file.
.. sourcecode:: console
$ cat http.log | bro-cut uid id.resp_h method status_code host uri | grep UM0KZ3MLUfNB0cl11
CUM0KZ3MLUfNB0cl11 208.80.152.118 GET 304 bits.wikimedia.org /skins-1.5/monobook/main.css
As you can see, Bro identified and logged the HTTP ``GET`` activity
within the session. Given that HTTP is a stream protocol, a single
connection can carry multiple ``GET``/``POST``/etc. requests, and Bro
is able to extract and track that information for you, giving you an
in-depth and structured view into the HTTP traffic on your network.

View file

@ -1,108 +0,0 @@
.. _mime-stats:
====================
MIME Type Statistics
====================
Files are constantly transmitted over HTTP on regular networks. These
files belong to a specific category (e.g., executable, text, image)
identified by a `Multipurpose Internet Mail Extensions (MIME)
<http://en.wikipedia.org/wiki/MIME>`_ type. Although MIME was originally
developed to identify the types of non-text attachments in email, it is
also used by web browsers to identify the types of files transmitted and
present them accordingly.
In this tutorial, we will demonstrate how to use the Sumstats Framework
to collect statistical information based on MIME types; specifically,
the total number of occurrences, the size in bytes, and the number of
unique hosts transmitting files over HTTP for each type. For
instructions on extracting and creating a local copy of these files,
see :ref:`this tutorial <http-monitor>`.
-----------------------------
MIME Statistics with Sumstats
-----------------------------
When working with the :ref:`Summary Statistics Framework
<sumstats-framework>`, you need to define three different pieces: (i)
observations, where events are observed and fed into the framework;
(ii) reducers, where observations are collected and measured; and
(iii) sumstats, where the main functionality is implemented.
We start by defining our observation along with a record to store
all statistical values and an observation interval. We are conducting our
observation on the :bro:see:`HTTP::log_http` event and are interested
in the MIME type, the size of the file (``response_body_len``), and the
originator host (``orig_h``). We use the MIME type as our key and create
observers for the other two values.
.. literalinclude:: mimestats.bro
:caption:
:language: bro
:linenos:
:lines: 6-29
:lineno-start: 6
.. literalinclude:: mimestats.bro
:caption:
:language: bro
:linenos:
:lines: 54-64
:lineno-start: 54
Next, we create the reducers. The first will accumulate file sizes
and the second will make sure we only store a host ID once. Below is
the partial code from a :bro:see:`bro_init` handler.
.. literalinclude:: mimestats.bro
:caption:
:language: bro
:linenos:
:lines: 34-37
:lineno-start: 34
In our final step, we create the SumStats where we check for the
observation interval. Once it expires, we populate the record
(defined above) with all the relevant data and write it to a log.
.. literalinclude:: mimestats.bro
:caption:
:language: bro
:linenos:
:lines: 38-51
:lineno-start: 38
After putting the three pieces together we end up with the following
final code for our script.
.. literalinclude:: mimestats.bro
:caption:
:language: bro
:linenos:
.. sourcecode:: console
$ bro -r http/bro.org.pcap mimestats.bro
#separator \x09
#set_separator ,
#empty_field (empty)
#unset_field -
#path mime_metrics
#open 2018-12-14-16-25-06
#fields ts ts_delta mtype uniq_hosts hits bytes
#types time interval string count count count
1389719059.311698 300.000000 image/png 1 9 82176
1389719059.311698 300.000000 image/gif 1 1 172
1389719059.311698 300.000000 image/x-icon 1 2 2300
1389719059.311698 300.000000 text/html 1 2 42231
1389719059.311698 300.000000 text/plain 1 15 128001
1389719059.311698 300.000000 image/jpeg 1 1 186859
1389719059.311698 300.000000 application/pgp-signature 1 1 836
#close 2018-12-14-16-25-06
.. note::
The redefinition of :bro:see:`Site::local_nets` is only done inside
this script to make it a self-contained example. It's typically
redefined somewhere else.

View file

@ -1,64 +0,0 @@
@load base/utils/site
@load base/frameworks/sumstats
redef Site::local_nets += { 10.0.0.0/8 };
module MimeMetrics;
export {
redef enum Log::ID += { LOG };
type Info: record {
## Timestamp when the log line was finished and written.
ts: time &log;
## Time interval that the log line covers.
ts_delta: interval &log;
## The mime type
mtype: string &log;
## The number of unique local hosts that fetched this mime type
uniq_hosts: count &log;
## The number of hits to the mime type
hits: count &log;
## The total number of bytes received by this mime type
bytes: count &log;
};
## The frequency of logging the stats collected by this script.
const break_interval = 5mins &redef;
}
event bro_init() &priority=3
{
Log::create_stream(MimeMetrics::LOG, [$columns=Info, $path="mime_metrics"]);
local r1: SumStats::Reducer = [$stream="mime.bytes",
$apply=set(SumStats::SUM)];
local r2: SumStats::Reducer = [$stream="mime.hits",
$apply=set(SumStats::UNIQUE)];
SumStats::create([$name="mime-metrics",
$epoch=break_interval,
$reducers=set(r1, r2),
$epoch_result(ts: time, key: SumStats::Key, result: SumStats::Result) =
{
local l: Info;
l$ts = network_time();
l$ts_delta = break_interval;
l$mtype = key$str;
l$bytes = double_to_count(floor(result["mime.bytes"]$sum));
l$hits = result["mime.hits"]$num;
l$uniq_hosts = result["mime.hits"]$unique;
Log::write(MimeMetrics::LOG, l);
}]);
}
event HTTP::log_http(rec: HTTP::Info)
{
if ( Site::is_local_addr(rec$id$orig_h) && rec?$resp_mime_types )
{
local mime_type = rec$resp_mime_types[0];
SumStats::observe("mime.bytes", [$str=mime_type],
[$num=rec$response_body_len]);
SumStats::observe("mime.hits", [$str=mime_type],
[$str=cat(rec$id$orig_h)]);
}
}

View file

@ -1,6 +0,0 @@
@load base/protocols/conn
event connection_state_remove(c: connection)
{
print c;
}

View file

@ -1,7 +0,0 @@
@load base/protocols/conn
@load base/protocols/http
event connection_state_remove(c: connection)
{
print c;
}

View file

@ -1,22 +0,0 @@
type Service: record {
name: string;
ports: set[port];
rfc: count;
};
function print_service(serv: Service)
{
print fmt("Service: %s(RFC%d)",serv$name, serv$rfc);
for ( p in serv$ports )
print fmt(" port: %s", p);
}
event bro_init()
{
local dns: Service = [$name="dns", $ports=set(53/udp, 53/tcp), $rfc=1035];
local http: Service = [$name="http", $ports=set(80/tcp, 8080/tcp), $rfc=2616];
print_service(dns);
print_service(http);
}

View file

@ -1,41 +0,0 @@
type Service: record {
name: string;
ports: set[port];
rfc: count;
};
type System: record {
name: string;
services: set[Service];
};
function print_service(serv: Service)
{
print fmt(" Service: %s(RFC%d)",serv$name, serv$rfc);
for ( p in serv$ports )
print fmt(" port: %s", p);
}
function print_system(sys: System)
{
print fmt("System: %s", sys$name);
for ( s in sys$services )
print_service(s);
}
event bro_init()
{
local server01: System;
server01$name = "morlock";
add server01$services[[ $name="dns", $ports=set(53/udp, 53/tcp), $rfc=1035]];
add server01$services[[ $name="http", $ports=set(80/tcp, 8080/tcp), $rfc=2616]];
print_system(server01);
# local dns: Service = [ $name="dns", $ports=set(53/udp, 53/tcp), $rfc=1035];
# local http: Service = [ $name="http", $ports=set(80/tcp, 8080/tcp), $rfc=2616];
# print_service(dns);
# print_service(http);
}

View file

@ -1,22 +0,0 @@
event bro_init()
{
local ssl_ports: set[port];
local non_ssl_ports = set( 23/tcp, 80/tcp, 143/tcp, 25/tcp );
# SSH
add ssl_ports[22/tcp];
# HTTPS
add ssl_ports[443/tcp];
# IMAPS
add ssl_ports[993/tcp];
# Check for SMTPS
if ( 587/tcp !in ssl_ports )
add ssl_ports[587/tcp];
for ( i in ssl_ports )
print fmt("SSL Port: %s", i);
for ( i in non_ssl_ports )
print fmt("Non-SSL Port: %s", i);
}

View file

@ -1,13 +0,0 @@
event bro_init()
{
local samurai_flicks: table[string, string, count, string] of string;
samurai_flicks["Kihachi Okamoto", "Toho", 1968, "Tatsuya Nakadai"] = "Kiru";
samurai_flicks["Hideo Gosha", "Fuji", 1969, "Tatsuya Nakadai"] = "Goyokin";
samurai_flicks["Masaki Kobayashi", "Shochiku Eiga", 1962, "Tatsuya Nakadai" ] = "Harakiri";
samurai_flicks["Yoji Yamada", "Eisei Gekijo", 2002, "Hiroyuki Sanada" ] = "Tasogare Seibei";
for ( [d, s, y, a] in samurai_flicks )
print fmt("%s was released in %d by %s studios, directed by %s and starring %s", samurai_flicks[d, s, y, a], y, s, d, a);
}

View file

@ -1,19 +0,0 @@
event bro_init()
{
# Declaration of the table.
local ssl_services: table[string] of port;
# Initialize the table.
ssl_services = table(["SSH"] = 22/tcp, ["HTTPS"] = 443/tcp);
# Insert one key-yield pair into the table.
ssl_services["IMAPS"] = 993/tcp;
# Check if the key "SMTPS" is not in the table.
if ( "SMTPS" !in ssl_services )
ssl_services["SMTPS"] = 587/tcp;
# Iterate over each key in the table.
for ( k in ssl_services )
print fmt("Service Name: %s - Common Port: %s", k, ssl_services[k]);
}

View file

@ -1,7 +0,0 @@
event bro_init()
{
local v: vector of count = vector(1, 2, 3, 4);
local w = vector(1, 2, 3, 4);
print v;
print w;
}

View file

@ -1,15 +0,0 @@
event bro_init()
{
local v1: vector of count;
local v2 = vector(1, 2, 3, 4);
v1 += 1;
v1 += 2;
v1 += 3;
v1 += 4;
print fmt("contents of v1: %s", v1);
print fmt("length of v1: %d", |v1|);
print fmt("contents of v2: %s", v2);
print fmt("length of v2: %d", |v2|);
}

View file

@ -1,7 +0,0 @@
event bro_init()
{
local addr_vector: vector of addr = vector(1.2.3.4, 2.3.4.5, 3.4.5.6);
for (i in addr_vector)
print mask_addr(addr_vector[i], 18);
}

View file

@ -1,9 +0,0 @@
const port_list: table[port] of string &redef;
redef port_list += { [6666/tcp] = "IRC"};
redef port_list += { [80/tcp] = "WWW" };
event bro_init()
{
print port_list;
}

View file

@ -1,4 +0,0 @@
@load base/protocols/http
redef HTTP::default_capture_password = T;

View file

@ -1,9 +0,0 @@
event bro_init()
{
local a: int;
a = 10;
local b = 10;
if ( a == b )
print fmt("A: %d, B: %d", a, b);
}

View file

@ -1,18 +0,0 @@
# Store the time the previous connection was established.
global last_connection_time: time;
# boolean value to indicate whether we have seen a previous connection.
global connection_seen: bool = F;
event connection_established(c: connection)
{
local net_time: time = network_time();
print fmt("%s: New connection established from %s to %s", strftime("%Y/%M/%d %H:%m:%S", net_time), c$id$orig_h, c$id$resp_h);
if ( connection_seen )
print fmt(" Time since last connection: %s", net_time - last_connection_time);
last_connection_time = net_time;
connection_seen = T;
}

View file

@ -1,11 +0,0 @@
function add_two(i: count): count
{
local added_two = i+2;
print fmt("i + 2 = %d", added_two);
return added_two;
}
event bro_init()
{
local test = add_two(10);
}

View file

@ -1,13 +0,0 @@
event bro_init()
{
local test_string = "The quick brown fox jumps over the lazy dog.";
local test_pattern = /quick|lazy/;
if ( test_pattern in test_string )
{
local results = split_string(test_string, test_pattern);
print results[0];
print results[1];
print results[2];
}
}

View file

@ -1,10 +0,0 @@
event bro_init()
{
local test_string = "equality";
local test_pattern = /equal/;
print fmt("%s and %s %s equal", test_string, test_pattern, test_pattern == test_string ? "are" : "are not");
test_pattern = /equality/;
print fmt("%s and %s %s equal", test_string, test_pattern, test_pattern == test_string ? "are" : "are not");
}

View file

@ -1,25 +0,0 @@
module Conn;
export {
## The record type which contains column fields of the connection log.
type Info: record {
ts: time &log;
uid: string &log;
id: conn_id &log;
proto: transport_proto &log;
service: string &log &optional;
duration: interval &log &optional;
orig_bytes: count &log &optional;
resp_bytes: count &log &optional;
conn_state: string &log &optional;
local_orig: bool &log &optional;
local_resp: bool &log &optional;
missed_bytes: count &log &default=0;
history: string &log &optional;
orig_pkts: count &log &optional;
orig_ip_bytes: count &log &optional;
resp_pkts: count &log &optional;
resp_ip_bytes: count &log &optional;
tunnel_parents: set[string] &log;
};
}

View file

@ -1,15 +0,0 @@
event bro_init()
{
local subnets = vector(172.16.0.0/20, 172.16.16.0/20, 172.16.32.0/20, 172.16.48.0/20);
local addresses = vector(172.16.4.56, 172.16.47.254, 172.16.22.45, 172.16.1.1);
for ( a in addresses )
{
for ( s in subnets )
{
if ( addresses[a] in subnets[s] )
print fmt("%s belongs to subnet %s", addresses[a], subnets[s]);
}
}
}

View file

@ -1,4 +0,0 @@
event connection_established(c: connection)
{
print fmt("%s: New connection established from %s to %s\n", strftime("%Y/%M/%d %H:%m:%S", network_time()), c$id$orig_h, c$id$resp_h);
}

View file

@ -1,19 +0,0 @@
module Factor;
function factorial(n: count): count
{
if ( n == 0 )
return 1;
else
return ( n * factorial(n - 1) );
}
event bro_init()
{
local numbers: vector of count = vector(1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
for ( n in numbers )
print fmt("%d", factorial(numbers[n]));
}

View file

@ -1,35 +0,0 @@
module Factor;
export {
# Append the value LOG to the Log::ID enumeration.
redef enum Log::ID += { LOG };
# Define a new type called Factor::Info.
type Info: record {
num: count &log;
factorial_num: count &log;
};
}
function factorial(n: count): count
{
if ( n == 0 )
return 1;
else
return ( n * factorial(n - 1) );
}
event bro_init()
{
# Create the logging stream.
Log::create_stream(LOG, [$columns=Info, $path="factor"]);
}
event bro_done()
{
local numbers: vector of count = vector(1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
for ( n in numbers )
Log::write( Factor::LOG, [$num=numbers[n],
$factorial_num=factorial(numbers[n])]);
}

View file

@ -1,45 +0,0 @@
module Factor;
export {
redef enum Log::ID += { LOG };
type Info: record {
num: count &log;
factorial_num: count &log;
};
}
function factorial(n: count): count
{
if ( n == 0 )
return 1;
else
return (n * factorial(n - 1));
}
event bro_done()
{
local numbers: vector of count = vector(1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
for ( n in numbers )
Log::write( Factor::LOG, [$num=numbers[n],
$factorial_num=factorial(numbers[n])]);
}
function mod5(id: Log::ID, path: string, rec: Factor::Info) : string
{
if ( rec$factorial_num % 5 == 0 )
return "factor-mod5";
else
return "factor-non5";
}
event bro_init()
{
Log::create_stream(LOG, [$columns=Info, $path="factor"]);
local filter: Log::Filter = [$name="split-mod5s", $path_func=mod5];
Log::add_filter(Factor::LOG, filter);
Log::remove_filter(Factor::LOG, "default");
}

View file

@ -1,50 +0,0 @@
module Factor;
export {
redef enum Log::ID += { LOG };
type Info: record {
num: count &log;
factorial_num: count &log;
};
global log_factor: event(rec: Info);
}
function factorial(n: count): count
{
if ( n == 0 )
return 1;
else
return (n * factorial(n - 1));
}
event bro_init()
{
Log::create_stream(LOG, [$columns=Info, $ev=log_factor, $path="factor"]);
}
event bro_done()
{
local numbers: vector of count = vector(1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
for ( n in numbers )
Log::write( Factor::LOG, [$num=numbers[n],
$factorial_num=factorial(numbers[n])]);
}
function mod5(id: Log::ID, path: string, rec: Factor::Info) : string
{
if ( rec$factorial_num % 5 == 0 )
return "factor-mod5";
else
return "factor-non5";
}
event bro_init()
{
local filter: Log::Filter = [$name="split-mod5s", $path_func=mod5];
Log::add_filter(Factor::LOG, filter);
Log::remove_filter(Factor::LOG, "default");
}

View file

@ -1,7 +0,0 @@
@load policy/protocols/ssh/interesting-hostnames.bro
hook Notice::policy(n: Notice::Info)
{
if ( n$note == SSH::Interesting_Hostname_Login )
add n$actions[Notice::ACTION_EMAIL];
}

View file

@ -1,7 +0,0 @@
@load policy/protocols/ssl/expiring-certs.bro
hook Notice::policy(n: Notice::Info)
{
if ( n$note == SSL::Certificate_Expires_Soon )
n$suppress_for = 12hrs;
}

View file

@ -1,7 +0,0 @@
@load policy/protocols/ssh/interesting-hostnames.bro
@load base/protocols/ssh/
redef Notice::emailed_types += {
SSH::Interesting_Hostname_Login
};

View file

@ -1,6 +0,0 @@
@load policy/protocols/ssh/interesting-hostnames.bro
@load base/protocols/ssh/
redef Notice::type_suppression_intervals += {
[SSH::Interesting_Hostname_Login] = 1day,
};

View file

@ -1,7 +0,0 @@
module HTTP;
export {
## This setting controls whether passwords used in Basic-Auth are
## captured.
const default_capture_password = F &redef;
}

File diff suppressed because it is too large Load diff

View file

@ -1,4 +0,0 @@
# @TEST-EXEC: bro -r ${TRACES}/wikipedia.trace
# @TEST-EXEC: btest-diff conn.log
# @TEST-EXEC: btest-diff http.log

View file

@ -1,4 +0,0 @@
# @TEST-EXEC: bro -r ${TRACES}/workshop_2011_browse.trace
# @TEST-EXEC: btest-diff conn.log
# @TEST-EXEC: btest-diff http.log

View file

@ -1,298 +0,0 @@
"""
The Bro domain for Sphinx.
"""
def setup(Sphinx):
Sphinx.add_domain(BroDomain)
Sphinx.add_node(see)
Sphinx.add_directive_to_domain('bro', 'see', SeeDirective)
Sphinx.connect('doctree-resolved', process_see_nodes)
from sphinx import addnodes
from sphinx.domains import Domain, ObjType, Index
from sphinx.locale import l_, _
from sphinx.directives import ObjectDescription
from sphinx.roles import XRefRole
from sphinx.util.nodes import make_refnode
from sphinx import version_info
from docutils import nodes
from docutils.parsers.rst import Directive
from docutils.parsers.rst import directives
from docutils.parsers.rst.roles import set_classes
class see(nodes.General, nodes.Element):
refs = []
class SeeDirective(Directive):
has_content = True
def run(self):
n = see('')
n.refs = " ".join(self.content).split()
return [n]
# Wrapper for creating a tuple for index nodes, staying backwards
# compatible to Sphinx < 1.4:
def make_index_tuple(indextype, indexentry, targetname, targetname2):
if version_info >= (1, 4, 0, '', 0):
return (indextype, indexentry, targetname, targetname2, None)
else:
return (indextype, indexentry, targetname, targetname2)
def process_see_nodes(app, doctree, fromdocname):
for node in doctree.traverse(see):
content = []
para = nodes.paragraph()
para += nodes.Text("See also:", "See also:")
for name in node.refs:
join_str = " "
if name != node.refs[0]:
join_str = ", "
link_txt = join_str + name
if name not in app.env.domaindata['bro']['idtypes']:
# Just create the text and issue warning
app.env.warn(fromdocname,
'unknown target for ".. bro:see:: %s"' % (name))
para += nodes.Text(link_txt, link_txt)
else:
# Create a reference
typ = app.env.domaindata['bro']['idtypes'][name]
todocname = app.env.domaindata['bro']['objects'][(typ, name)]
newnode = nodes.reference('', '')
innernode = nodes.literal(_(name), _(name))
newnode['refdocname'] = todocname
newnode['refuri'] = app.builder.get_relative_uri(
fromdocname, todocname)
newnode['refuri'] += '#' + typ + '-' + name
newnode.append(innernode)
para += nodes.Text(join_str, join_str)
para += newnode
content.append(para)
node.replace_self(content)
class BroGeneric(ObjectDescription):
def update_type_map(self, idname):
if 'idtypes' not in self.env.domaindata['bro']:
self.env.domaindata['bro']['idtypes'] = {}
self.env.domaindata['bro']['idtypes'][idname] = self.objtype
def add_target_and_index(self, name, sig, signode):
targetname = self.objtype + '-' + name
if targetname not in self.state.document.ids:
signode['names'].append(targetname)
signode['ids'].append(targetname)
signode['first'] = (not self.names)
self.state.document.note_explicit_target(signode)
objects = self.env.domaindata['bro']['objects']
key = (self.objtype, name)
if ( key in objects and self.objtype != "id" and
self.objtype != "type" ):
self.env.warn(self.env.docname,
'duplicate description of %s %s, ' %
(self.objtype, name) +
'other instance in ' +
self.env.doc2path(objects[key]),
self.lineno)
objects[key] = self.env.docname
self.update_type_map(name)
indextext = self.get_index_text(self.objtype, name)
if indextext:
self.indexnode['entries'].append(make_index_tuple('single',
indextext, targetname,
targetname))
def get_index_text(self, objectname, name):
return _('%s (%s)') % (name, self.objtype)
def handle_signature(self, sig, signode):
signode += addnodes.desc_name("", sig)
return sig
class BroNamespace(BroGeneric):
def add_target_and_index(self, name, sig, signode):
targetname = self.objtype + '-' + name
if targetname not in self.state.document.ids:
signode['names'].append(targetname)
signode['ids'].append(targetname)
signode['first'] = (not self.names)
self.state.document.note_explicit_target(signode)
objects = self.env.domaindata['bro']['objects']
key = (self.objtype, name)
objects[key] = self.env.docname
self.update_type_map(name)
indextext = self.get_index_text(self.objtype, name)
self.indexnode['entries'].append(make_index_tuple('single', indextext,
targetname, targetname))
self.indexnode['entries'].append(make_index_tuple('single',
"namespaces; %s" % (sig),
targetname, targetname))
def get_index_text(self, objectname, name):
return _('%s (namespace); %s') % (name, self.env.docname)
def handle_signature(self, sig, signode):
signode += addnodes.desc_name("", sig)
return sig
class BroEnum(BroGeneric):
def add_target_and_index(self, name, sig, signode):
targetname = self.objtype + '-' + name
if targetname not in self.state.document.ids:
signode['names'].append(targetname)
signode['ids'].append(targetname)
signode['first'] = (not self.names)
self.state.document.note_explicit_target(signode)
objects = self.env.domaindata['bro']['objects']
key = (self.objtype, name)
objects[key] = self.env.docname
self.update_type_map(name)
indextext = self.get_index_text(self.objtype, name)
#self.indexnode['entries'].append(make_index_tuple('single', indextext,
# targetname, targetname))
m = sig.split()
if len(m) < 2:
self.env.warn(self.env.docname,
"bro:enum directive missing argument(s)")
return
if m[1] == "Notice::Type":
if 'notices' not in self.env.domaindata['bro']:
self.env.domaindata['bro']['notices'] = []
self.env.domaindata['bro']['notices'].append(
(m[0], self.env.docname, targetname))
self.indexnode['entries'].append(make_index_tuple('single',
"%s (enum values); %s" % (m[1], m[0]),
targetname, targetname))
def handle_signature(self, sig, signode):
m = sig.split()
name = m[0]
signode += addnodes.desc_name("", name)
return name
class BroIdentifier(BroGeneric):
def get_index_text(self, objectname, name):
return name
class BroKeyword(BroGeneric):
def get_index_text(self, objectname, name):
return name
class BroAttribute(BroGeneric):
def get_index_text(self, objectname, name):
return _('%s (attribute)') % (name)
class BroNotices(Index):
"""
Index subclass to provide the Bro notices index.
"""
name = 'noticeindex'
localname = l_('Bro Notice Index')
shortname = l_('notices')
def generate(self, docnames=None):
content = {}
if 'notices' not in self.domain.env.domaindata['bro']:
return content, False
for n in self.domain.env.domaindata['bro']['notices']:
modname = n[0].split("::")[0]
entries = content.setdefault(modname, [])
entries.append([n[0], 0, n[1], n[2], '', '', ''])
content = sorted(content.items())
return content, False
class BroDomain(Domain):
"""Bro domain."""
name = 'bro'
label = 'Bro'
object_types = {
'type': ObjType(l_('type'), 'type'),
'namespace': ObjType(l_('namespace'), 'namespace'),
'id': ObjType(l_('id'), 'id'),
'keyword': ObjType(l_('keyword'), 'keyword'),
'enum': ObjType(l_('enum'), 'enum'),
'attr': ObjType(l_('attr'), 'attr'),
}
directives = {
'type': BroGeneric,
'namespace': BroNamespace,
'id': BroIdentifier,
'keyword': BroKeyword,
'enum': BroEnum,
'attr': BroAttribute,
}
roles = {
'type': XRefRole(),
'namespace': XRefRole(),
'id': XRefRole(),
'keyword': XRefRole(),
'enum': XRefRole(),
'attr': XRefRole(),
'see': XRefRole(),
}
indices = [
BroNotices,
]
initial_data = {
'objects': {}, # fullname -> docname, objtype
}
def clear_doc(self, docname):
to_delete = []
for (typ, name), doc in self.data['objects'].items():
if doc == docname:
to_delete.append((typ, name))
for (typ, name) in to_delete:
del self.data['objects'][typ, name]
def resolve_xref(self, env, fromdocname, builder, typ, target, node,
contnode):
objects = self.data['objects']
if typ == "see":
if target not in self.data['idtypes']:
self.env.warn(fromdocname,
'unknown target for ":bro:see:`%s`"' % (target))
return []
objtype = self.data['idtypes'][target]
return make_refnode(builder, fromdocname,
objects[objtype, target],
objtype + '-' + target,
contnode, target + ' ' + objtype)
else:
objtypes = self.objtypes_for_role(typ)
for objtype in objtypes:
if (objtype, target) in objects:
return make_refnode(builder, fromdocname,
objects[objtype, target],
objtype + '-' + target,
contnode, target + ' ' + objtype)
else:
self.env.warn(fromdocname,
'unknown target for ":bro:%s:`%s`"' % (typ, target))
def get_objects(self):
for (typ, name), docname in self.data['objects'].items():
yield name, name, typ, docname, typ + '-' + name, 1

View file

@ -1,618 +0,0 @@
.. _CAF: https://github.com/actor-framework/actor-framework
.. _brokercomm-framework:
==============================================
Broker-Enabled Communication/Cluster Framework
==============================================
.. rst-class:: opening
Bro now uses the `Broker Library
<../components/broker/README.html>`_ to exchange information with
other Bro processes. Broker itself uses CAF_ (C++ Actor Framework)
internally for connecting nodes and exchanging arbitrary data over
networks. Broker then introduces, on top of CAF, a topic-based
publish/subscribe communication pattern using a data model that is
compatible with Bro's. Broker itself can be utilized outside the
context of Bro, with Bro itself making use of only a few predefined
Broker message formats that represent Bro events, log entries, etc.
In summary, Bro's Broker framework provides basic facilities for
connecting broker-enabled peers (e.g. Bro instances) to each other
and exchanging messages (e.g. events and logs). With this come
changes in how clusters operate and, since Broker significantly
differs from the previous communication framework, several of the
scripts that Bro ships with have changed in ways that may break
your own customizations. This document aims to describe the changes
that have been made, making it easier to port your own scripts. It
also gives examples of Broker and the new cluster framework that
show off all the new features and capabilities.
Porting Guide
=============
Review and use the points below as a guide to port your own scripts
to the latest version of Bro, which uses the new cluster and Broker
communication framework.
General Porting Tips
--------------------
- ``@load policy/frameworks/communication/listen`` and
``@load base/frameworks/communication`` indicate use of the
old communication framework; consider porting to
``@load base/frameworks/broker`` and using the Broker API:
:doc:`/scripts/base/frameworks/broker/main.bro`
- The ``&synchronized`` and ``&persistent`` attributes are deprecated,
consider using `Data Stores`_ instead.
- Usages of the old communications system features are all deprecated;
however, they also do not work in the default Bro configuration unless
you manually take action to set up the old communication system.
To aid in porting, such usages will default to raising a fatal error
unless you explicitly acknowledge that such usages of the old system
are ok. Set the :bro:see:`old_comm_usage_is_ok` flag in this case.
- Instead of using e.g. ``Cluster::manager2worker_events`` (and all
permutations for every node type), what you'd now use is either
:bro:see:`Broker::publish` or :bro:see:`Broker::auto_publish` with
the topic associated with a specific node or class of nodes,
like :bro:see:`Cluster::node_topic` or
:bro:see:`Cluster::worker_topic` (see the sketch after this list).
- Instead of using the ``send_id`` BIF, use :bro:see:`Broker::publish_id`.
- Use :bro:see:`terminate` instead of :bro:see:`terminate_communication`.
The latter refers to the old communication system and no longer affects
the new Broker-based system.
- For replacing :bro:see:`remote_connection_established` and
:bro:see:`remote_connection_closed`, consider :bro:see:`Broker::peer_added`
or :bro:see:`Broker::peer_lost`. There's also :bro:see:`Cluster::node_up`
and :bro:see:`Cluster::node_down`.
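As a sketch of the replacement pattern for the old
``Cluster::manager2worker_events`` style (the event and its argument
here are hypothetical):

.. sourcecode:: bro

   @load base/frameworks/cluster

   # Hypothetical event that a manager-side script wants all workers to see.
   global my_event: event(s: string);

   event bro_init()
       {
       # Forward every local invocation of my_event to all workers ...
       Broker::auto_publish(Cluster::worker_topic, my_event);

       # ... or publish a single instance explicitly.
       Broker::publish(Cluster::worker_topic, my_event, "hello");
       }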
Notable / Specific Script API Changes
-------------------------------------
- :bro:see:`Software::tracked` is now partitioned among proxy nodes
instead of synchronized in its entirety to all nodes.
- ``Known::known_hosts`` is renamed to :bro:see:`Known::host_store` and
implemented via the new Broker data store interface.
- ``Known::known_services`` is renamed to :bro:see:`Known::service_store`
and implemented via the new Broker data store interface.
- ``Known::certs`` is renamed to :bro:see:`Known::cert_store`
and implemented via the new Broker data store interface.
New Cluster Layout / API
========================
Layout / Topology
-----------------
The cluster topology has changed.
- Proxy nodes no longer connect with each other.
- Each worker node connects to all proxies.
- All node types connect to all logger nodes and the manager node.
This looks like:
.. figure:: broker/cluster-layout.png
Some general suggestions as to the purpose/utilization of each node type:
- Workers: a good first choice for doing the brunt of any work you need
done. They should be spending a lot of time performing the actual job
of parsing/analyzing incoming data from packets, so you might choose
to look at them as doing a "first pass" analysis and then deciding how
the results should be shared with other nodes in the cluster.
- Proxies: serve as intermediaries for data storage and work/calculation
offloading. Good for helping offload work or data in a scalable and
distributed way. Since any given worker is connected to all
proxies and can agree on an "arbitrary key -> proxy node" mapping
(more on that later), you can partition work or data amongst them in a
uniform manner. e.g. you might choose to use proxies as a method of
sharing non-persistent state or as a "second pass" analysis for any
work that you don't want interfering with the workers' capacity to
keep up with capturing and parsing packets. Note that the default scripts
that come with Bro don't utilize proxies themselves, so if you are coming
from a previous BroControl deployment, you may want to try reducing down
to a single proxy node. If you come to have custom/community scripts
that utilize proxies, that would be the time to start considering scaling
up the number of proxies to meet demands.
- Manager: this node will be good at making decisions that require a
global view of things since it is in a centralized location, connected
to everything. However, that also makes it easy to overload, so try
to use it sparingly and only for tasks that must be done in a
centralized or authoritative location. Optionally, for some
deployments, the Manager can also serve as the sole Logger.
- Loggers: these nodes should simply be spending their time writing out
logs to disk and should not be used for much else. In the default cluster
configuration, logs get distributed among available loggers in a
round-robin fashion, providing failover capability should any given
logger temporarily go offline.
Data Management/Sharing Strategies
==================================
There is perhaps no single best approach or pattern to use when you need
a Bro script to store or share long-term state and data. The two
approaches previously used were either to use the ``&synchronized``
attribute on tables/sets or to explicitly send events to specific
nodes on which you wanted data to be stored. The former is no longer
possible, though the new Broker/Cluster framework offers several new
possibilities, namely distributed data store and data partitioning
APIs.
Data Stores
-----------
Broker provides a distributed key-value store interface with optional
choice of using a persistent backend. For more detail, see
:ref:`this example <data_store_example>`.
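A minimal sketch of the interface (the store name and key below are
illustrative):

.. sourcecode:: bro

   global h: opaque of Broker::Store;

   event bro_init()
       {
       # Create an in-memory master store and insert a value; a
       # persistent backend could be chosen instead.
       h = Broker::create_master("mystore");
       Broker::put(h, "one", 1);

       # Reads are asynchronous, so they use a "when" block.
       when ( local res = Broker::get(h, "one") )
           {
           print res;
           }
       timeout 10sec
           {
           print "timeout";
           }
       }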
Some ideas/considerations/scenarios when deciding whether to use
a data store for your use-case:
* If you need the full data set locally in order to achieve low-latency
queries, data store "clones" can provide that.
* If you need data that persists across restarts of Bro processes, then
data stores can also provide that.
* If the data you want to store is complex (tables, sets, records) or
you expect to read, modify, and store back, then data stores may not
be able to provide simple, race-free methods of performing the pattern
of logic that you want.
* If the data set you want to store is excessively large, that's still
problematic even for stores that use a persistent backend as they are
implemented in a way that requires a full snapshot of the store's
contents to fit in memory (this limitation may change in the future).
Data Partitioning
-----------------
New data partitioning strategies are available using the API in
:doc:`/scripts/base/frameworks/cluster/pools.bro`. Using that API, developers
of custom Bro scripts can define a custom pool of nodes that best fits the
needs of their script.
One example strategy is to use Highest Random Weight (HRW) hashing to
partition data tables amongst the pool of all proxy nodes. e.g. using
:bro:see:`Cluster::publish_hrw`. This could allow clusters to
be scaled more easily than the approach of "the entire data set gets
synchronized to all nodes" as the solution to memory limitations becomes
"just add another proxy node". It may also take away some of the
messaging load that used to be required to synchronize data sets across
all nodes.
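A sketch of that usage (the event and key here are hypothetical):

.. sourcecode:: bro

   @load base/frameworks/cluster

   # Hypothetical event that a proxy-side script handles.
   global update_count: event(key: string);

   function route_to_proxy(key: string)
       {
       # The same key consistently maps to the same live proxy node.
       Cluster::publish_hrw(Cluster::proxy_pool, key, update_count, key);
       }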
The tradeoff of this approach is that nodes leaving the pool (due to
crashes, etc.) cause a temporary gap in the total data set until
workers start hashing the affected keys to a different proxy node that
is still alive, where the data then gets stored and updated.
If the developer of a script expects its workload to be particularly
intensive, wants to ensure that their operations get exclusive
access to nodes, or otherwise set constraints on the number of nodes within
a pool utilized by their script, then the :bro:see:`Cluster::PoolSpec`
structure will allow them to do that while still allowing users of that script
to override the default suggestions made by the original developer.
Broker Framework Examples
=========================
The broker framework provides basic facilities for connecting Bro instances
to each other and exchanging messages, like events or logs.
See :doc:`/scripts/base/frameworks/broker/main.bro` for an overview
of the main Broker API.
.. _broker_topic_naming:
Topic Naming Conventions
------------------------
All Broker-based messaging involves two components: the information you
want to send (e.g. an event w/ its arguments) along with an associated
topic name string. The topic strings are used as a filtering mechanism:
Broker uses a publish/subscribe communication pattern where peers
advertise interest in topic **prefixes** and only receive messages which
match one of their prefix subscriptions.
Broker itself supports arbitrary topic strings, however Bro generally
follows certain conventions in choosing these topics to help avoid
conflicts and generally make them easier to remember.
As a reminder of how topic subscriptions work, subscribers advertise
interest in a topic **prefix** and then receive any messages published by a
peer to a topic name that starts with that prefix. E.g. Alice
subscribes to the "alice/dogs" prefix, then would receive the following
message topics published by Bob:
- topic "alice/dogs/corgi"
- topic "alice/dogs"
- topic "alice/dogsarecool/oratleastilikethem"
Alice would **not** receive the following message topics published by Bob:
- topic "alice/cats/siamese"
- topic "alice/cats"
- topic "alice/dog"
- topic "alice"
Note that the topics aren't required to form a slash-delimited hierarchy;
the subscription matching is purely a byte-by-byte prefix comparison.
However, Bro scripts generally will follow a topic naming hierarchy, and
any given script will make the topic names it uses apparent via some
redef'able constant in its export section.  Generally, topics that Bro
scripts use will be along the lines of "bro/<namespace>/<specifics>",
with "<namespace>" being the script's module name (in all lowercase).
For example, you might expect an imaginary "Pretend" framework to
publish/subscribe using topic names like "bro/pretend/my_cool_event".
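For instance, a script following that convention might expose its topic
via a redef'able constant in its export section (the module and topic
names here are hypothetical):

.. sourcecode:: bro

    module Pretend;

    export {
        ## Topic prefix used for this script's messaging (made-up example).
        const topic_prefix = "bro/pretend/" &redef;
    }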
For scripts that use Broker as a means of cluster-aware analysis,
it's usually sufficient for them to make use of the topics declared
by the cluster framework.  For scripts that are meant to establish
communication flows unrelated to Bro cluster operation, new topics are
declared (the NetControl and Control frameworks are examples of this).
For cluster operation, see :doc:`/scripts/base/frameworks/cluster/main.bro`
for a list of topics that are useful for steering published events to
the various node classes. E.g. you have the ability to broadcast
to all nodes of a given class (e.g. just workers) or just send to a
specific node within a class.
The topic names that logs get published under are a bit nuanced. In the
default cluster configuration, they are round-robin published to
explicit topic names that identify a single logger. In standalone Bro
processes, logs get published to the topic indicated by
:bro:see:`Broker::default_log_topic_prefix`.
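For example, a standalone process could publish its logs under a custom
prefix by redefining that option (the prefix value below is just
illustrative):

.. sourcecode:: bro

    redef Broker::default_log_topic_prefix = "myorg/logs/";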
For those writing their own scripts which need new topic names, a
suggestion would be to avoid prefixing any new topics/prefixes with
"bro/", as scripts shipping with Bro will use that prefix and
it's better not to risk unintended conflicts.  Again, it's
often less confusing to just re-use existing topic names instead
of introducing new ones.  The typical use case is writing
a cluster-enabled script, which usually just needs to route events
based upon node classes, and the cluster framework already provides
usable topics for that.
Connecting to Peers
-------------------
Bro can accept incoming connections by calling :bro:see:`Broker::listen`.
.. literalinclude:: broker/connecting-listener.bro
:caption: connecting-listener.bro
:language: bro
:linenos:
Bro can initiate outgoing connections by calling :bro:see:`Broker::peer`.
.. literalinclude:: broker/connecting-connector.bro
:caption: connecting-connector.bro
:language: bro
:linenos:
In either case, connection status updates are monitored via the
:bro:see:`Broker::peer_added` and :bro:see:`Broker::peer_lost` events.
Remote Events
-------------
To receive remote events, you need to first subscribe to a "topic" to which
the events are being sent. A topic is just a string chosen by the sender,
and named in a way that helps organize events into various categories.
See the :ref:`topic naming conventions section <broker_topic_naming>` for
more on how topics work and are chosen.
Use the :bro:see:`Broker::subscribe` function to subscribe to topics and
define any event handlers for events that peers will send.
.. literalinclude:: broker/events-listener.bro
:caption: events-listener.bro
:language: bro
:linenos:
There are two different ways to send events.
The first is to call the :bro:see:`Broker::publish` function which you can
supply directly with the event and its arguments or give it the return value of
:bro:see:`Broker::make_event` in case you need to send the same event/args
multiple times. When publishing events like this, local event handlers for
the event are not called.
The second option is to call the :bro:see:`Broker::auto_publish` function where
you specify a particular event that will be automatically sent to peers
whenever the event is called locally via the normal event invocation syntax.
When auto-publishing events, local event handlers for the event are called
in addition to sending the event to any subscribed peers.
.. literalinclude:: broker/events-connector.bro
:caption: events-connector.bro
:language: bro
:linenos:
Note that the subscription model is prefix-based, meaning that if you subscribe
to the "bro/events" topic prefix you would receive events that are published
to topic names "bro/events/foo" and "bro/events/bar" but not "bro/misc".
Remote Logging
--------------
.. literalinclude:: broker/testlog.bro
:caption: testlog.bro
:language: bro
:linenos:
To toggle remote logs, redef :bro:see:`Log::enable_remote_logging`.
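For example, a node that should keep all of its log writes local could
disable remote logging entirely (a one-line sketch):

.. sourcecode:: bro

    redef Log::enable_remote_logging = F;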
Use the :bro:see:`Broker::subscribe` function to advertise interest
in logs written by peers. The topic names that Bro uses are determined by
:bro:see:`Broker::log_topic`.
.. literalinclude:: broker/logs-listener.bro
:caption: logs-listener.bro
:language: bro
:linenos:
.. literalinclude:: broker/logs-connector.bro
:caption: logs-connector.bro
:language: bro
:linenos:
Note that logging events are only raised locally on the node that performs
the :bro:see:`Log::write` and not automatically published to peers.
.. _data_store_example:
Distributed Data Stores
-----------------------
See :doc:`/scripts/base/frameworks/broker/store.bro` for an overview
of the Broker data store API.
There are two flavors of key-value data store interfaces: master and clone.
A master data store can be cloned from remote peers, which may then
perform lightweight, local queries against the clone; the clone
automatically stays synchronized with the master store.  Clones cannot
modify their content directly; instead, they send modifications to the
centralized master store, which applies them and then broadcasts them to
all clones.
Master stores get to choose what type of storage backend to use,
e.g. in-memory versus SQLite for persistence.
Data stores also support expiration on a per-key basis using an amount of
time relative to the entry's last modification time.
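For example, an entry can be given a lifetime at insertion time (a small
sketch; the key, value, and interval are arbitrary, and the expiry is
assumed to be the optional last argument of :bro:see:`Broker::put`):

.. sourcecode:: bro

    global h: opaque of Broker::Store;

    event bro_init()
        {
        h = Broker::create_master("mystore");
        # The entry expires 30 seconds after its last modification.
        Broker::put(h, "transient_key", "transient_value", 30sec);
        }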
.. literalinclude:: broker/stores-listener.bro
:caption: stores-listener.bro
:language: bro
:linenos:
.. literalinclude:: broker/stores-connector.bro
:caption: stores-connector.bro
:language: bro
:linenos:
Note that all data store queries must be made within Bro's asynchronous
``when`` statements and must specify a timeout block.
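For example, a single lookup against a store handle follows this shape
(a sketch mirroring the listener example above; the key and timeout are
arbitrary):

.. sourcecode:: bro

    global h: opaque of Broker::Store;

    function do_lookup(key: string)
        {
        when ( local res = Broker::get(h, key) )
            { print "lookup result", res; }
        # Every query must supply a timeout block.
        timeout 5sec
            { print "lookup timed out", key; }
        }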
Cluster Framework Examples
==========================
This section contains a few brief examples of various communication
patterns one might use when developing Bro scripts that are to operate in
the context of a cluster.
A Reminder About Events and Module Namespaces
---------------------------------------------
For simplicity, the following examples do not use any modules/namespaces.
If you choose to use them within your own code, it's important to
remember that the ``event`` and ``schedule`` dispatching statements
should always use the fully-qualified event name.
For example, this will likely not work as expected:
.. sourcecode:: bro
module MyModule;
export {
global my_event: event();
}
event my_event()
{
print "got my event";
}
event bro_init()
{
event my_event();
schedule 10sec { my_event() };
}
This code runs without errors; however, the local ``my_event`` handler
will never be called, and neither will any remote handlers, even if
:bro:see:`Broker::auto_publish` was used elsewhere for it.  Instead, at
minimum you would need to change the ``bro_init()`` handler:
.. sourcecode:: bro
event bro_init()
{
event MyModule::my_event();
schedule 10sec { MyModule::my_event() };
}
Though, an easy rule of thumb to remember is to always use explicit
module namespace scoping; that way, you can't go wrong:
.. sourcecode:: bro
module MyModule;
export {
global MyModule::my_event: event();
}
event MyModule::my_event()
{
print "got my event";
}
event bro_init()
{
event MyModule::my_event();
schedule 10sec { MyModule::my_event() };
}
Note that other identifiers in Bro do not have this inconsistency
related to module namespacing; it's just events that require
explicitness.
Manager Sending Events To Workers
---------------------------------
This is fairly straightforward: we just need a topic name to which we know
all workers are subscribed, combined with the event we want to send them.
.. sourcecode:: bro
event manager_to_workers(s: string)
{
print "got event from manager", s;
}
event some_event_handled_on_manager()
{
Broker::publish(Cluster::worker_topic, manager_to_workers,
"hello v0");
# If you know this event is only handled on the manager, you don't
# need any of the following conditions, they're just here as an
# example of how you can further discriminate based on node identity.
# Can check based on the name of the node.
if ( Cluster::node == "manager" )
Broker::publish(Cluster::worker_topic, manager_to_workers,
"hello v1");
# Can check based on the type of the node.
if ( Cluster::local_node_type() == Cluster::MANAGER )
Broker::publish(Cluster::worker_topic, manager_to_workers,
"hello v2");
# The run-time overhead of the above conditions can even be
# eliminated by using the following conditional directives.
# It's evaluated once per node at parse-time and, if false,
# any code within is just ignored / treated as not existing at all.
@if ( Cluster::local_node_type() == Cluster::MANAGER )
Broker::publish(Cluster::worker_topic, manager_to_workers,
"hello v3");
@endif
}
Worker Sending Events To Manager
--------------------------------
This should look almost identical to the previous case of sending an event
from the manager to workers, except it simply changes the topic name to
one to which the manager is subscribed.
.. sourcecode:: bro
event worker_to_manager(worker_name: string)
{
print "got event from worker", worker_name;
}
event some_event_handled_on_worker()
{
Broker::publish(Cluster::manager_topic, worker_to_manager,
Cluster::node);
}
Worker Sending Events To All Workers
------------------------------------
Since workers are not directly connected to each other in the cluster
topology, this type of communication is a bit different from what we
did before, since we have to manually relay the event via some node that *is*
connected to all workers. The manager or a proxy satisfies that requirement:
.. sourcecode:: bro
event worker_to_workers(worker_name: string)
{
@if ( Cluster::local_node_type() == Cluster::MANAGER ||
Cluster::local_node_type() == Cluster::PROXY )
Broker::publish(Cluster::worker_topic, worker_to_workers,
worker_name);
@else
print "got event from worker", worker_name;
@endif
}
event some_event_handled_on_worker()
{
# We know the manager is connected to all workers, so we could
# choose to relay the event across it.
Broker::publish(Cluster::manager_topic, worker_to_workers,
Cluster::node + " (via manager)");
# We also know that any given proxy is connected to all workers,
# though now we have a choice of which proxy to use. If we
# want to distribute the work associated with relaying uniformly,
# we can use a round-robin strategy. The key used here is simply
# used by the cluster framework internally to keep track of
# which node is up next in the round-robin.
local pt = Cluster::rr_topic(Cluster::proxy_pool, "example_key");
Broker::publish(pt, worker_to_workers,
Cluster::node + " (via a proxy)");
}
Worker Distributing Events Uniformly Across Proxies
---------------------------------------------------
If you want to offload some data/work from a worker to your proxies,
you can make use of a `Highest Random Weight (HRW) hashing
<https://en.wikipedia.org/wiki/Rendezvous_hashing>`_ distribution strategy
to uniformly map an arbitrary key space across all available proxies.
.. sourcecode:: bro
event worker_to_proxies(worker_name: string)
{
print "got event from worker", worker_name;
}
global my_counter = 0;
event some_event_handled_on_worker()
{
# The key here is used to choose which proxy shall receive
# the event. Different keys may map to different nodes, but
# any given key always maps to the same node provided the
# pool of nodes remains consistent. If a proxy goes offline,
# that key maps to a different node until the original comes
# back up.
Cluster::publish_hrw(Cluster::proxy_pool,
cat("example_key", ++my_counter),
worker_to_proxies, Cluster::node);
}
View file
@@ -1,12 +0,0 @@
redef exit_only_after_terminate = T;
event bro_init()
{
Broker::peer("127.0.0.1");
}
event Broker::peer_added(endpoint: Broker::EndpointInfo, msg: string)
{
print "peer added", endpoint;
terminate();
}
View file
@@ -1,17 +0,0 @@
redef exit_only_after_terminate = T;
event bro_init()
{
Broker::listen("127.0.0.1");
}
event Broker::peer_added(endpoint: Broker::EndpointInfo, msg: string)
{
print "peer added", endpoint;
}
event Broker::peer_lost(endpoint: Broker::EndpointInfo, msg: string)
{
print "peer lost", endpoint;
terminate();
}
View file
@@ -1,35 +0,0 @@
redef exit_only_after_terminate = T;
global my_event: event(msg: string, c: count);
global my_auto_event: event(msg: string, c: count);
event bro_init()
{
Broker::peer("127.0.0.1");
Broker::auto_publish("bro/event/my_auto_event", my_auto_event);
}
event Broker::peer_added(endpoint: Broker::EndpointInfo, msg: string)
{
print "peer added", endpoint;
Broker::publish("bro/event/my_event", my_event, "hi", 0);
event my_auto_event("stuff", 88);
Broker::publish("bro/event/my_event", my_event, "...", 1);
event my_auto_event("more stuff", 51);
local e = Broker::make_event(my_event, "bye", 2);
Broker::publish("bro/event/my_event", e);
}
event Broker::peer_lost(endpoint: Broker::EndpointInfo, msg: string)
{
terminate();
}
event my_event(msg: string, c: count)
{
print "got my_event", msg, c;
}
event my_auto_event(msg: string, c: count)
{
print "got my_auto_event", msg, c;
}
View file
@@ -1,33 +0,0 @@
redef exit_only_after_terminate = T;
global msg_count = 0;
global my_event: event(msg: string, c: count);
global my_auto_event: event(msg: string, c: count);
event bro_init()
{
Broker::subscribe("bro/event/");
Broker::listen("127.0.0.1");
}
event Broker::peer_added(endpoint: Broker::EndpointInfo, msg: string)
{
print "peer added", endpoint;
}
event my_event(msg: string, c: count)
{
++msg_count;
print "got my_event", msg, c;
if ( msg_count == 5 )
terminate();
}
event my_auto_event(msg: string, c: count)
{
++msg_count;
print "got my_auto_event", msg, c;
if ( msg_count == 5 )
terminate();
}
View file
@@ -1,36 +0,0 @@
@load ./testlog
redef exit_only_after_terminate = T;
global n = 0;
event bro_init()
{
Broker::peer("127.0.0.1");
}
event do_write()
{
if ( n == 6 )
return;
Log::write(Test::LOG, [$msg = "ping", $num = n]);
++n;
event do_write();
}
event Broker::peer_added(endpoint: Broker::EndpointInfo, msg: string)
{
print "peer added", endpoint;
event do_write();
}
event Broker::peer_lost(endpoint: Broker::EndpointInfo, msg: string)
{
terminate();
}
event Test::log_test(rec: Test::Info)
{
print "wrote log", rec;
Broker::publish("bro/logs/forward/test", Test::log_test, rec);
}
View file
@@ -1,22 +0,0 @@
@load ./testlog
redef exit_only_after_terminate = T;
event bro_init()
{
Broker::subscribe("bro/logs");
Broker::listen("127.0.0.1");
}
event Broker::peer_added(endpoint: Broker::EndpointInfo, msg: string)
{
print "peer added", endpoint;
}
event Test::log_test(rec: Test::Info)
{
print "got log event", rec;
if ( rec$num == 5 )
terminate();
}
View file
@@ -1,29 +0,0 @@
redef exit_only_after_terminate = T;
global h: opaque of Broker::Store;
global ready: event();
event Broker::peer_lost(endpoint: Broker::EndpointInfo, msg: string)
{
terminate();
}
event bro_init()
{
h = Broker::create_master("mystore");
local myset: set[string] = {"a", "b", "c"};
local myvec: vector of string = {"alpha", "beta", "gamma"};
Broker::put(h, "one", 110);
Broker::put(h, "two", 223);
Broker::put(h, "myset", myset);
Broker::put(h, "myvec", myvec);
Broker::increment(h, "one");
Broker::decrement(h, "two");
Broker::insert_into_set(h, "myset", "d");
Broker::remove_from(h, "myset", "b");
Broker::push(h, "myvec", "delta");
Broker::peer("127.0.0.1");
}
View file
@@ -1,79 +0,0 @@
redef exit_only_after_terminate = T;
global h: opaque of Broker::Store;
global expected_key_count = 4;
global key_count = 0;
# Lookup a value in the store based on an arbitrary key string.
function do_lookup(key: string)
{
when ( local res = Broker::get(h, key) )
{
++key_count;
print "lookup", key, res;
# End after we have looked up each key in the store twice.
if ( key_count == expected_key_count * 2 )
terminate();
}
# All data store queries must specify a timeout
timeout 3sec
{ print "timeout", key; }
}
event check_keys()
{
# Here we just query for the list of keys in the store, and show how to
# look up each one's value.
when ( local res = Broker::keys(h) )
{
print "clone keys", res;
if ( res?$result )
{
# Since we know that the keys we are storing are all strings,
# we can conveniently cast the result of Broker::keys to
# a native Bro type, namely 'set[string]'.
for ( k in res$result as string_set )
do_lookup(k);
# Alternatively, we can use a generic iterator to iterate
# over the results (which we know is of the 'set' type because
# that's what Broker::keys() always returns). If the keys
# we stored were not all of the same type, then you would
# likely want to use this method of inspecting the store's keys.
local i = Broker::set_iterator(res$result);
while ( ! Broker::set_iterator_last(i) )
{
do_lookup(Broker::set_iterator_value(i) as string);
Broker::set_iterator_next(i);
}
}
}
# All data store queries must specify a timeout.
# You also might see timeouts on connecting/initializing a clone since
# it hasn't had time to get fully set up yet.
timeout 1sec
{
print "timeout";
schedule 1sec { check_keys() };
}
}
event Broker::peer_added(endpoint: Broker::EndpointInfo, msg: string)
{
print "peer added";
# We could create a clone early (e.g. in bro_init), and it would
# periodically try to synchronize with its master once it connects.
# However, we just create it now since we know the peer with the
# master store has just connected.
h = Broker::create_clone("mystore");
event check_keys();
}
event bro_init()
{
Broker::listen("127.0.0.1");
}
View file
@@ -1,17 +0,0 @@
module Test;
export {
redef enum Log::ID += { LOG };
type Info: record {
msg: string &log;
num: count &log;
};
global log_test: event(rec: Test::Info);
}
event bro_init() &priority=5
{
Log::create_stream(Test::LOG, [$columns=Test::Info, $ev=log_test, $path="test"]);
}
View file
@@ -1,198 +0,0 @@
.. _framework-configuration:
=======================
Configuration Framework
=======================
.. rst-class:: opening
Bro includes a "configuration framework" that allows
updating script options dynamically at runtime. This functionality
consists of several components: an "option" declaration, the
ability to specify input files to enable changing the value of options at
runtime, a couple of functions, and a log file "config.log"
which contains information about every change to option values.
Introduction
------------
The configuration framework provides an alternative to using Bro
script constants to store various Bro settings.
In general, traditional constants can be used when a value is not
expected to change at runtime, but they cannot be used for values that
need to be modified occasionally. While a "redef" allows a
re-definition of an already defined constant in Bro, these
redefinitions can only be performed when Bro first starts. Afterwards,
constants can no longer be modified.
However, it is clearly desirable to be able to change at runtime many
of the configuration options that Bro offers. Having to restart Bro
can be time-consuming and causes Bro to lose all connection state and
knowledge that it accumulated. Bro's configuration framework solves
this problem by allowing changing configuration options at runtime.
Declaring options
-----------------
The "option" keyword allows variables to be declared as configuration options.
.. sourcecode:: bro
module TestModule;
export {
option my_networks: set[subnet] = {};
option enable_feature = F;
option hostname = "testsystem";
option timeout = 1min;
option my_ports: vector of port = {};
}
The rules regarding options can be thought of as being in between global
variables and constants. Like global variables, options cannot be declared
inside a function, hook, or event handler. Like constants, options must be
initialized when declared (the type can often be inferred from the initializer
but may need to be specified). The value of an option can change at runtime,
but options cannot be assigned a new value using normal assignments.
The initial value of an option can be redefined with a :bro:keyword:`redef`
declaration just like for global variables and constants. The only difference
is that there is no need to specify the :bro:attr:`&redef` attribute in
the declaration of an option. For example, given the above option
declarations, here are some possible redefs:
.. sourcecode:: bro
redef TestModule::enable_feature = T;
redef TestModule::my_networks += { 10.1.0.0/16, 10.2.0.0/16 };
Changing options
----------------
The configuration framework facilitates reading in new option values
from external files at runtime.
Configuration files contain a mapping between option names and their values.
The format for these files looks like this::

    [option name][tab/spaces][new value]
Configuration files can be specified by adding them
to :bro:id:`Config::config_files`. Note that in a cluster configuration,
only the manager node attempts to read the specified configuration files.
For example, simply add something like this to local.bro:
.. sourcecode:: bro
redef Config::config_files += { "/path/to/config.dat" };
The specified configuration file will then be monitored continuously for
changes, so that writing ``TestModule::enable_feature T`` into that file will
automatically update the option's value accordingly (in a cluster
configuration, the change will be sent from the manager to all other nodes in
the cluster). Here is an example configuration file::
TestModule::my_networks 10.0.12.0/24,192.168.17.0/24
TestModule::enable_feature T
TestModule::hostname host-1
TestModule::timeout 50.5
TestModule::my_ports 80/tcp,53/udp
Note that as seen in the above example, for options of
type :bro:type:`interval`, the numeric value in the config file
is interpreted as seconds and there cannot be any time units
(such as sec, min, etc.).
Internally, the configuration framework uses the Bro input framework
with a type of input reader specifically for reading config files. Users
familiar with the Bro input framework might be aware that the input framework
is usually very strict about the syntax of input files. This is not true
for configuration files: the files need no header lines and either
tabs or spaces are accepted as separators.
If you inspect the configuration framework scripts, you will notice that the
scripts simply catch events from the input framework and then a
function :bro:see:`Config::set_value` is called to set an option to the new
value. If you want to change an option yourself during runtime, you can
call Config::set_value directly from a script (in a cluster configuration,
this only needs to happen on the manager, as the change will be automatically
sent to all other nodes in the cluster).
Note that some data types (such as pattern, table, and record) are not
supported by the config input reader. In that case you would need to use
the Config::set_value function to change the value of such an option as
shown in the following example.
.. sourcecode:: bro
module TestModule;
export {
option host_port: table[addr] of port = {};
}
event bro_init() {
local t: table[addr] of port = { [10.0.0.2] = 123/tcp };
Config::set_value("TestModule::host_port", t);
}
Regardless of whether an option change is triggered by a config file or by
the Config::set_value function, the change is always logged to the
log file "config.log".
Change handlers
---------------
A change handler is a user-defined function that is called automatically
each time an option value changes. This example shows how to register a
change handler for an option that has a data type of "addr" (for other
data types, the return type and 2nd parameter data type must be adjusted
accordingly):
.. sourcecode:: bro
module TestModule;
export {
option testaddr = 127.0.0.1;
}
# Note: the data type of 2nd parameter and return type must match
function change_addr(ID: string, new_value: addr): addr
{
print fmt("Value of %s changed from %s to %s", ID, testaddr, new_value);
return new_value;
}
event bro_init()
{
Option::set_change_handler("TestModule::testaddr", change_addr);
}
Immediately before the specified option value is changed, the change handler
function will be called. The value returned by the change handler is the
value finally assigned to the option. This allows, for example, checking
values to reject invalid input (the original value can be returned to reject
the change).
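Building on the previous example, a handler could veto unwanted values by
returning the option's current value instead of the proposed one (the
validation rule here is made up):

.. sourcecode:: bro

    # Reject loopback addresses by keeping the option's current value.
    function validate_addr(ID: string, new_value: addr): addr
        {
        if ( new_value == 127.0.0.1 )
            return TestModule::testaddr;
        return new_value;
        }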
It is possible to define multiple change handlers for a single option. In
this case, the change handlers are chained together: the value returned by the
first change handler is the "new value" seen by the next change handler, and
so on. The built-in function :bro:see:`Option::set_change_handler` takes an
optional third argument that can specify a priority for the handlers.
A change handler function can optionally have a third argument of type
string. When a config file triggers a change, then the third argument is
the pathname of the config file. When the Config::set_value function triggers
a change, then the third argument of the change handler is the value passed
to the optional third argument of the Config::set_value function.
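Continuing the earlier example, a sketch of a handler making use of that
third argument might look like this (the handler name and print format
are arbitrary):

.. sourcecode:: bro

    function change_addr_loc(ID: string, new_value: addr,
                             location: string): addr
        {
        # "location" is the config file pathname, or whatever was passed
        # as the optional third argument of Config::set_value.
        print fmt("Value of %s changed to %s (via %s)", ID, new_value,
                  location);
        return new_value;
        }

    event bro_init()
        {
        Option::set_change_handler("TestModule::testaddr", change_addr_loc);
        }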
Note that change handlers are also used internally by the
configuration framework. If you look at the script level source code of
the config framework, you can see that change handlers are used for
logging the option changes to config.log.
View file
@@ -1,148 +0,0 @@
.. _file-analysis-framework:
=============
File Analysis
=============
.. rst-class:: opening
In the past, writing Bro scripts with the intent of analyzing file
content could be cumbersome because the content would be presented
in different ways, via events, at the script-layer, depending on
which network protocol was involved in the
file transfer. Scripts written to analyze files over one protocol
would have to be copied and modified to fit other protocols. The
file analysis framework (FAF) instead provides a generalized
presentation of file-related information. The information regarding
the protocol involved in transporting a file over the network is
still available, but it no longer has to dictate how one organizes
their scripting logic to handle it. A goal of the FAF is to
provide analysis specifically for files that is analogous to the
analysis Bro provides for network connections.
File Lifecycle Events
=====================
The key events that may occur during the lifetime of a file are:
:bro:see:`file_new`, :bro:see:`file_over_new_connection`,
:bro:see:`file_timeout`, :bro:see:`file_gap`, and
:bro:see:`file_state_remove`. Handling any of these events provides
some information about the file such as which network
:bro:see:`connection` and protocol are transporting the file, how many
bytes have been transferred so far, and its MIME type.
Here's a simple example:
.. literalinclude:: file_analysis_01.bro
:caption:
:language: bro
:linenos:
.. sourcecode:: console
$ bro -r http/get.trace file_analysis_01.bro
file_state_remove
FakNcS1Jfe01uljb3
CHhAvVGS1DHFjwGM9
[orig_h=141.142.228.5, orig_p=59856/tcp, resp_h=192.150.187.43, resp_p=80/tcp]
HTTP
connection_state_remove
CHhAvVGS1DHFjwGM9
[orig_h=141.142.228.5, orig_p=59856/tcp, resp_h=192.150.187.43, resp_p=80/tcp]
HTTP
This doesn't perform any interesting analysis yet, but does highlight
the similarity between analysis of connections and files. Connections
are identified by the usual 5-tuple or a convenient UID string while
files are identified just by a string of the same format as the
connection UID. So there are unique ways to identify both files and
connections, and files hold references to the connection (or connections)
that transported them.
Adding Analysis
===============
There are builtin file analyzers which can be attached to files. Once
attached, they start receiving the contents of the file as Bro extracts
it from an ongoing network connection. What they do with the file
contents is up to the particular file analyzer implementation, but
they'll typically either report further information about the file via
events (e.g. :bro:see:`Files::ANALYZER_MD5` will report the
file's MD5 checksum via :bro:see:`file_hash` once calculated) or they'll
have some side effect (e.g. :bro:see:`Files::ANALYZER_EXTRACT`
will write the contents of the file out to the local file system).
In the future there may be file analyzers that automatically attach to
files based on heuristics, similar to the Dynamic Protocol Detection
(DPD) framework for connections, but many will always require an
explicit attachment decision.
Here's a simple example of how to use the MD5 file analyzer to
calculate the MD5 of plain text files:
.. literalinclude:: file_analysis_02.bro
:caption:
:language: bro
:linenos:
.. sourcecode:: console
$ bro -r http/get.trace file_analysis_02.bro
new file, FakNcS1Jfe01uljb3
file_hash, FakNcS1Jfe01uljb3, md5, 397168fd09991a0e712254df7bc639ac
Some file analyzers might have tunable parameters that need to be
specified in the call to :bro:see:`Files::add_analyzer`:
.. sourcecode:: bro
event file_new(f: fa_file)
{
Files::add_analyzer(f, Files::ANALYZER_EXTRACT,
[$extract_filename="myfile"]);
}
In this case, the file extraction analyzer doesn't generate any further
events, but does have the effect of writing out the file contents to the
local file system at the location resulting from the concatenation of
the path specified by :bro:see:`FileExtract::prefix` and the string
``myfile``. Of course, for a network with more than a single file being
transferred, it's probably preferable to specify a different extraction
path for each file, unlike this example.
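One way to do that is to derive the extraction filename from the file's
unique identifier (a sketch; the naming scheme is just an example):

.. sourcecode:: bro

    event file_new(f: fa_file)
        {
        # Use the file's unique ID to avoid collisions between files.
        Files::add_analyzer(f, Files::ANALYZER_EXTRACT,
                            [$extract_filename=fmt("extract-%s", f$id)]);
        }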
Regardless of which file analyzers end up acting on a file, general
information about the file (e.g. size, time of last data transferred,
MIME type, etc.) is logged in ``files.log``.
Input Framework Integration
===========================
The FAF comes with a simple way to integrate with the :doc:`Input
Framework <input>`, so that Bro can analyze files from external sources
in the same way it analyzes files that it sees coming over traffic from
a network interface it's monitoring. It only requires a call to
:bro:see:`Input::add_analysis`:
.. literalinclude:: file_analysis_03.bro
:caption:
:language: bro
:linenos:
Note that the "source" field of :bro:see:`fa_file` corresponds to the
"name" field of :bro:see:`Input::AnalysisDescription` since that is what
the input framework uses to uniquely identify an input stream.
Example output of the above script may be:
.. sourcecode:: console
$ echo "Hello world" > myfile
$ bro file_analysis_03.bro
new file, FZedLu4Ajcvge02jA8
file_hash, FZedLu4Ajcvge02jA8, md5, f0ef7081e1539ac00ef5b761b4fb01b3
file_state_remove
Nothing that special, but it at least verifies the MD5 file analyzer
saw all the bytes of the input file and calculated the checksum
correctly!
View file
@@ -1,20 +0,0 @@
event connection_state_remove(c: connection)
{
print "connection_state_remove";
print c$uid;
print c$id;
for ( s in c$service )
print s;
}
event file_state_remove(f: fa_file)
{
print "file_state_remove";
print f$id;
for ( cid in f$conns )
{
print f$conns[cid]$uid;
print cid;
}
print f$source;
}
View file
@@ -1,12 +0,0 @@
event file_sniff(f: fa_file, meta: fa_metadata)
{
if ( ! meta?$mime_type ) return;
print "new file", f$id;
if ( meta$mime_type == "text/plain" )
Files::add_analyzer(f, Files::ANALYZER_MD5);
}
event file_hash(f: fa_file, kind: string, hash: string)
{
print "file_hash", f$id, kind, hash;
}
View file
@@ -1,25 +0,0 @@
redef exit_only_after_terminate = T;
event file_new(f: fa_file)
{
print "new file", f$id;
Files::add_analyzer(f, Files::ANALYZER_MD5);
}
event file_state_remove(f: fa_file)
{
print "file_state_remove";
Input::remove(f$source);
terminate();
}
event file_hash(f: fa_file, kind: string, hash: string)
{
print "file_hash", f$id, kind, hash;
}
event bro_init()
{
local source: string = "./myfile";
Input::add_analysis([$source=source, $name=source]);
}
View file
@@ -1,146 +0,0 @@
.. _geolocation:
===========
GeoLocation
===========
.. rst-class:: opening
During the process of creating policy scripts the need may arise
to find the geographic location for an IP address. Bro had support
for the `GeoIP library <http://www.maxmind.com/app/c>`__ at the
policy script level from release 1.3 to 2.5.x to account for this
need. Starting with release 2.6, GeoIP support requires `libmaxminddb
<https://github.com/maxmind/libmaxminddb/releases>`__.
To use this functionality, you need to first install the libmaxminddb
software, and then install the GeoLite2 city database before building
Bro.
Install libmaxminddb
--------------------
Before building Bro, you need to install libmaxminddb.
* RPM/RedHat-based Linux:
.. sourcecode:: console
sudo yum install libmaxminddb-devel
* DEB/Debian-based Linux:
.. sourcecode:: console
sudo apt-get install libmaxminddb-dev
* FreeBSD:
.. sourcecode:: console
sudo pkg install libmaxminddb
* Mac OS X:
You need to install it from your preferred package management system
(e.g. Homebrew, MacPorts, or Fink). For Homebrew, the name of the package
that you need is libmaxminddb.
GeoLite2-City Database Installation
-----------------------------------
Bro can use the city or country database. The city database includes cities
and regions in addition to countries.
`Download <http://geolite.maxmind.com/download/geoip/database/GeoLite2-City.tar.gz>`__
the GeoLite2 city binary database:
.. sourcecode:: console
wget http://geolite.maxmind.com/download/geoip/database/GeoLite2-City.tar.gz
tar zxf GeoLite2-City.tar.gz
Next, the file "GeoLite2-City_YYYYMMDD/GeoLite2-City.mmdb" needs to be moved
to the GeoIP database directory. This directory might already exist
and will vary depending on which platform and package you are using. For
FreeBSD, use ``/usr/local/share/GeoIP``. For Linux, use ``/usr/share/GeoIP``
or ``/var/lib/GeoIP`` (choose whichever one already exists).
.. sourcecode:: console
mv <extracted subdir>/GeoLite2-City.mmdb <path_to_database_dir>/GeoLite2-City.mmdb
Testing
-------
Before using the GeoIP functionality, it is a good idea to verify that
everything is set up correctly. After installing libmaxminddb and the GeoIP
city database, and building Bro, you can quickly check if the GeoIP
functionality works by running a command like this:
.. sourcecode:: console
bro -e "print lookup_location(8.8.8.8);"
If you see an error message similar to "Failed to open GeoIP location
database", then you may need to either rename or move your GeoIP
location database file. If the :bro:see:`mmdb_dir` value is set to a
directory pathname (it is not set by default), then Bro looks for location
database files in that directory. If none are found or if mmdb_dir is not set,
then Bro looks for location database files in the following order:
* /usr/share/GeoIP/GeoLite2-City.mmdb
* /var/lib/GeoIP/GeoLite2-City.mmdb
* /usr/local/share/GeoIP/GeoLite2-City.mmdb
* /usr/local/var/GeoIP/GeoLite2-City.mmdb
* /usr/share/GeoIP/GeoLite2-Country.mmdb
* /var/lib/GeoIP/GeoLite2-Country.mmdb
* /usr/local/share/GeoIP/GeoLite2-Country.mmdb
* /usr/local/var/GeoIP/GeoLite2-Country.mmdb
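If your database lives in a different directory entirely, you can point
Bro at it by redefining :bro:see:`mmdb_dir` (the path below is just an
example):

.. sourcecode:: bro

    redef mmdb_dir = "/opt/geoip";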
If you see an error message similar to "Bro was not configured for GeoIP
support", then you need to rebuild Bro and make sure it is linked
against libmaxminddb. Normally, if libmaxminddb is installed correctly then it
should automatically be found when building Bro. If this doesn't
happen, then you may need to specify the path to the libmaxminddb
installation (e.g. ``./configure --with-geoip=<path>``).
Usage
-----
There is a built-in function that provides the GeoIP functionality:
.. sourcecode:: bro
function lookup_location(a:addr): geo_location
The return value of the :bro:see:`lookup_location` function is a record
type called :bro:see:`geo_location`, and it consists of several fields
containing the country, region, city, latitude, and longitude of the specified
IP address. Since one or more fields in this record will be uninitialized
for some IP addresses (for example, the country and region of an IP address
might be known, but the city could be unknown), a field should be checked
for a value before trying to access it.
Example
-------
To show every ftp connection from hosts in Ohio, this is now very easy:
.. sourcecode:: bro
event ftp_reply(c: connection, code: count, msg: string, cont_resp: bool)
{
local client = c$id$orig_h;
local loc = lookup_location(client);
if ( loc?$region && loc$region == "OH" &&
     loc?$country_code && loc$country_code == "US" )
{
local city = loc?$city ? loc$city : "<unknown>";
print fmt("FTP Connection from:%s (%s,%s,%s)", client, city,
loc$region, loc$country_code);
}
}
View file
@@ -1,19 +0,0 @@
==========
Frameworks
==========
.. toctree::
:maxdepth: 1
configuration
file-analysis
geoip
input
intel
logging
netcontrol
notice
signatures
sumstats
broker
View file
@@ -1,334 +0,0 @@
.. _framework-input:
===============
Input Framework
===============
.. rst-class:: opening
Bro features a flexible input framework that allows users
to import data into Bro. Data is either read into Bro tables or
converted to events which can then be handled by scripts.
This document gives an overview of how to use the input framework
with some examples. For more complex scenarios it is
worthwhile to take a look at the unit tests in
``testing/btest/scripts/base/frameworks/input/``.
Reading Data into Tables
========================
Probably the most interesting use-case of the input framework is to
read data into a Bro table.
By default, the input framework reads the data in the same format
as it is written by the logging framework in Bro - a tab-separated
ASCII file.
We will show the ways to read files into Bro with a simple example.
For this example we assume that we want to import data from a blacklist
that contains server IP addresses as well as the timestamp and the reason
for the block.
An example input file could look like this (note that all fields must be
tab-separated):
::
#fields ip timestamp reason
192.168.17.1 1333252748 Malware host
192.168.27.2 1330235733 Botnet server
192.168.250.3 1333145108 Virus detected
To read a file into a Bro table, two record types have to be defined.
One contains the types and names of the columns that should constitute the
table keys and the second contains the types and names of the columns that
should constitute the table values.
In our case, we want to be able to look up IPs. Hence, our key record
only contains the server IP. All other elements should be stored as
the table content.
The two records are defined as:
.. sourcecode:: bro
type Idx: record {
ip: addr;
};
type Val: record {
timestamp: time;
reason: string;
};
Note that the names of the fields in the record definitions must correspond
to the column names listed in the '#fields' line of the log file, in this
case 'ip', 'timestamp', and 'reason'. Also note that the ordering of the
columns does not matter, because each column is identified by name.
The log file is read into the table with a simple call of the
:bro:id:`Input::add_table` function:
.. sourcecode:: bro
global blacklist: table[addr] of Val = table();
event bro_init() {
Input::add_table([$source="blacklist.file", $name="blacklist",
$idx=Idx, $val=Val, $destination=blacklist]);
Input::remove("blacklist");
}
With these three lines we first create an empty table that should contain the
blacklist data and then instruct the input framework to open an input stream
named ``blacklist`` to read the data into the table. The third line removes the
input stream again, because we do not need it any more after the data has been
read.
Because some data files can - potentially - be rather big, the input framework
works asynchronously. A new thread is created for each new input stream.
This thread opens the input data file, converts the data into a Bro format and
sends it back to the main Bro thread.
Because of this, the data is not immediately accessible. Depending on the
size of the data source it might take from a few milliseconds up to a few
seconds until all data is present in the table. Please note that this means
that when Bro is running without an input source or on very short captured
files, it might terminate before the data is present in the table (because
Bro already handled all packets before the import thread finished).
Subsequent calls to an input source are queued until the previous action has
been completed. Because of this, it is, for example, possible to call
``add_table`` and ``remove`` in two subsequent lines: the ``remove`` action
will remain queued until the first read has been completed.
Once the input framework finishes reading from a data source, it fires
the :bro:id:`Input::end_of_data` event. Once this event has been received all
data from the input file is available in the table.
.. sourcecode:: bro
event Input::end_of_data(name: string, source: string) {
# now all data is in the table
print blacklist;
}
The table can be used while the data is still being read - it
just might not contain all lines from the input file before the event has
fired. After the table has been populated it can be used like any other Bro
table and blacklist entries can easily be tested:
.. sourcecode:: bro
if ( 192.168.18.12 in blacklist )
# take action
Re-reading and streaming data
-----------------------------
For many data sources, like for many blacklists, the source data is continually
changing. For these cases, the Bro input framework supports several ways to
deal with changing data files.
The first, very basic method is an explicit refresh of an input stream. When
an input stream is open (this means it has not yet been removed by a call to
:bro:id:`Input::remove`), the function :bro:id:`Input::force_update` can be
called. This will trigger a complete refresh of the table; any changed
elements from the file will be updated. After the update is finished the
:bro:id:`Input::end_of_data` event will be raised.
In our example the call would look like:
.. sourcecode:: bro
Input::force_update("blacklist");
Alternatively, the input framework can automatically refresh the table
contents when it detects a change to the input file. To use this feature,
you need to specify a non-default read mode by setting the ``mode`` option
of the :bro:id:`Input::add_table` call. Valid values are ``Input::MANUAL``
(the default), ``Input::REREAD`` and ``Input::STREAM``. For example,
setting the value of the ``mode`` option in the previous example
would look like this:
.. sourcecode:: bro
Input::add_table([$source="blacklist.file", $name="blacklist",
$idx=Idx, $val=Val, $destination=blacklist,
$mode=Input::REREAD]);
When using the reread mode (i.e., ``$mode=Input::REREAD``), Bro continually
checks if the input file has been changed. If the file has been changed, it
is re-read and the data in the Bro table is updated to reflect the current
state. Each time a change has been detected and all the new data has been
read into the table, the ``end_of_data`` event is raised.
When using the streaming mode (i.e., ``$mode=Input::STREAM``), Bro assumes
that the source data file is an append-only file to which new data is
continually appended. Bro continually checks for new data at the end of
the file and will add the new data to the table. If newer lines in the
file have the same index as previous lines, they will overwrite the
values in the output table. Because of the nature of streaming reads
(data is continually added to the table), the ``end_of_data`` event
is never raised when using streaming reads.
Receiving change events
-----------------------
When re-reading files, it might be interesting to know exactly which lines in
the source files have changed.
For this reason, the input framework can raise an event each time a data
item is added to, removed from, or changed in a table.
The event definition looks like this (note that you can change the name of
this event in your own Bro script):
.. sourcecode:: bro
event entry(description: Input::TableDescription, tpe: Input::Event,
left: Idx, right: Val) {
# do something here...
print fmt("%s = %s", left, right);
}
The event must be specified in ``$ev`` in the ``add_table`` call:
.. sourcecode:: bro
Input::add_table([$source="blacklist.file", $name="blacklist",
$idx=Idx, $val=Val, $destination=blacklist,
$mode=Input::REREAD, $ev=entry]);
The ``description`` argument of the event contains the arguments that were
originally supplied to the add_table call. Hence, the name of the stream can,
for example, be accessed with ``description$name``. The ``tpe`` argument of the
event is an enum containing the type of the change that occurred.
If a line that was not previously present in the table has been added,
then the value of ``tpe`` will be ``Input::EVENT_NEW``. In this case ``left``
contains the index of the added table entry and ``right`` contains the
values of the added entry.
If a table entry that already was present is altered during the re-reading or
streaming read of a file, then the value of ``tpe`` will be
``Input::EVENT_CHANGED``. In
this case ``left`` contains the index of the changed table entry and ``right``
contains the values of the entry before the change. The reason for this is
that the table already has been updated when the event is raised. The current
value in the table can be ascertained by looking up the current table value.
Hence it is possible to compare the new and the old values of the table.
If a table element is removed because it was no longer present during a
re-read, then the value of ``tpe`` will be ``Input::EVENT_REMOVED``. In this
case ``left`` contains the index and ``right`` the values of the removed
element.
Filtering data during import
----------------------------
The input framework also allows a user to filter the data during the import.
To this end, predicate functions are used. A predicate function is called
before a new element is added/changed/removed from a table. The predicate
can either accept or veto the change by returning true for an accepted
change and false for a rejected change. Furthermore, it can alter the data
before it is written to the table.
The following example filter will reject adding entries to the table when
they were generated over a month ago. It will accept all changes and all
removals of values that are already present in the table.
.. sourcecode:: bro
Input::add_table([$source="blacklist.file", $name="blacklist",
$idx=Idx, $val=Val, $destination=blacklist,
$mode=Input::REREAD,
$pred(typ: Input::Event, left: Idx, right: Val) = {
if ( typ != Input::EVENT_NEW ) {
return T;
}
return (current_time() - right$timestamp) < 30day;
}]);
To change elements while they are being imported, the predicate function can
manipulate ``left`` and ``right``. Note that predicate functions are called
before the change is committed to the table. Hence, when a table element is
changed (``typ`` is ``Input::EVENT_CHANGED``), ``left`` and ``right``
contain the new values, but the destination (``blacklist`` in our example)
still contains the old values. This allows predicate functions to examine
the changes between the old and the new version before deciding if they
should be allowed.
Different readers
-----------------
The input framework supports different kinds of readers for different kinds
of source data files. At the moment, the default reader reads ASCII files
formatted in the Bro log file format (tab-separated values with a "#fields"
header line). Several other readers are included in Bro.
The raw reader reads a file that is
split by a specified record separator (newline by default). The contents are
returned line-by-line as strings; it can, for example, be used to read
configuration files and the like and is probably
only useful in the event mode and not for reading data to tables.
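As a sketch of the raw reader's event mode (the stream name, path, and
event name below are made up), each line of the file arrives as the
string argument of the specified event:

.. sourcecode:: bro

    type Line: record {
        s: string;
    };

    event raw_line(description: Input::EventDescription, tpe: Input::Event,
                   s: string)
        {
        print "got line", s;
        }

    event bro_init()
        {
        Input::add_event([$source="/path/to/input.txt", $name="raw-lines",
                          $reader=Input::READER_RAW, $mode=Input::STREAM,
                          $fields=Line, $ev=raw_line]);
        }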
The binary reader is intended to be used with file analysis input streams (and
is the default type of reader for those streams).
The benchmark reader is used
to optimize the speed of the input framework. It can generate arbitrary
amounts of semi-random data in all Bro data types supported by the input
framework.
Currently, Bro supports the following readers in addition to the
aforementioned ones:
.. toctree::
:maxdepth: 1
logging-input-sqlite
Reading Data to Events
======================
The second supported mode of the input framework is reading data to Bro
events instead of reading them to a table.
Event streams work very similarly to table streams that were already
discussed in much detail. To read the blacklist of the previous example
into an event stream, the :bro:id:`Input::add_event` function is used.
For example:
.. sourcecode:: bro
type Val: record {
ip: addr;
timestamp: time;
reason: string;
};
event blacklistentry(description: Input::EventDescription,
t: Input::Event, data: Val) {
# do something here...
print "data:", data;
}
event bro_init() {
Input::add_event([$source="blacklist.file", $name="blacklist",
$fields=Val, $ev=blacklistentry]);
}
The main difference in the declaration of the event stream is that an event
stream needs no separate index and value declarations -- instead, all source
data types are provided in a single record definition.
Apart from this, event streams work exactly the same as table streams and
support most of the options that are also supported for table streams.
View file
@@ -1,143 +0,0 @@
======================
Intelligence Framework
======================
Intro
-----
Intelligence data is critical to the process of monitoring for
security purposes. There is always data which will be discovered
through the incident response process and data which is shared through
private communities. The goals of Bro's Intelligence Framework are to
consume that data, make it available for matching, and provide
infrastructure around improving performance, memory utilization, and
generally making all of this easier.
Data in the Intelligence Framework is an atomic piece of intelligence
such as an IP address or an e-mail address along with a suite of
metadata about it such as a freeform source field, a freeform
descriptive field and a URL which might lead to more information about
the specific item. The metadata in the default scripts has been
deliberately kept minimal so that the community can find the
appropriate fields that need to be added by writing scripts which extend the
base record using the normal record extension mechanism.
Quick Start
-----------
Refer to the "Loading Intelligence" section below to see the format
for Intelligence Framework text files, then load those text files with
this line in local.bro::
redef Intel::read_files += { "/somewhere/yourdata.txt" };
The text files need to reside only on the manager if running in a
cluster.
Add the following line to local.bro in order to load the scripts
that send "seen" data into the Intelligence Framework to be checked against
the loaded intelligence data::
@load policy/frameworks/intel/seen
Intelligence data matches will be logged to the intel.log file.
Architecture
------------
The Intelligence Framework can be thought of as containing three
separate portions. The first part is how intelligence is loaded;
the second is the mechanism for indicating to the intelligence framework
that a piece of data which needs to be checked has been seen; and the
third is what happens when a positive match has been discovered.
Loading Intelligence
********************
Intelligence data can only be loaded through plain text files using
the Input Framework conventions. Additionally, on clusters the
manager is the only node that needs the intelligence data. The
intelligence framework has distribution mechanisms which will push
data out to all of the nodes that need it.
Here is an example of the intelligence data format (note that there will be
additional fields if you are using CIF intelligence data or if you are
using the policy/frameworks/intel/do_notice script). Note that all fields
must be separated by a single tab character and fields containing only a
hyphen are considered to be null values. ::
#fields indicator indicator_type meta.source meta.desc meta.url
1.2.3.4 Intel::ADDR source1 Sending phishing email http://source1.com/badhosts/1.2.3.4
a.b.com Intel::DOMAIN source2 Name used for data exfiltration -
For a list of all built-in `indicator_type` values, please refer to the
documentation of :bro:see:`Intel::Type`.
Note that if you are using data from the Collective Intelligence Framework,
then you will need to add the following line to your local.bro in order
to support additional metadata fields used by CIF::
@load policy/integration/collective-intel
There is a simple mechanism to raise a Bro notice (of type Intel::Notice)
for user-specified intelligence matches. To use this feature, add the
following line to local.bro in order to support additional metadata fields
(documented in the :bro:see:`Intel::MetaData` record)::
@load policy/frameworks/intel/do_notice
To load the data once the files are created, use the following example
to specify which files to load (with your own file names of course)::
redef Intel::read_files += {
"/somewhere/feed1.txt",
"/somewhere/feed2.txt",
};
Remember, the files only need to be present on the file system of the
manager node on cluster deployments.
Seen Data
*********
When some bit of data is extracted (such as an email address in the
"From" header in a message over SMTP), the Intelligence Framework
needs to be informed that this data was discovered so that its presence
will be checked within the loaded intelligence data. This is
accomplished through the :bro:see:`Intel::seen` function; however,
typically users won't need to work with this function directly due to
the scripts included with Bro that call it.
To load all of the scripts included with Bro for sending "seen" data to
the intelligence framework, just add this line to local.bro::
@load policy/frameworks/intel/seen
Alternatively, specific scripts in that directory can be loaded.
Keep in mind that as more data is sent into the
intelligence framework, the CPU load consumed by Bro will increase
depending on how many times the :bro:see:`Intel::seen` function is
called, which is heavily traffic dependent.
Intelligence Matches
********************
Despite all hopes to the contrary, most networks will eventually have a
hit on intelligence data, which could indicate a possible compromise or
other unwanted activity. The Intelligence Framework provides an event
named :bro:see:`Intel::match` that is generated whenever a match is
discovered. Due to design restrictions placed upon the intelligence
framework, there is no assurance as to where this event will be
generated: it could be raised on the worker where the data was seen or
on the manager. When the ``Intel::match`` event is handled, only the
data passed as event arguments can be relied upon, since the host where
the data was seen may not be the host where
``Intel::match`` is handled.
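As a minimal sketch, a handler for this event might look like the
following (the output format is made up)::

	event Intel::match(s: Intel::Seen, items: set[Intel::Item])
		{
		for ( item in items )
			print fmt("intel match on %s (source: %s)",
			          s?$indicator ? s$indicator : "<unknown>",
			          item$meta$source);
		}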
Intelligence matches are logged to the intel.log file. For a description of
each field in that file, see the documentation for the :bro:see:`Intel::Info`
record.
@ -1,167 +0,0 @@
============================================
Logging To and Reading From SQLite Databases
============================================
.. rst-class:: opening
Starting with version 2.2, Bro features a SQLite logging writer
as well as a SQLite input reader. SQLite is a simple, file-based,
widely used SQL database system. Using SQLite allows Bro to write
and access data in a format that is easy to use in interchange with
other applications. Due to the transactional nature of SQLite,
databases can be used by several applications simultaneously. Hence,
they can, for example, be used to make data that changes regularly available
to Bro on a continuing basis.
Warning
=======
In contrast to the ASCII reader and writer, the SQLite plugins have not yet
seen extensive use in production environments. While we are not aware
of any issues with them, we urge caution when using them
in production environments. There could be lingering issues which only occur
when the plugins are used with high amounts of data or in high-load
environments.
Logging Data into SQLite Databases
==================================
Logging support for SQLite is available in all Bro installations starting with
version 2.2. There is no need to load any additional scripts or for any
compile-time configurations.
Sending data from existing logging streams to SQLite is rather straightforward.
You have to define a filter which specifies SQLite as the writer.
The following example code adds SQLite as a filter for the connection log:
.. literalinclude:: sqlite-conn-filter.bro
:caption:
:language: bro
:linenos:
Bro will create the database file ``/var/db/conn.sqlite``, if it does not
already exist. It will also create a table with the name ``conn`` (if it
does not exist) and start appending connection information to the table.
At the moment, SQLite databases are not rotated the same way ASCII log files
are. You have to take care to create them in an adequate location.
If you examine the resulting SQLite database, the schema will contain the
same fields that are present in the ASCII log files::
# sqlite3 /var/db/conn.sqlite
SQLite version 3.8.0.2 2013-09-03 17:11:13
Enter ".help" for instructions
Enter SQL statements terminated with a ";"
sqlite> .schema
CREATE TABLE conn (
'ts' double precision,
'uid' text,
'id.orig_h' text,
'id.orig_p' integer,
...
Note that the ASCII ``conn.log`` will still be created. To prevent this file
from being created, you can remove the default filter:
.. sourcecode:: bro
Log::remove_filter(Conn::LOG, "default");
To create a custom SQLite log file, you have to create a new log stream
that contains just the information you want to commit to the database.
Please refer to the :ref:`framework-logging` documentation on how to
create custom log streams.
Reading Data from SQLite Databases
==================================
Like logging support, support for reading data from SQLite databases is
built into Bro starting with version 2.2.
Just as with the text-based input readers (please refer to the
:ref:`framework-input` documentation for them and for basic information
on how to use the input framework), the SQLite reader can be used to
read data - in this case the result of SQL queries - into tables or into
events.
Reading Data into Tables
------------------------
To read data from a SQLite database, we first have to tell Bro how the
resulting data will be structured. For this example, we assume a SQLite
database which contains host IP addresses and the user accounts that are
allowed to log into each machine.
The SQLite commands to create the schema are as follows::
create table machines_to_users (
host text unique not null,
users text not null);
insert into machines_to_users values ('192.168.17.1', 'bernhard,matthias,seth');
insert into machines_to_users values ('192.168.17.2', 'bernhard');
insert into machines_to_users values ('192.168.17.3', 'seth,matthias');
After creating a file called ``hosts.sqlite`` with this content, we can
read the resulting table into Bro:
.. literalinclude:: sqlite-read-table.bro
:caption:
:language: bro
:linenos:
Afterwards, that table can be used to check logins into hosts against
the available userlist.
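A minimal sketch of such a check might look like this (the table and type
names are made up and need to match the declarations used when reading
the data):

.. sourcecode:: bro

    type UserList: record {
        users: string;
    };

    # Assumed to be populated from hosts.sqlite by the input framework.
    global machines_to_users: table[addr] of UserList = table();

    function user_allowed(host: addr, user: string): bool
        {
        if ( host !in machines_to_users )
            return F;

        # The users column holds a comma-separated list of account names.
        local users = split_string(machines_to_users[host]$users, /,/);
        for ( i in users )
            {
            if ( users[i] == user )
                return T;
            }

        return F;
        }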
Turning Data into Events
------------------------
The second mode is to use the SQLite reader to output the input data as events.
Typically there are two reasons to do this. First, the structure of
the input data may be too complicated for a direct table import; in this
case, the data can be read into an event which can then create the
necessary data structures in Bro in scriptland.

The second reason is that the dataset is too big to hold in memory. In
this case, checks can be performed on demand, whenever Bro encounters a
situation where it needs additional information.

An example would be a huge internal database of malware hashes. Live
database queries could then be used to check the occasional download
against the database.
The SQLite commands to create the schema are as follows::
create table malware_hashes (
hash text unique not null,
description text not null);
insert into malware_hashes values ('86f7e437faa5a7fce15d1ddcb9eaeaea377667b8', 'malware a');
insert into malware_hashes values ('e9d71f5ee7c92d6dc9e92ffdad17b8bd49418f98', 'malware b');
insert into malware_hashes values ('84a516841ba77a5b4648de2cd0dfcb30ea46dbb4', 'malware c');
insert into malware_hashes values ('3c363836cf4e16666669a25da280a1865c2d2874', 'malware d');
insert into malware_hashes values ('58e6b3a414a1e090dfc6029add0f3555ccba127f', 'malware e');
insert into malware_hashes values ('4a0a19218e082a343a1b17e5333409af9d98f0f5', 'malware f');
insert into malware_hashes values ('54fd1711209fb1c0781092374132c66e79e2241b', 'malware g');
insert into malware_hashes values ('27d5482eebd075de44389774fce28c69f45c8a75', 'malware h');
insert into malware_hashes values ('73f45106968ff8dc51fba105fa91306af1ff6666', 'ftp-trace');
The following code uses the file-analysis framework to get the SHA1 hashes
of files that are transmitted over the network. For each hash, an SQL query
is run against SQLite. If the query returns a result, we have a hit
against our malware database and output the matching hash.
.. literalinclude:: sqlite-read-events.bro
:caption:
:language: bro
:linenos:
If you run this script against the trace in
``testing/btest/Traces/ftp/ipv4.trace``, you will get one hit.
@ -1,534 +0,0 @@
.. _framework-logging:
=================
Logging Framework
=================
.. rst-class:: opening
Bro comes with a flexible key-value based logging interface that
allows fine-grained control of what gets logged and how it is
logged. This document describes how logging can be customized and
extended.
Terminology
===========
Bro's logging interface is built around three main abstractions:
Streams
A log stream corresponds to a single log. It defines the set of
fields that a log consists of with their names and types.
Examples are the ``conn`` stream for recording connection summaries,
and the ``http`` stream for recording HTTP activity.
Filters
Each stream has a set of filters attached to it that determine
what information gets written out. By default, each stream has
one default filter that just logs everything directly to disk.
However, additional filters can be added to record only a subset
of the log records, write to different outputs, or set a custom
rotation interval. If all filters are removed from a stream,
then output is disabled for that stream.
Writers
Each filter has a writer. A writer defines the actual output
format for the information being logged. The default writer is
the ASCII writer, which produces tab-separated ASCII files. Other
writers are available, like for binary output or direct logging
into a database.
There are several different ways to customize Bro's logging: you can create
a new log stream, you can extend an existing log with new fields, you
can apply filters to an existing log stream, or you can customize the output
format by setting log writer options. All of these approaches are
described in this document.
Streams
=======
In order to log data to a new log stream, all of the following needs to be
done:
- A :bro:type:`record` type must be defined which consists of all the
fields that will be logged (by convention, the name of this record type is
usually "Info").
- A log stream ID (an :bro:type:`enum` with type name "Log::ID") must be
defined that uniquely identifies the new log stream.
- A log stream must be created using the :bro:id:`Log::create_stream` function.
- When the data to be logged becomes available, the :bro:id:`Log::write`
function must be called.
In the following example, we create a new module "Foo" which creates
a new log stream.
.. sourcecode:: bro
module Foo;
export {
# Create an ID for our new stream. By convention, this is
# called "LOG".
redef enum Log::ID += { LOG };
# Define the record type that will contain the data to log.
type Info: record {
ts: time &log;
id: conn_id &log;
service: string &log &optional;
missed_bytes: count &log &default=0;
};
}
# Optionally, we can add a new field to the connection record so that
# the data we are logging (our "Info" record) will be easily
# accessible in a variety of event handlers.
redef record connection += {
# By convention, the name of this new field is the lowercase name
# of the module.
foo: Info &optional;
};
# This event is handled at a priority higher than zero so that if
# users modify this stream in another script, they can do so at the
# default priority of zero.
event bro_init() &priority=5
{
# Create the stream. This adds a default filter automatically.
Log::create_stream(Foo::LOG, [$columns=Info, $path="foo"]);
}
In the definition of the "Info" record above, notice that each field has the
:bro:attr:`&log` attribute. Without this attribute, a field will not appear in
the log output. Also notice one field has the :bro:attr:`&optional` attribute.
This indicates that the field might not be assigned any value before the
log record is written. Finally, a field with the :bro:attr:`&default`
attribute has a default value assigned to it automatically.
At this point, the only thing missing is a call to the :bro:id:`Log::write`
function to send data to the logging framework. The actual event handler
where this should take place will depend on where your data becomes available.
In this example, the :bro:id:`connection_established` event provides our data,
and we also store a copy of the data being logged into the
:bro:type:`connection` record:
.. sourcecode:: bro
event connection_established(c: connection)
{
local rec: Foo::Info = [$ts=network_time(), $id=c$id];
# Store a copy of the data in the connection record so other
# event handlers can access it.
c$foo = rec;
Log::write(Foo::LOG, rec);
}
If you run Bro with this script, a new log file ``foo.log`` will be created.
Although we only specified four fields in the "Info" record above, the
log output will actually contain seven fields because one of the fields
(the one named "id") is itself a record type. Since a :bro:type:`conn_id`
record has four fields, then each of these fields is a separate column in
the log output. Note that the way that such fields are named in the log
output differs slightly from the way we would refer to the same field
in a Bro script (each dollar sign is replaced with a period). For example,
to access the first field of a ``conn_id`` in a Bro script we would use
the notation ``id$orig_h``, but that field is named ``id.orig_h``
in the log output.
When you are developing scripts that add data to the :bro:type:`connection`
record, care must be given to when and how long data is stored.
Normally data saved to the connection record will remain there for the
duration of the connection and from a practical perspective it's not
uncommon to need to delete that data before the end of the connection.
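For example, here is a minimal sketch that frees the ``foo`` field from
the earlier example once the record has been written (the negative
priority ensures it runs after the handler that calls ``Log::write``):

.. sourcecode:: bro

    event connection_established(c: connection) &priority=-5
        {
        # Drop the per-connection state once it has been logged.
        if ( c?$foo )
            delete c$foo;
        }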
Add Fields to a Log
-------------------
You can add additional fields to a log by extending the record
type that defines its content, and setting a value for the new fields
before each log record is written.
Let's say we want to add a boolean field ``is_private`` to
:bro:type:`Conn::Info` that indicates whether the originator IP address
is part of the :rfc:`1918` space:
.. sourcecode:: bro
# Add a field to the connection log record.
redef record Conn::Info += {
## Indicate if the originator of the connection is part of the
## "private" address space defined in RFC1918.
is_private: bool &default=F &log;
};
As this example shows, when extending a log stream's "Info" record, each
new field must always be declared either with a ``&default`` value or
as ``&optional``. Furthermore, you need to add the ``&log`` attribute
or otherwise the field won't appear in the log file.
Now we need to set the field. Although the details vary depending on which
log is being extended, in general it is important to choose a suitable event
in which to set the additional fields because we need to make sure that
the fields are set before the log record is written. Sometimes the right
choice is the same event which writes the log record, but at a higher
priority (in order to ensure that the event handler that sets the additional
fields is executed before the event handler that writes the log record).
In this example, since a connection's summary is generated at
the time its state is removed from memory, we can add another handler
at that time that sets our field correctly:
.. sourcecode:: bro
event connection_state_remove(c: connection)
{
if ( c$id$orig_h in Site::private_address_space )
c$conn$is_private = T;
}
Now ``conn.log`` will show a new field ``is_private`` of type
``bool``. If you look at the Bro script which defines the connection
log stream :doc:`/scripts/base/protocols/conn/main.bro`, you will see
that ``Log::write`` gets called in an event handler for the
same event as used in this example to set the additional fields, but at a
lower priority than the one used in this example (i.e., the log record gets
written after we assign the ``is_private`` field).
For extending logs this way, one needs a bit of knowledge about how
the script that creates the log stream is organizing its state
keeping. Most of the standard Bro scripts attach their log state to
the :bro:type:`connection` record where it can then be accessed, just
like ``c$conn`` above. For example, the HTTP analysis adds a field
``http`` of type :bro:type:`HTTP::Info` to the :bro:type:`connection`
record.
Define a Logging Event
----------------------
Sometimes it is helpful to do additional analysis of the information
being logged. For these cases, a stream can specify an event that will
be generated every time a log record is written to it. To do this, we
need to modify the example module shown above to look something like this:
.. sourcecode:: bro
module Foo;
export {
redef enum Log::ID += { LOG };
type Info: record {
ts: time &log;
id: conn_id &log;
service: string &log &optional;
missed_bytes: count &log &default=0;
};
# Define a logging event. By convention, this is called
# "log_<stream>".
global log_foo: event(rec: Info);
}
event bro_init() &priority=5
{
# Specify the "log_foo" event here in order for Bro to raise it.
Log::create_stream(Foo::LOG, [$columns=Info, $ev=log_foo,
$path="foo"]);
}
All of Bro's default log streams define such an event. For example, the
connection log stream raises the event :bro:id:`Conn::log_conn`. You
could use that for example for flagging when a connection to a
specific destination exceeds a certain duration:
.. sourcecode:: bro
redef enum Notice::Type += {
## Indicates that a connection remained established longer
## than 5 minutes.
Long_Conn_Found
};
event Conn::log_conn(rec: Conn::Info)
{
if ( rec?$duration && rec$duration > 5mins )
NOTICE([$note=Long_Conn_Found,
$msg=fmt("unusually long conn to %s", rec$id$resp_h),
$id=rec$id]);
}
Often, these events can be an alternative to post-processing Bro logs
externally with Perl scripts: much of what such an external script would
do later offline can instead be done directly inside Bro in real time.
Disable a Stream
----------------
One way to "turn off" a log is to completely disable the stream. For
example, the following example will prevent the conn.log from being written:
.. sourcecode:: bro
event bro_init()
{
Log::disable_stream(Conn::LOG);
}
Note that this must run after the stream is created, so the priority
of this event handler must be lower than the priority of the event handler
where the stream was created.
Filters
=======
A stream has one or more filters attached to it (a stream without any filters
will not produce any log output). When a stream is created, it automatically
gets a default filter attached to it. This default filter can be removed
or replaced, or other filters can be added to the stream. This is accomplished
by using either the :bro:id:`Log::add_filter` or :bro:id:`Log::remove_filter`
function. This section shows how to use filters to do such tasks as
rename a log file, split the output into multiple files, control which
records are written, and set a custom rotation interval.
Rename Log File
---------------
Normally, the log filename for a given log stream is determined when the
stream is created, unless you explicitly specify a different one by adding
a filter.
The easiest way to change a log filename is to simply replace the
default log filter with a new filter that specifies a value for the "path"
field. In this example, "conn.log" will be changed to "myconn.log":
.. sourcecode:: bro
event bro_init()
{
# Replace default filter for the Conn::LOG stream in order to
# change the log filename.
local f = Log::get_filter(Conn::LOG, "default");
f$path = "myconn";
Log::add_filter(Conn::LOG, f);
}
Keep in mind that the "path" field of a log filter never contains the
filename extension. The extension will be determined later by the log writer.
Add a New Log File
------------------
Normally, a log stream writes to only one log file. However, you can
add filters so that the stream writes to multiple files. This is useful
if you want to restrict the set of fields being logged to the new file.
In this example, a new filter is added to the Conn::LOG stream that writes
two fields to a new log file:
.. sourcecode:: bro
event bro_init()
{
# Add a new filter to the Conn::LOG stream that logs only
# timestamp and originator address.
local filter: Log::Filter = [$name="orig-only", $path="origs",
$include=set("ts", "id.orig_h")];
Log::add_filter(Conn::LOG, filter);
}
Notice how the "include" filter attribute specifies a set that limits the
fields to the ones given. The names correspond to those in the
:bro:type:`Conn::Info` record (however, because the "id" field is itself a
record, we can specify an individual field of "id" by the dot notation
shown in the example).
Using the code above, in addition to the regular ``conn.log``, you will
now also get a new log file ``origs.log`` that looks like the regular
``conn.log``, but will have only the fields specified in the "include"
filter attribute.
If you want to skip only some fields but keep the rest, there is a
corresponding ``exclude`` filter attribute that you can use instead of
``include`` to list only the ones you are not interested in.
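For example, the following sketch keeps all of the connection log's
fields except a few of the packet counters (the field names are those
defined in :bro:type:`Conn::Info`):

.. sourcecode:: bro

    event bro_init()
        {
        # Log everything except the listed fields.
        local filter: Log::Filter = [$name="conn-brief", $path="conn-brief",
                                     $exclude=set("history", "orig_pkts", "resp_pkts")];
        Log::add_filter(Conn::LOG, filter);
        }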
If you want to make this the only log file for the stream, you can
remove the default filter:
.. sourcecode:: bro
event bro_init()
{
# Remove the filter called "default".
Log::remove_filter(Conn::LOG, "default");
}
Determine Log Path Dynamically
------------------------------
Instead of using the "path" filter attribute, a filter can determine
output paths *dynamically* based on the record being logged. That
allows, e.g., to record local and remote connections into separate
files. To do this, you define a function that returns the desired path,
and use the "path_func" filter attribute:
.. sourcecode:: bro
# Note: if using BroControl then you don't need to redef local_nets.
redef Site::local_nets = { 192.168.0.0/16 };
function myfunc(id: Log::ID, path: string, rec: Conn::Info) : string
{
# Return "conn-local" if originator is a local IP, otherwise
# return "conn-remote".
local r = Site::is_local_addr(rec$id$orig_h) ? "local" : "remote";
return fmt("%s-%s", path, r);
}
event bro_init()
{
local filter: Log::Filter = [$name="conn-split",
$path_func=myfunc, $include=set("ts", "id.orig_h")];
Log::add_filter(Conn::LOG, filter);
}
Running this will now produce two new files, ``conn-local.log`` and
``conn-remote.log``, with the corresponding entries (for this example to work,
the ``Site::local_nets`` must specify your local network). One could extend
this further for example to log information by subnets or even by IP
address. Be careful, however, as it is easy to create many files very
quickly.
The ``myfunc`` function has one drawback: it can be used
only with the :bro:enum:`Conn::LOG` stream, as the record type is hardcoded
into its argument list. However, Bro allows a more generic
variant:
.. sourcecode:: bro
function myfunc(id: Log::ID, path: string,
rec: record { id: conn_id; } ) : string
{
local r = Site::is_local_addr(rec$id$orig_h) ? "local" : "remote";
return fmt("%s-%s", path, r);
}
This function can be used with all log streams that have records
containing an ``id: conn_id`` field.
Filter Log Records
------------------
We have seen how to customize the columns being logged, but
you can also control which records are written out by providing a
predicate that will be called for each log record:
.. sourcecode:: bro
function http_only(rec: Conn::Info) : bool
{
# Record only connections with successfully analyzed HTTP traffic
return rec?$service && rec$service == "http";
}
event bro_init()
{
local filter: Log::Filter = [$name="http-only", $path="conn-http",
$pred=http_only];
Log::add_filter(Conn::LOG, filter);
}
This will result in a new log file ``conn-http.log`` that contains only
the log records from ``conn.log`` that are analyzed as HTTP traffic.
Rotation
--------
The log rotation interval is globally controllable for all
filters by redefining the :bro:id:`Log::default_rotation_interval` option
(note that when using BroControl, this option is set automatically via
the BroControl configuration).
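For example, to rotate all logs hourly:

.. sourcecode:: bro

    redef Log::default_rotation_interval = 1 hr;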
The rotation interval can also be set for specific :bro:type:`Log::Filter`
instances by setting their ``interv`` field. Here's an example of changing
just the :bro:enum:`Conn::LOG` stream's default filter rotation:
.. sourcecode:: bro
event bro_init()
{
local f = Log::get_filter(Conn::LOG, "default");
f$interv = 1 min;
Log::add_filter(Conn::LOG, f);
}
Writers
=======
Each filter has a writer. If you do not specify a writer when adding a
filter to a stream, then the ASCII writer is the default.
There are two ways to specify a non-default writer. To change the default
writer for all log filters, just redefine the :bro:id:`Log::default_writer`
option. Alternatively, you can specify the writer to use on a per-filter
basis by setting a value for the filter's "writer" field. Consult the
documentation of the writer to use to see if there are other options that are
needed.
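For example, the following sketch switches the connection log's default
filter to the SQLite writer (see the SQLite documentation for
writer-specific options):

.. sourcecode:: bro

    event bro_init()
        {
        local f = Log::get_filter(Conn::LOG, "default");
        f$writer = Log::WRITER_SQLITE;
        Log::add_filter(Conn::LOG, f);
        }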
ASCII Writer
------------
By default, the ASCII writer outputs log files that begin with several
lines of metadata, followed by the actual log output. The metadata
describes the format of the log file, the "path" of the log (i.e., the log
filename without file extension), and also specifies the time that the log
was created and the time when Bro finished writing to it.
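For example, the beginning of an ASCII ``conn.log`` looks similar to this
(timestamp and field list abbreviated)::

    #separator \x09
    #set_separator	,
    #empty_field	(empty)
    #unset_field	-
    #path	conn
    #open	2019-01-17-14-09-29
    #fields	ts	uid	id.orig_h	id.orig_p	...
    #types	time	string	addr	port	...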
The ASCII writer has a number of options for customizing the format of its
output, see :doc:`/scripts/base/frameworks/logging/writers/ascii.bro`.
If you change the output format options, then be careful to check whether
your postprocessing scripts can still recognize your log files.
Some writer options are global (i.e., they affect all log filters using
that log writer). For example, to change the output format of all ASCII
logs to JSON format:
.. sourcecode:: bro
redef LogAscii::use_json = T;
Some writer options are filter-specific (i.e., they affect only the filters
that explicitly specify the option). For example, to change the output
format of the ``conn.log`` only:
.. sourcecode:: bro
event bro_init()
{
local f = Log::get_filter(Conn::LOG, "default");
# Use tab-separated-value mode
f$config = table(["tsv"] = "T");
Log::add_filter(Conn::LOG, f);
}
Other Writers
-------------
Bro supports the following additional built-in output formats:
.. toctree::
:maxdepth: 1
logging-input-sqlite
Additional writers are available as external plugins through the `Bro
Package Manager <https://packages.zeek.org>`_.
@ -1,10 +0,0 @@
# Instantiate the debug plugin, which just logs the actions it is asked
# to take, and activate it at priority 0.
event NetControl::init()
	{
	local debug_plugin = NetControl::create_debug(T);
	NetControl::activate(debug_plugin, 0);
	}

# Drop each connection for 20 seconds as soon as it has been established.
event connection_established(c: connection)
	{
	NetControl::drop_connection(c$id, 20 secs);
	}
@ -1,10 +0,0 @@
# Instantiate the skeleton plugin, a stub template for implementing new
# NetControl backends, and activate it at priority 0.
event NetControl::init()
	{
	local skeleton_plugin = NetControl::create_skeleton("");
	NetControl::activate(skeleton_plugin, 0);
	}

# Request that each new connection be dropped for 20 seconds.
event connection_established(c: connection)
	{
	NetControl::drop_connection(c$id, 20 secs);
	}
@ -1,16 +0,0 @@
@load protocols/ssh/detect-bruteforcing

redef SSH::password_guesses_limit=10;

# Use the debug plugin as the NetControl backend.
event NetControl::init()
	{
	local debug_plugin = NetControl::create_debug(T);
	NetControl::activate(debug_plugin, 0);
	}

# Block hosts that trigger an SSH password-guessing notice for an hour.
hook Notice::policy(n: Notice::Info)
	{
	if ( n$note == SSH::Password_Guessing )
		NetControl::drop_address(n$src, 60min);
	}
@ -1,16 +0,0 @@
@load protocols/ssh/detect-bruteforcing

redef SSH::password_guesses_limit=10;

event NetControl::init()
	{
	local debug_plugin = NetControl::create_debug(T);
	NetControl::activate(debug_plugin, 0);
	}

# Instead of calling NetControl directly, add the notice framework's
# drop action to the notice; the notice framework then performs the
# block via NetControl.
hook Notice::policy(n: Notice::Info)
	{
	if ( n$note == SSH::Password_Guessing )
		add n$actions[Notice::ACTION_DROP];
	}
@ -1,26 +0,0 @@
function our_drop_connection(c: conn_id, t: interval)
{
# As a first step, create the NetControl::Entity that we want to block
local e = NetControl::Entity($ty=NetControl::CONNECTION, $conn=c);
# Then, use the entity to create the rule to drop the entity in the forward path
local r = NetControl::Rule($ty=NetControl::DROP,
$target=NetControl::FORWARD, $entity=e, $expire=t);
# Add the rule
local id = NetControl::add_rule(r);
if ( id == "" )
print "Error while dropping";
}
event NetControl::init()
{
local debug_plugin = NetControl::create_debug(T);
NetControl::activate(debug_plugin, 0);
}
event connection_established(c: connection)
{
our_drop_connection(c$id, 20 secs);
}