Merge remote-tracking branch 'origin/topic/jsiwek/file-signatures'

* origin/topic/jsiwek/file-signatures:
  File type detection changes and fix https.log {orig,resp}_fuids fields.
  Various minor changes related to file mime type detection.
  Refactor common MIME magic matching code.
  Replace libmagic w/ Bro signatures for file MIME type identification.

Conflicts:
	scripts/base/init-default.bro
	testing/btest/Baseline/coverage.bare-load-baseline/canonified_loaded_scripts.log
	testing/btest/Baseline/coverage.default-load-baseline/canonified_loaded_scripts.log

BIT-1143 #merged
This commit is contained in:
Robin Sommer 2014-03-30 22:40:32 +02:00
commit 9efb549236
57 changed files with 4841 additions and 201 deletions

3
.gitmodules vendored
View file

@ -16,9 +16,6 @@
[submodule "cmake"] [submodule "cmake"]
path = cmake path = cmake
url = git://git.bro.org/cmake url = git://git.bro.org/cmake
[submodule "magic"]
path = magic
url = git://git.bro.org/bromagic
[submodule "src/3rdparty"] [submodule "src/3rdparty"]
path = src/3rdparty path = src/3rdparty
url = git://git.bro.org/bro-3rdparty url = git://git.bro.org/bro-3rdparty

60
CHANGES
View file

@ -1,4 +1,64 @@
2.2-302 | 2014-03-30 22:40:32 +0200
* Replace libmagic w/ Bro signatures for file MIME type
identification. Addresses BIT-1143. (Jon Siwek)
Includes:
- libmagic is no longer used at all. All MIME type detection is
done through new Bro signatures, and there's no longer a means
to get verbose file type descriptions. The majority of the
default file magic signatures are derived from the default magic
database of libmagic ~5.17.
- File magic signatures consist of two new constructs in the
signature rule parsing grammar: "file-magic" gives a regular
expression to match against, and "file-mime" gives the MIME type
string of content that matches the magic and an optional strength
value for the match.
- Modified signature/rule syntax for identifiers: they can no
longer start with a '-', which made for ambiguous syntax when
doing negative strength values in "file-mime". Also brought
syntax for Bro script identifiers in line with reality (they
can't start with numbers or include '-' at all).
- A new built-in function, "file_magic", can be used to get all
file magic matches and their corresponding strength against a
given chunk of data.
- The second parameter of the "identify_data" built-in function
can no longer be used to get verbose file type descriptions,
though it can still be used to get the strongest matching file
magic signature.
- The "file_transferred" event's "descr" parameter no longer
contains verbose file type descriptions.
- The BROMAGIC environment variable no longer changes any behavior
in Bro as magic databases are no longer used/installed.
- Removed "binary" and "octet-stream" mime type detections. They
don't provide any more information than an uninitialized
mime_type field which implicitly means no magic signature
matches and so the media type is unknown to Bro.
- The "fa_file" record now contains a "mime_types" field that
contains all magic signatures that matched the file content
(where the "mime_type" field is just a shortcut for the
strongest match).
- Reverted back to minimum requirement of CMake 2.6.3 from 2.8.0.
* The logic for adding file ids to {orig,resp}_fuids fields of the
http.log incorrectly depended on the state of
{orig,resp}_mime_types fields, so sometimes not all file ids
associated w/ the session were logged. (Jon Siwek)
* Fix MHR script's use of fa_file$mime_type before checking if it's
initialized. (Jon Siwek)
2.2-294 | 2014-03-30 22:08:25 +0200 2.2-294 | 2014-03-30 22:08:25 +0200
* TODO: x509 changes. (Bernhard Amann) * TODO: x509 changes. (Bernhard Amann)

View file

@ -1,5 +1,5 @@
project(Bro C CXX) project(Bro C CXX)
cmake_minimum_required(VERSION 2.8.0 FATAL_ERROR) cmake_minimum_required(VERSION 2.6.3 FATAL_ERROR)
include(cmake/CommonCMakeConfig.cmake) include(cmake/CommonCMakeConfig.cmake)
######################################################################## ########################################################################
@ -16,17 +16,12 @@ endif ()
get_filename_component(BRO_SCRIPT_INSTALL_PATH ${BRO_SCRIPT_INSTALL_PATH} get_filename_component(BRO_SCRIPT_INSTALL_PATH ${BRO_SCRIPT_INSTALL_PATH}
ABSOLUTE) ABSOLUTE)
set(BRO_MAGIC_INSTALL_PATH ${BRO_ROOT_DIR}/share/bro/magic)
set(BRO_MAGIC_SOURCE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/magic/database)
configure_file(bro-path-dev.in ${CMAKE_CURRENT_BINARY_DIR}/bro-path-dev) configure_file(bro-path-dev.in ${CMAKE_CURRENT_BINARY_DIR}/bro-path-dev)
file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/bro-path-dev.sh file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/bro-path-dev.sh
"export BROPATH=`${CMAKE_CURRENT_BINARY_DIR}/bro-path-dev`\n" "export BROPATH=`${CMAKE_CURRENT_BINARY_DIR}/bro-path-dev`\n"
"export BROMAGIC=\"${BRO_MAGIC_SOURCE_PATH}\"\n"
"export PATH=\"${CMAKE_CURRENT_BINARY_DIR}/src\":$PATH\n") "export PATH=\"${CMAKE_CURRENT_BINARY_DIR}/src\":$PATH\n")
file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/bro-path-dev.csh file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/bro-path-dev.csh
"setenv BROPATH `${CMAKE_CURRENT_BINARY_DIR}/bro-path-dev`\n" "setenv BROPATH `${CMAKE_CURRENT_BINARY_DIR}/bro-path-dev`\n"
"setenv BROMAGIC \"${BRO_MAGIC_SOURCE_PATH}\"\n"
"setenv PATH \"${CMAKE_CURRENT_BINARY_DIR}/src\":$PATH\n") "setenv PATH \"${CMAKE_CURRENT_BINARY_DIR}/src\":$PATH\n")
file(STRINGS "${CMAKE_CURRENT_SOURCE_DIR}/VERSION" VERSION LIMIT_COUNT 1) file(STRINGS "${CMAKE_CURRENT_SOURCE_DIR}/VERSION" VERSION LIMIT_COUNT 1)
@ -39,32 +34,6 @@ set(VERSION_MAJ_MIN "${VERSION_MAJOR}.${VERSION_MINOR}")
######################################################################## ########################################################################
## Dependency Configuration ## Dependency Configuration
include(ExternalProject)
# LOG_* options to ExternalProject_Add appear in CMake 2.8.3. If
# available, using them hides external project configure/build output.
if("${CMAKE_VERSION}" VERSION_GREATER 2.8.2)
set(EXTERNAL_PROJECT_LOG_OPTIONS
LOG_DOWNLOAD 1 LOG_UPDATE 1 LOG_CONFIGURE 1 LOG_BUILD 1 LOG_INSTALL 1)
else()
set(EXTERNAL_PROJECT_LOG_OPTIONS)
endif()
set(LIBMAGIC_PREFIX ${CMAKE_CURRENT_BINARY_DIR}/libmagic-prefix)
set(LIBMAGIC_INCLUDE_DIR ${LIBMAGIC_PREFIX}/include)
set(LIBMAGIC_LIB_DIR ${LIBMAGIC_PREFIX}/lib)
set(LIBMAGIC_LIBRARY ${LIBMAGIC_LIB_DIR}/libmagic.a)
ExternalProject_Add(libmagic
PREFIX ${LIBMAGIC_PREFIX}
URL ${CMAKE_CURRENT_SOURCE_DIR}/src/3rdparty/file-5.17.tar.gz
CONFIGURE_COMMAND ./configure --enable-static --disable-shared
--prefix=${LIBMAGIC_PREFIX}
--includedir=${LIBMAGIC_INCLUDE_DIR}
--libdir=${LIBMAGIC_LIB_DIR}
BUILD_IN_SOURCE 1
${EXTERNAL_PROJECT_LOG_OPTIONS}
)
include(FindRequiredPackage) include(FindRequiredPackage)
# Check cache value first to avoid displaying "Found sed" messages everytime # Check cache value first to avoid displaying "Found sed" messages everytime
@ -107,7 +76,6 @@ include_directories(BEFORE
${OpenSSL_INCLUDE_DIR} ${OpenSSL_INCLUDE_DIR}
${BIND_INCLUDE_DIR} ${BIND_INCLUDE_DIR}
${BinPAC_INCLUDE_DIR} ${BinPAC_INCLUDE_DIR}
${LIBMAGIC_INCLUDE_DIR}
${ZLIB_INCLUDE_DIR} ${ZLIB_INCLUDE_DIR}
${JEMALLOC_INCLUDE_DIR} ${JEMALLOC_INCLUDE_DIR}
) )
@ -187,7 +155,6 @@ set(brodeps
${PCAP_LIBRARY} ${PCAP_LIBRARY}
${OpenSSL_LIBRARIES} ${OpenSSL_LIBRARIES}
${BIND_LIBRARY} ${BIND_LIBRARY}
${LIBMAGIC_LIBRARY}
${ZLIB_LIBRARY} ${ZLIB_LIBRARY}
${JEMALLOC_LIBRARIES} ${JEMALLOC_LIBRARIES}
${OPTLIBS} ${OPTLIBS}
@ -226,10 +193,6 @@ CheckOptionalBuildSources(aux/broctl Broctl INSTALL_BROCTL)
CheckOptionalBuildSources(aux/bro-aux Bro-Aux INSTALL_AUX_TOOLS) CheckOptionalBuildSources(aux/bro-aux Bro-Aux INSTALL_AUX_TOOLS)
CheckOptionalBuildSources(aux/broccoli Broccoli INSTALL_BROCCOLI) CheckOptionalBuildSources(aux/broccoli Broccoli INSTALL_BROCCOLI)
install(DIRECTORY ./magic/database/
DESTINATION ${BRO_MAGIC_INSTALL_PATH}
)
######################################################################## ########################################################################
## Packaging Setup ## Packaging Setup

40
NEWS
View file

@ -15,7 +15,7 @@ Dependencies
- Bro no longer requires a pre-installed libmagic (because it now - Bro no longer requires a pre-installed libmagic (because it now
ships its own). ships its own).
- Compiling from source now needs a CMake version >= 2.8.0. - Libmagic is no longer a dependency.
New Functionality New Functionality
----------------- -----------------
@ -28,6 +28,19 @@ New Functionality
- The DNS analyzer now actually generates the dns_SRV_reply() event. - The DNS analyzer now actually generates the dns_SRV_reply() event.
It had been documented before, yet was never raised. It had been documented before, yet was never raised.
- Bro now uses "file magic signatures" to identify file types. These
are defined via two new constructs in the signature rule parsing
grammar: "file-magic" gives a regular expression to match against,
and "file-mime" gives the MIME type string of content that matches
the magic and an optional strength value for the match. (See also
"Changed Functionality" below for changes due to switching from
using libmagic to such signatures.)
- A new built-in function, "file_magic", can be used to get all file
magic matches and their corresponding strength against a given chunk
of data.
Changed Functionality Changed Functionality
--------------------- ---------------------
@ -52,6 +65,31 @@ Changed Functionality
- We have removed the packet sorter component. - We have removed the packet sorter component.
- Bro no longer uses libmagic to identify file types but instead now
comes with its own signature library (which initially is still
derived from libmagic's database). This leads to a number of further
changes with regards to MIME types:
* The second parameter of the "identify_data" built-in function
can no longer be used to get verbose file type descriptions,
though it can still be used to get the strongest matching file
magic signature.
* The "file_transferred" event's "descr" parameter no longer
contains verbose file type descriptions.
* The BROMAGIC environment variable no longer changes any behavior
in Bro as magic databases are no longer used/installed.
* Removed "binary" and "octet-stream" mime type detections. They
don't provide any more information than an uninitialized
mime_type field.
* The "fa_file" record now contains a "mime_types" field that
contains all magic signatures that matched the file content
(where the "mime_type" field is just a shortcut for the
strongest match).
Bro 2.2 Bro 2.2
======= =======

View file

@ -1 +1 @@
2.2-294 2.2-302

View file

@ -14,8 +14,6 @@ if (NOT ${retval} EQUAL 0)
message(FATAL_ERROR "Problem setting BROPATH") message(FATAL_ERROR "Problem setting BROPATH")
endif () endif ()
set(BROMAGIC ${BRO_MAGIC_SOURCE_PATH})
# Configure the Sphinx config file (expand variables CMake might know about). # Configure the Sphinx config file (expand variables CMake might know about).
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/conf.py.in configure_file(${CMAKE_CURRENT_SOURCE_DIR}/conf.py.in
${CMAKE_CURRENT_BINARY_DIR}/conf.py ${CMAKE_CURRENT_BINARY_DIR}/conf.py
@ -34,7 +32,6 @@ add_custom_target(sphinxdoc
${CMAKE_CURRENT_SOURCE_DIR}/ ${SPHINX_INPUT_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/ ${SPHINX_INPUT_DIR}
# Use Bro/Broxygen to dynamically generate reST for all Bro scripts. # Use Bro/Broxygen to dynamically generate reST for all Bro scripts.
COMMAND BROPATH=${BROPATH} COMMAND BROPATH=${BROPATH}
BROMAGIC=${BROMAGIC}
${CMAKE_BINARY_DIR}/src/bro ${CMAKE_BINARY_DIR}/src/bro
-X ${CMAKE_CURRENT_BINARY_DIR}/broxygen.conf -X ${CMAKE_CURRENT_BINARY_DIR}/broxygen.conf
broxygen >/dev/null broxygen >/dev/null

View file

@ -64,8 +64,8 @@ expect that signature file in the same directory as the Bro script. The
default extension of the file name is ``.sig``, and Bro appends that default extension of the file name is ``.sig``, and Bro appends that
automatically when necessary. automatically when necessary.
Signature language Signature Language for Network Traffic
================== ======================================
Let's look at the format of a signature more closely. Each individual Let's look at the format of a signature more closely. Each individual
signature has the format ``signature <id> { <attributes> }``. ``<id>`` signature has the format ``signature <id> { <attributes> }``. ``<id>``
@ -286,6 +286,44 @@ two actions defined:
connection (``"http"``, ``"ftp"``, etc.). This is used by Bro's connection (``"http"``, ``"ftp"``, etc.). This is used by Bro's
dynamic protocol detection to activate analyzers on the fly. dynamic protocol detection to activate analyzers on the fly.
Signature Language for File Content
===================================
The signature framework can also be used to identify MIME types of files
irrespective of the network protocol/connection over which the file is
transferred. A special type of signature can be written for this
purpose and will be used automatically by the :doc:`Files Framework
<file-analysis>` or by Bro scripts that use the :bro:see:`file_magic`
built-in function.
Conditions
----------
File signatures use a single type of content condition in the form of a
regular expression:
``file-magic /<regular expression>/``
This is analogous to the ``payload`` content condition for the network
traffic signature language described above. The difference is that
``payload`` signatures are applied to payloads of network connections,
but ``file-magic`` can be applied to any arbitrary data, it does not
have to be tied to a network protocol/connection.
Actions
-------
Upon matching a chunk of data, file signatures use the following action
to get information about that data's MIME type:
``file-mime <string> [, <integer>]``
The arguments include the MIME type string associated with the file
magic regular expression and an optional "strength" as a signed integer.
Since multiple file magic signatures may match against a given chunk of
data, the strength value may be used to help choose a "winner". Higher
values are considered stronger.
Things to keep in mind when writing signatures Things to keep in mind when writing signatures
============================================== ==============================================

View file

@ -35,7 +35,7 @@ before you begin:
To build Bro from source, the following additional dependencies are required: To build Bro from source, the following additional dependencies are required:
* CMake 2.8.0 or greater (http://www.cmake.org) * CMake 2.6.3 or greater (http://www.cmake.org)
* Make * Make
* C/C++ compiler * C/C++ compiler
* SWIG (http://www.swig.org) * SWIG (http://www.swig.org)

1
magic

@ -1 +0,0 @@
Subproject commit 99c6b89230e2b9b0e781c42b0b9412d2ab4e14b2

View file

@ -1 +1,2 @@
@load ./main.bro @load ./main.bro
@load ./magic

View file

@ -0,0 +1,2 @@
@load-sigs ./general
@load-sigs ./libmagic

View file

@ -0,0 +1,11 @@
# General purpose file magic signatures.
# Weak catch-all for text: a run of ten characters that are each either
# printable or whitespace is reported as "text/plain". The negative
# strength (-20) is lower than the other strength given in this file (150).
signature file-plaintext {
file-magic /([[:print:][:space:]]{10})/
file-mime "text/plain", -20
}
# Tar archive heuristic: 100 bytes that are each printable or NUL, followed
# by three consecutive 8-byte fields made up only of digits, NUL or space.
# NOTE(review): presumably this mirrors the name/mode/uid/gid fields at the
# start of a tar header -- confirm against the tar format specification.
# Reported as "application/x-tar" with strength 150.
signature file-tar {
file-magic /([[:print:]\x00]){100}(([[:digit:]\x00\x20]){8}){3}/
file-mime "application/x-tar", 150
}

File diff suppressed because it is too large Load diff

View file

@ -65,10 +65,11 @@ export {
## A set of analysis types done during the file analysis. ## A set of analysis types done during the file analysis.
analyzers: set[string] &default=string_set() &log; analyzers: set[string] &default=string_set() &log;
## A mime type provided by libmagic against the *bof_buffer* ## A mime type provided by the strongest file magic signature
## field of :bro:see:`fa_file`, or in the cases where no ## match against the *bof_buffer* field of :bro:see:`fa_file`,
## buffering of the beginning of file occurs, an initial ## or in the cases where no buffering of the beginning of file
## guess of the mime type based on the first data seen. ## occurs, an initial guess of the mime type based on the first
## data seen.
mime_type: string &log &optional; mime_type: string &log &optional;
## A filename for the file if one is available from the source ## A filename for the file if one is available from the source

View file

@ -75,6 +75,23 @@ type addr_vec: vector of addr;
## directly and then remove this alias. ## directly and then remove this alias.
type table_string_of_string: table[string] of string; type table_string_of_string: table[string] of string;
## A structure indicating a MIME type and strength of a match against
## file magic signatures.
##
## :bro:see:`file_magic`
type mime_match: record {
strength: int; ##< How strongly the signature matched. Used for
##< prioritization when multiple file magic signatures
##< match.
mime: string; ##< The MIME type of the file magic signature match.
};
## A vector of file magic signature matches, ordered by strength of
## the signature, strongest first.
##
## :bro:see:`file_magic`
type mime_matches: vector of mime_match;
## A connection's transport-layer protocol. Note that Bro uses the term ## A connection's transport-layer protocol. Note that Bro uses the term
## "connection" broadly, using flow semantics for ICMP and UDP. ## "connection" broadly, using flow semantics for ICMP and UDP.
type transport_proto: enum { type transport_proto: enum {
@ -386,10 +403,15 @@ type fa_file: record {
## This is also the buffer that's used for file/mime type detection. ## This is also the buffer that's used for file/mime type detection.
bof_buffer: string &optional; bof_buffer: string &optional;
## A mime type provided by libmagic against the *bof_buffer*, or ## The mime type of the strongest file magic signature matches against
## in the cases where no buffering of the beginning of file occurs, ## the data chunk in *bof_buffer*, or in the cases where no buffering
## an initial guess of the mime type based on the first data seen. ## of the beginning of file occurs, an initial guess of the mime type
## based on the first data seen.
mime_type: string &optional; mime_type: string &optional;
## All mime types that matched file magic signatures against the data
## chunk in *bof_buffer*, in order of their strength value.
mime_types: mime_matches &optional;
} &redef; } &redef;
## Fields of a SYN packet. ## Fields of a SYN packet.

View file

@ -72,7 +72,7 @@ event file_over_new_connection(f: fa_file, c: connection, is_orig: bool) &priori
if ( f$is_orig ) if ( f$is_orig )
{ {
if ( ! c$http?$orig_mime_types ) if ( ! c$http?$orig_fuids )
c$http$orig_fuids = string_vec(f$id); c$http$orig_fuids = string_vec(f$id);
else else
c$http$orig_fuids[|c$http$orig_fuids|] = f$id; c$http$orig_fuids[|c$http$orig_fuids|] = f$id;
@ -87,7 +87,7 @@ event file_over_new_connection(f: fa_file, c: connection, is_orig: bool) &priori
} }
else else
{ {
if ( ! c$http?$resp_mime_types ) if ( ! c$http?$resp_fuids )
c$http$resp_fuids = string_vec(f$id); c$http$resp_fuids = string_vec(f$id);
else else
c$http$resp_fuids[|c$http$resp_fuids|] = f$id; c$http$resp_fuids[|c$http$resp_fuids|] = f$id;

View file

@ -66,6 +66,6 @@ function do_mhr_lookup(hash: string, fi: Notice::FileInfo)
event file_hash(f: fa_file, kind: string, hash: string) event file_hash(f: fa_file, kind: string, hash: string)
{ {
if ( kind=="sha1" && match_file_types in f$mime_type ) if ( kind == "sha1" && f?$mime_type && match_file_types in f$mime_type )
do_mhr_lookup(hash, Notice::create_file_info(f)); do_mhr_lookup(hash, Notice::create_file_info(f));
} }

View file

@ -389,9 +389,6 @@ install(TARGETS bro DESTINATION bin)
set(BRO_EXE bro set(BRO_EXE bro
CACHE STRING "Bro executable binary" FORCE) CACHE STRING "Bro executable binary" FORCE)
# External libmagic project must be built before bro.
add_dependencies(bro libmagic)
# Target to create all the autogenerated files. # Target to create all the autogenerated files.
add_custom_target(generate_outputs_stage1) add_custom_target(generate_outputs_stage1)
add_dependencies(generate_outputs_stage1 ${bro_ALL_GENERATED_OUTPUTS}) add_dependencies(generate_outputs_stage1 ${bro_ALL_GENERATED_OUTPUTS})

View file

@ -20,6 +20,8 @@ TableType* string_set;
TableType* string_array; TableType* string_array;
TableType* count_set; TableType* count_set;
VectorType* string_vec; VectorType* string_vec;
VectorType* mime_matches;
RecordType* mime_match;
int watchdog_interval; int watchdog_interval;
@ -327,6 +329,8 @@ void init_net_var()
string_set = internal_type("string_set")->AsTableType(); string_set = internal_type("string_set")->AsTableType();
string_array = internal_type("string_array")->AsTableType(); string_array = internal_type("string_array")->AsTableType();
string_vec = internal_type("string_vec")->AsVectorType(); string_vec = internal_type("string_vec")->AsVectorType();
mime_match = internal_type("mime_match")->AsRecordType();
mime_matches = internal_type("mime_matches")->AsVectorType();
ignore_checksums = opt_internal_int("ignore_checksums"); ignore_checksums = opt_internal_int("ignore_checksums");
partial_connection_ok = opt_internal_int("partial_connection_ok"); partial_connection_ok = opt_internal_int("partial_connection_ok");

View file

@ -23,6 +23,8 @@ extern TableType* string_set;
extern TableType* string_array; extern TableType* string_array;
extern TableType* count_set; extern TableType* count_set;
extern VectorType* string_vec; extern VectorType* string_vec;
extern VectorType* mime_matches;
extern RecordType* mime_match;
extern int watchdog_interval; extern int watchdog_interval;

View file

@ -38,7 +38,7 @@ Rule::~Rule()
const char* Rule::TypeToString(Rule::PatternType type) const char* Rule::TypeToString(Rule::PatternType type)
{ {
static const char* labels[] = { static const char* labels[] = {
"Payload", "HTTP-REQUEST", "HTTP-REQUEST-BODY", "File Magic", "Payload", "HTTP-REQUEST", "HTTP-REQUEST-BODY",
"HTTP-REQUEST-HEADER", "HTTP-REPLY-BODY", "HTTP-REQUEST-HEADER", "HTTP-REPLY-BODY",
"HTTP-REPLY-HEADER", "FTP", "Finger", "HTTP-REPLY-HEADER", "FTP", "Finger",
}; };

View file

@ -37,7 +37,7 @@ public:
unsigned int Index() const { return idx; } unsigned int Index() const { return idx; }
enum PatternType { enum PatternType {
PAYLOAD, HTTP_REQUEST, HTTP_REQUEST_BODY, HTTP_REQUEST_HEADER, FILE_MAGIC, PAYLOAD, HTTP_REQUEST, HTTP_REQUEST_BODY, HTTP_REQUEST_HEADER,
HTTP_REPLY_BODY, HTTP_REPLY_HEADER, FTP, FINGER, TYPES, HTTP_REPLY_BODY, HTTP_REPLY_HEADER, FTP, FINGER, TYPES,
}; };

View file

@ -35,6 +35,11 @@ void RuleActionEvent::PrintDebug()
fprintf(stderr, " RuleActionEvent: |%s|\n", msg); fprintf(stderr, " RuleActionEvent: |%s|\n", msg);
} }
// Writes a one-line description of this action to stderr, showing the MIME
// type string between '|' delimiters. The strength value is not printed.
void RuleActionMIME::PrintDebug()
{
fprintf(stderr, " RuleActionMIME: |%s|\n", mime);
}
RuleActionAnalyzer::RuleActionAnalyzer(const char* arg_analyzer) RuleActionAnalyzer::RuleActionAnalyzer(const char* arg_analyzer)
{ {
string str(arg_analyzer); string str(arg_analyzer);

View file

@ -36,6 +36,31 @@ private:
const char* msg; const char* msg;
}; };
// Rule action attached to file magic signatures: it carries the MIME type
// that a matching signature stands for, plus the strength used to rank that
// match against other matching signatures.
class RuleActionMIME : public RuleAction {
public:
	// Stores a private copy of arg_mime; the caller keeps ownership of the
	// string it passed in. The strength defaults to 0 when not given.
	RuleActionMIME(const char* arg_mime, int arg_strength = 0)
		: mime(copy_string(arg_mime)), strength(arg_strength)
		{ }

	// Releases the copy of the MIME type string made by the constructor.
	virtual ~RuleActionMIME()
		{
		delete [] mime;
		}

	// Intentionally a no-op: this action does nothing when executed and
	// only exposes its data through GetMIME()/GetStrength().
	virtual void DoAction(const Rule* parent, RuleEndpointState* state,
	                      const u_char* data, int len)
		{
		}

	virtual void PrintDebug();

	// Returns the MIME type as a std::string copy.
	string GetMIME() const
		{
		return mime;
		}

	// Returns the strength value given at construction.
	int GetStrength() const
		{
		return strength;
		}

private:
	const char* mime;	// Owned copy of the MIME type string.
	int strength;		// Priority of this match.
};
// Base class for enable/disable actions. // Base class for enable/disable actions.
class RuleActionAnalyzer : public RuleAction { class RuleActionAnalyzer : public RuleAction {
public: public:

View file

@ -186,6 +186,15 @@ RuleEndpointState::~RuleEndpointState()
delete matched_text[j]; delete matched_text[j];
} }
// Tears down every matcher held by this state object: first the regular
// expression match state each matcher points to, then the matcher itself.
RuleFileMagicState::~RuleFileMagicState()
	{
	loop_over_list(matchers, idx)
		{
		Matcher* entry = matchers[idx];
		delete entry->state;
		delete entry;
		}
	}
RuleMatcher::RuleMatcher(int arg_RE_level) RuleMatcher::RuleMatcher(int arg_RE_level)
{ {
root = new RuleHdrTest(RuleHdrTest::NOPROT, 0, 0, RuleHdrTest::EQ, root = new RuleHdrTest(RuleHdrTest::NOPROT, 0, 0, RuleHdrTest::EQ,
@ -564,6 +573,127 @@ static inline bool compare(const vector<IPPrefix>& prefixes, const IPAddr& a,
return false; return false;
} }
// Creates a fresh state object for matching data against the file magic
// pattern sets registered on the root of the rule tree. One matcher is
// created per FILE_MAGIC pattern set. When rule_bench is 3 the returned
// state is left without any matchers. The caller owns the returned object.
RuleFileMagicState* RuleMatcher::InitFileMagic() const
	{
	RuleFileMagicState* fms = new RuleFileMagicState();

	if ( rule_bench != 3 )
		{
		loop_over_list(root->psets[Rule::FILE_MAGIC], idx)
			{
			RuleHdrTest::PatternSet* pset =
				root->psets[Rule::FILE_MAGIC][idx];

			// Every pattern set is expected to have a compiled regexp.
			assert(pset->re);

			RuleFileMagicState::Matcher* matcher =
				new RuleFileMagicState::Matcher;
			matcher->state = new RE_Match_State(pset->re);
			fms->matchers.append(matcher);
			}

		// Save some memory.
		// NOTE(review): presumably resize(0) trims the list's capacity to
		// its current length rather than emptying it -- confirm against
		// the list implementation.
		fms->matchers.resize(0);
		}

	return fms;
	}
// Matches a chunk of data against the file magic signatures.
//
// state: Matching state previously obtained from InitFileMagic(). If null,
//        a warning is reported and no matching takes place.
// data:  The bytes to match against.
// len:   Number of bytes in data.
// rval:  Optional existing result object to add matches to. If null, a new
//        one is allocated; the caller then owns it.
//
// Returns rval (or the newly allocated result object), mapping each strength
// value to the set of MIME types of the signatures that matched with that
// strength. Nothing is added when rule_bench >= 2 or when no pattern matched.
RuleMatcher::MIME_Matches* RuleMatcher::Match(RuleFileMagicState* state,
                                              const u_char* data, uint64 len,
                                              MIME_Matches* rval) const
	{
	if ( ! rval )
		rval = new MIME_Matches();

	if ( ! state )
		{
		reporter->Warning("RuleFileMagicState not initialized yet.");
		return rval;
		}

	if ( rule_bench >= 2 )
		return rval;

#ifdef DEBUG
	if ( debug_logger.IsEnabled(DBG_RULES) )
		{
		// Only the first 40 bytes of the data are shown in the debug log.
		const char* s = fmt_bytes(reinterpret_cast<const char*>(data),
		                          min(40, static_cast<int>(len)));
		DBG_LOG(DBG_RULES, "Matching %s rules on |%s%s|",
		        Rule::TypeToString(Rule::FILE_MAGIC), s,
		        len > 40 ? "..." : "");
		}
#endif

	// Feed the data to every matcher, remembering whether any of them
	// reported a match.
	bool newmatch = false;

	loop_over_list(state->matchers, x)
		{
		RuleFileMagicState::Matcher* m = state->matchers[x];

		if ( m->state->Match(data, len, true, false, true) )
			newmatch = true;
		}

	if ( ! newmatch )
		return rval;

	DBG_LOG(DBG_RULES, "New pattern match found");

	// Merge the accepted pattern ids of all matchers into one list without
	// duplicates; matchpos[n] is the match position belonging to accepted[n].
	AcceptingSet accepted;
	int_list matchpos;

	loop_over_list(state->matchers, y)
		{
		RuleFileMagicState::Matcher* m = state->matchers[y];
		const AcceptingSet* ac = m->state->Accepted();

		loop_over_list(*ac, k)
			{
			if ( ! accepted.is_member((*ac)[k]) )
				{
				accepted.append((*ac)[k]);
				matchpos.append((*m->state->MatchPositions())[k]);
				}
			}
		}

	// Find rules for which patterns have matched.
	rule_list matched;

	loop_over_list(accepted, i)
		{
		Rule* r = Rule::rule_table[accepted[i] - 1];

		// r->id is a C string; the previous "%v" is not a valid printf
		// conversion specifier.
		DBG_LOG(DBG_RULES, "Checking rule: %s", r->id);

		// NOTE(review): this loop only affects debug output. The rule is
		// added to the matched list below regardless of its outcome, so
		// a rule with several patterns is not required to have all of
		// them match. Confirm whether that is intended.
		loop_over_list(r->patterns, j)
			{
			if ( ! accepted.is_member(r->patterns[j]->id) )
				continue;

			if ( (unsigned int) matchpos[i] >
			     r->patterns[j]->offset + r->patterns[j]->depth )
				continue;

			DBG_LOG(DBG_RULES, "All patterns of rule satisfied");
			}

		if ( ! matched.is_member(r) )
			matched.append(r);
		}

	// Collect the MIME type and strength from the actions of each matched
	// rule.
	loop_over_list(matched, j)
		{
		Rule* r = matched[j];

		loop_over_list(r->actions, rai)
			{
			const RuleActionMIME* ram =
				dynamic_cast<const RuleActionMIME*>(r->actions[rai]);

			// A rule may carry actions of other types; the cast then
			// yields null and must not be dereferenced.
			if ( ! ram )
				continue;

			set<string>& ss = (*rval)[ram->GetStrength()];
			ss.insert(ram->GetMIME());
			}
		}

	return rval;
	}
RuleEndpointState* RuleMatcher::InitEndpoint(analyzer::Analyzer* analyzer, RuleEndpointState* RuleMatcher::InitEndpoint(analyzer::Analyzer* analyzer,
const IP_Hdr* ip, int caplen, const IP_Hdr* ip, int caplen,
RuleEndpointState* opposite, RuleEndpointState* opposite,
@ -1010,6 +1140,15 @@ void RuleMatcher::ClearEndpointState(RuleEndpointState* state)
state->matchers[j]->state->Clear(); state->matchers[j]->state->Clear();
} }
// Resets a file magic state object by clearing the regular expression match
// state of each of its matchers, so the object can be reused for new data.
// Does nothing when rule_bench is 3 or when state is null.
void RuleMatcher::ClearFileMagicState(RuleFileMagicState* state) const
	{
	if ( rule_bench == 3 )
		return;

	// Match() tolerates a null state object, so do the same here instead
	// of dereferencing it.
	if ( ! state )
		return;

	loop_over_list(state->matchers, j)
		state->matchers[j]->state->Clear();
	}
void RuleMatcher::PrintDebug() void RuleMatcher::PrintDebug()
{ {
loop_over_list(rules, i) loop_over_list(rules, i)

View file

@ -3,6 +3,10 @@
#include <limits.h> #include <limits.h>
#include <vector> #include <vector>
#include <map>
#include <functional>
#include <set>
#include <string>
#include "IPAddr.h" #include "IPAddr.h"
#include "BroString.h" #include "BroString.h"
@ -191,6 +195,30 @@ private:
int_list matched_rules; // Rules for which all conditions have matched int_list matched_rules; // Rules for which all conditions have matched
}; };
/**
* A state object used for matching file magic signatures.
* It holds a list of matchers, each wrapping one regular expression
* match state. Instances are created by RuleMatcher::InitFileMagic();
* the destructor deletes every matcher together with its match state.
*/
class RuleFileMagicState {
friend class RuleMatcher;
public:
~RuleFileMagicState();
private:
// Ctor is private; use RuleMatcher::InitFileMagic() for
// instantiation.
RuleFileMagicState()
{ }
// Wraps the match state of a single file magic pattern set.
struct Matcher {
RE_Match_State* state;
};
// List type holding pointers to Matcher objects.
declare(PList, Matcher);
typedef PList(Matcher) matcher_list;
// All matchers belonging to this state object.
matcher_list matchers;
};
// RuleMatcher is the main class which builds up the data structures // RuleMatcher is the main class which builds up the data structures
// and performs the actual matching. // and performs the actual matching.
@ -205,6 +233,42 @@ public:
// Parse the given files and built up data structures. // Parse the given files and built up data structures.
bool ReadFiles(const name_list& files); bool ReadFiles(const name_list& files);
/**
* Initialize a state object for matching file magic signatures.
* @return A state object that can be used for file magic mime type
* identification.
*/
RuleFileMagicState* InitFileMagic() const;
/**
* Data structure containing a set of matching file magic signatures.
* Ordered from greatest to least strength. Matches of the same strength
* will be in the set in lexicographic order of the MIME type string.
*/
typedef map<int, set<string>, std::greater<int> > MIME_Matches;
/**
* Matches a chunk of data against file magic signatures.
* @param state A state object previously returned from
* RuleMatcher::InitFileMagic()
* @param data Chunk of data to match signatures against.
* @param len Length of \a data in bytes.
* @param matches An optional pre-existing match result object to
* modify with additional matches. If it's a null
* pointer, one will be instantiated and returned from
* this method.
* @return The results of the signature matching.
*/
MIME_Matches* Match(RuleFileMagicState* state, const u_char* data,
uint64 len, MIME_Matches* matches = 0) const;
/**
* Resets a state object used with matching file magic signatures.
* @param state The state object to reset to an initial condition.
*/
void ClearFileMagicState(RuleFileMagicState* state) const;
// Initialize the matching state for a endpoind of a connection based on // Initialize the matching state for a endpoind of a connection based on
// the given packet (which should be the first packet encountered for // the given packet (which should be the first packet encountered for
// this endpoint). If the matching is triggered by an PIA, a pointer to // this endpoint). If the matching is triggered by an PIA, a pointer to

View file

@ -3,6 +3,7 @@
#include "File.h" #include "File.h"
#include "file_analysis/Manager.h" #include "file_analysis/Manager.h"
#include "RuleMatcher.h"
#include "Reporter.h" #include "Reporter.h"
#include "util.h" #include "util.h"
@ -48,14 +49,19 @@ void File_Analyzer::Done()
void File_Analyzer::Identify() void File_Analyzer::Identify()
{ {
const char* desc = bro_magic_buffer(magic_desc_cookie, buffer, buffer_len); RuleFileMagicState* fms = rule_matcher->InitFileMagic();
const char* mime = bro_magic_buffer(magic_mime_cookie, buffer, buffer_len); RuleMatcher::MIME_Matches matches;
rule_matcher->Match(fms, reinterpret_cast<const u_char*>(buffer),
buffer_len, &matches);
string match = matches.empty() ? "<unknown>"
: *(matches.begin()->second.begin());
val_list* vl = new val_list; val_list* vl = new val_list;
vl->append(BuildConnVal()); vl->append(BuildConnVal());
vl->append(new StringVal(buffer_len, buffer)); vl->append(new StringVal(buffer_len, buffer));
vl->append(new StringVal(desc ? desc : "<unknown>")); vl->append(new StringVal("<unknown>"));
vl->append(new StringVal(mime ? mime : "<unknown>")); vl->append(new StringVal(match));
ConnectionEvent(file_transferred, vl); ConnectionEvent(file_transferred, vl);
} }

View file

@ -1,3 +1,12 @@
## TODO. ## Generated when a TCP connection associated w/ file data transfer is seen
## (e.g. as happens w/ FTP or IRC).
## ##
## c: The connection over which file data is transferred.
##
## prefix: Up to 1024 bytes of the file data.
##
## descr: Deprecated/unused argument.
##
## mime_type: MIME type of the file or "<unknown>" if no file magic signatures
## matched.
event file_transferred%(c: connection, prefix: string, descr: string, mime_type: string%); event file_transferred%(c: connection, prefix: string, descr: string, mime_type: string%);

View file

@ -835,30 +835,44 @@ function syslog%(s: string%): any
return 0; return 0;
%} %}
%%{ ## Determines the MIME type of a piece of data using Bro's file magic
extern "C" { ## signatures.
#include <magic.h>
}
%%}
## Determines the MIME type of a piece of data using ``libmagic``.
## ##
## data: The data to find the MIME type for. ## data: The data to find the MIME type for.
## ##
## return_mime: If true, the function returns a short MIME type string (e.g., ## return_mime: Deprecated argument; does nothing, except emit a warning
## ``text/plain`` instead of a more elaborate textual description). ## when false.
## ##
## Returns: The MIME type of *data*, or "<unknown>" if there was an error. ## Returns: The MIME type of *data*, or "<unknown>" if there was an error
function identify_data%(data: string, return_mime: bool%): string ## or no match. This is the strongest signature match.
##
## .. bro:see:: file_magic
function identify_data%(data: string, return_mime: bool &default=T%): string
%{ %{
magic_t* magic = return_mime ? &magic_mime_cookie : &magic_desc_cookie; if ( ! return_mime )
reporter->Warning("identify_data() builtin-function only returns MIME types, but verbose file info requested");
if( ! *magic ) string strongest_match = file_mgr->DetectMIME(data->Bytes(), data->Len());
if ( strongest_match.empty() )
return new StringVal("<unknown>"); return new StringVal("<unknown>");
const char* desc = bro_magic_buffer(*magic, data->Bytes(), data->Len()); return new StringVal(strongest_match);
%}
return new StringVal(desc ? desc : "<unknown>"); ## Determines the MIME type of a piece of data using Bro's file magic
## signatures.
##
## data: The data for which to find matching MIME types.
##
## Returns: All matching signatures, in order of strength.
##
## .. bro:see:: identify_data
function file_magic%(data: string%): mime_matches
%{
RuleMatcher::MIME_Matches matches;
file_mgr->DetectMIME(data->Bytes(), data->Len(), &matches);
return file_analysis::GenMIMEMatchesVal(matches);
%} %}
## Performs an entropy test on the given data. ## Performs an entropy test on the given data.

View file

@ -10,6 +10,7 @@
#include "Val.h" #include "Val.h"
#include "Type.h" #include "Type.h"
#include "Event.h" #include "Event.h"
#include "RuleMatcher.h"
#include "analyzer/Analyzer.h" #include "analyzer/Analyzer.h"
#include "analyzer/Manager.h" #include "analyzer/Manager.h"
@ -52,6 +53,7 @@ int File::timeout_interval_idx = -1;
int File::bof_buffer_size_idx = -1; int File::bof_buffer_size_idx = -1;
int File::bof_buffer_idx = -1; int File::bof_buffer_idx = -1;
int File::mime_type_idx = -1; int File::mime_type_idx = -1;
int File::mime_types_idx = -1;
void File::StaticInit() void File::StaticInit()
{ {
@ -72,6 +74,7 @@ void File::StaticInit()
bof_buffer_size_idx = Idx("bof_buffer_size"); bof_buffer_size_idx = Idx("bof_buffer_size");
bof_buffer_idx = Idx("bof_buffer"); bof_buffer_idx = Idx("bof_buffer");
mime_type_idx = Idx("mime_type"); mime_type_idx = Idx("mime_type");
mime_types_idx = Idx("mime_types");
} }
File::File(const string& file_id, Connection* conn, analyzer::Tag tag, File::File(const string& file_id, Connection* conn, analyzer::Tag tag,
@ -279,20 +282,17 @@ bool File::BufferBOF(const u_char* data, uint64 len)
bool File::DetectMIME(const u_char* data, uint64 len) bool File::DetectMIME(const u_char* data, uint64 len)
{ {
const char* mime = bro_magic_buffer(magic_mime_cookie, data, len); RuleMatcher::MIME_Matches matches;
file_mgr->DetectMIME(data, len, &matches);
if ( mime ) if ( matches.empty() )
{ return false;
const char* mime_end = strchr(mime, ';');
if ( mime_end ) val->Assign(mime_type_idx,
// strip off charset new StringVal(*(matches.begin()->second.begin())));
val->Assign(mime_type_idx, new StringVal(mime_end - mime, mime)); val->Assign(mime_types_idx, file_analysis::GenMIMEMatchesVal(matches));
else
val->Assign(mime_type_idx, new StringVal(mime));
}
return mime; return true;
} }
void File::ReplayBOF() void File::ReplayBOF()

View file

@ -225,11 +225,12 @@ protected:
void ReplayBOF(); void ReplayBOF();
/** /**
* Does mime type detection and assigns type (if available) to \c mime_type * Does mime type detection via file magic signatures and assigns
* strongest matching mime type (if available) to \c mime_type
* field in #val. * field in #val.
* @param data pointer to a chunk of file data. * @param data pointer to a chunk of file data.
* @param len number of bytes in the data chunk. * @param len number of bytes in the data chunk.
* @return whether mime type was available. * @return whether a mime type match was found.
*/ */
bool DetectMIME(const u_char* data, uint64 len); bool DetectMIME(const u_char* data, uint64 len);
@ -282,6 +283,7 @@ private:
static int bof_buffer_size_idx; static int bof_buffer_size_idx;
static int bof_buffer_idx; static int bof_buffer_idx;
static int mime_type_idx; static int mime_type_idx;
static int mime_types_idx;
}; };
} // namespace file_analysis } // namespace file_analysis

View file

@ -20,13 +20,15 @@ string Manager::salt;
Manager::Manager() Manager::Manager()
: plugin::ComponentManager<file_analysis::Tag, : plugin::ComponentManager<file_analysis::Tag,
file_analysis::Component>("Files") file_analysis::Component>("Files"),
id_map(), ignored(), current_file_id(), magic_state()
{ {
} }
Manager::~Manager() Manager::~Manager()
{ {
Terminate(); Terminate();
delete magic_state;
} }
void Manager::InitPreScript() void Manager::InitPreScript()
@ -42,6 +44,12 @@ void Manager::InitPostScript()
{ {
} }
// (Re)creates the state used for matching file magic signatures: releases
// whatever state this manager currently holds and replaces it with a fresh
// one obtained from the global rule matcher.
void Manager::InitMagic()
{
// Deleting a null pointer is a no-op, so this is fine on the first call
// (the constructor value-initializes magic_state).
delete magic_state;
magic_state = rule_matcher->InitFileMagic();
}
void Manager::Terminate() void Manager::Terminate()
{ {
vector<string> keys; vector<string> keys;
@ -395,3 +403,47 @@ Analyzer* Manager::InstantiateAnalyzer(Tag tag, RecordVal* args, File* f) const
return c->Factory()(args, f); return c->Factory()(args, f);
} }
// Matches a chunk of data against the file magic signatures and hands back
// whatever match container the rule matcher returns for it.
//
// data: bytes to match against.
// len:  number of bytes in data.
// rval: container passed through to the rule matcher for receiving matches.
//
// Aborts via an internal error if InitMagic() has not set up the matching
// state yet.
RuleMatcher::MIME_Matches* Manager::DetectMIME(const u_char* data, uint64 len,
                                               RuleMatcher::MIME_Matches* rval) const
	{
	if ( magic_state == 0 )
		reporter->InternalError("file magic signature state not initialized");

	RuleMatcher::MIME_Matches* matched =
		rule_matcher->Match(magic_state, data, len, rval);

	// The matching state is shared across calls, so it is cleared again
	// right after each match.
	rule_matcher->ClearFileMagicState(magic_state);

	return matched;
	}
// Convenience overload: returns only the first MIME type of the first entry
// in the match container, or an empty string when nothing matched.
//
// data: bytes to match against.
// len:  number of bytes in data.
string Manager::DetectMIME(const u_char* data, uint64 len) const
	{
	RuleMatcher::MIME_Matches found;
	DetectMIME(data, len, &found);

	if ( found.empty() )
		return string();

	// NOTE(review): the first entry is presumably the strongest match;
	// that depends on the container's ordering, which is defined elsewhere.
	RuleMatcher::MIME_Matches::const_iterator first = found.begin();
	return *(first->second.begin());
	}
/**
 * Builds a script-layer \c mime_matches vector from a set of file magic
 * signature matches.
 *
 * Each entry of \a m maps a key (the match strength) to a set of MIME type
 * strings. One \c mime_match record is appended to the result for every
 * (strength, MIME type) pair, in the iteration order of \a m, so MIME types
 * sharing a strength are all reported rather than only the last one.
 *
 * @param m The MIME match information with which to populate the value.
 * @return A newly allocated vector value; empty if \a m holds no matches.
 */
VectorVal* file_analysis::GenMIMEMatchesVal(const RuleMatcher::MIME_Matches& m)
	{
	VectorVal* rval = new VectorVal(mime_matches);

	for ( RuleMatcher::MIME_Matches::const_iterator it = m.begin();
	      it != m.end(); ++it )
		{
		for ( set<string>::const_iterator it2 = it->second.begin();
		      it2 != it->second.end(); ++it2 )
			{
			// A fresh record per MIME string: reusing one record for the
			// whole strength bucket would overwrite its fields on every
			// iteration and keep only the last MIME type.
			RecordVal* element = new RecordVal(mime_match);
			element->Assign(0, new Val(it->first, TYPE_INT));
			element->Assign(1, new StringVal(*it2));

			rval->Assign(rval->Size(), element);
			}
		}

	return rval;
	}

View file

@ -14,6 +14,7 @@
#include "Analyzer.h" #include "Analyzer.h"
#include "Timer.h" #include "Timer.h"
#include "EventHandler.h" #include "EventHandler.h"
#include "RuleMatcher.h"
#include "File.h" #include "File.h"
#include "FileTimer.h" #include "FileTimer.h"
@ -54,6 +55,12 @@ public:
*/ */
void InitPostScript(); void InitPostScript();
/**
* Initializes the state required to match against file magic signatures
* for MIME type identification.
*/
void InitMagic();
/** /**
* Times out any active file analysis to prepare for shutdown. * Times out any active file analysis to prepare for shutdown.
*/ */
@ -255,6 +262,29 @@ public:
*/ */
Analyzer* InstantiateAnalyzer(Tag tag, RecordVal* args, File* f) const; Analyzer* InstantiateAnalyzer(Tag tag, RecordVal* args, File* f) const;
/**
* Returns a set of all matching MIME magic signatures for a given
* chunk of data.
* @param data A chunk of bytes to match magic MIME signatures against.
* @param len The number of bytes in \a data.
* @param rval An optional pre-existing structure in which to insert
* new matches. If it's a null pointer, an object is
* allocated and returned from the method.
* @return Set of all matching file magic signatures, which may be
* an object allocated by the method if \a rval is a null pointer.
*/
RuleMatcher::MIME_Matches* DetectMIME(const u_char* data, uint64 len,
RuleMatcher::MIME_Matches* rval) const;
/**
* Returns the strongest MIME magic signature match for a given data chunk.
* @param data A chunk of bytes to match magic MIME signatures against.
* @param len The number of bytes in \a data.
* @returns The MIME type string of the strongest file magic signature
* match, or an empty string if nothing matched.
*/
std::string DetectMIME(const u_char* data, uint64 len) const;
protected: protected:
friend class FileTimer; friend class FileTimer;
@ -334,11 +364,18 @@ private:
IDMap id_map; /**< Map file ID to file_analysis::File records. */ IDMap id_map; /**< Map file ID to file_analysis::File records. */
IDSet ignored; /**< Ignored files. Will be finally removed on EOF. */ IDSet ignored; /**< Ignored files. Will be finally removed on EOF. */
string current_file_id; /**< Hash of what get_file_handle event sets. */ string current_file_id; /**< Hash of what get_file_handle event sets. */
RuleFileMagicState* magic_state; /**< File magic signature match state. */
static TableVal* disabled; /**< Table of disabled analyzers. */ static TableVal* disabled; /**< Table of disabled analyzers. */
static string salt; /**< A salt added to file handles before hashing. */ static string salt; /**< A salt added to file handles before hashing. */
}; };
/**
* Returns a script-layer value corresponding to the \c mime_matches type.
* @param m The MIME match information with which to populate the value.
*/
VectorVal* GenMIMEMatchesVal(const RuleMatcher::MIME_Matches& m);
} // namespace file_analysis } // namespace file_analysis
extern file_analysis::Manager* file_mgr; extern file_analysis::Manager* file_mgr;

View file

@ -23,7 +23,6 @@ extern "C" {
#endif #endif
#include <openssl/md5.h> #include <openssl/md5.h>
#include <magic.h>
extern "C" void OPENSSL_add_all_algorithms_conf(void); extern "C" void OPENSSL_add_all_algorithms_conf(void);
@ -69,9 +68,6 @@ extern "C" void OPENSSL_add_all_algorithms_conf(void);
Brofiler brofiler; Brofiler brofiler;
magic_t magic_desc_cookie = 0;
magic_t magic_mime_cookie = 0;
#ifndef HAVE_STRSEP #ifndef HAVE_STRSEP
extern "C" { extern "C" {
char* strsep(char**, const char*); char* strsep(char**, const char*);
@ -220,7 +216,6 @@ void usage()
#endif #endif
fprintf(stderr, " $BROPATH | file search path (%s)\n", bro_path()); fprintf(stderr, " $BROPATH | file search path (%s)\n", bro_path());
fprintf(stderr, " $BROMAGIC | libmagic mime magic database search path (%s)\n", bro_magic_path());
fprintf(stderr, " $BRO_PREFIXES | prefix list (%s)\n", bro_prefixes().c_str()); fprintf(stderr, " $BRO_PREFIXES | prefix list (%s)\n", bro_prefixes().c_str());
fprintf(stderr, " $BRO_DNS_FAKE | disable DNS lookups (%s)\n", bro_dns_fake()); fprintf(stderr, " $BRO_DNS_FAKE | disable DNS lookups (%s)\n", bro_dns_fake());
fprintf(stderr, " $BRO_SEED_FILE | file to load seeds from (not set)\n"); fprintf(stderr, " $BRO_SEED_FILE | file to load seeds from (not set)\n");
@ -786,9 +781,6 @@ int main(int argc, char** argv)
curl_global_init(CURL_GLOBAL_ALL); curl_global_init(CURL_GLOBAL_ALL);
#endif #endif
bro_init_magic(&magic_desc_cookie, MAGIC_NONE);
bro_init_magic(&magic_mime_cookie, MAGIC_MIME);
int r = sqlite3_initialize(); int r = sqlite3_initialize();
if ( r != SQLITE_OK ) if ( r != SQLITE_OK )
@ -956,6 +948,8 @@ int main(int argc, char** argv)
if ( rule_debug ) if ( rule_debug )
rule_matcher->PrintDebug(); rule_matcher->PrintDebug();
file_mgr->InitMagic();
} }
delete [] script_rule_files; delete [] script_rule_files;

View file

@ -34,6 +34,7 @@ static uint8_t mask_to_len(uint32_t mask)
%token TOK_ENABLE %token TOK_ENABLE
%token TOK_EVAL %token TOK_EVAL
%token TOK_EVENT %token TOK_EVENT
%token TOK_MIME
%token TOK_HEADER %token TOK_HEADER
%token TOK_IDENT %token TOK_IDENT
%token TOK_INT %token TOK_INT
@ -61,9 +62,9 @@ static uint8_t mask_to_len(uint32_t mask)
%type <str> TOK_STRING TOK_IDENT TOK_POLICY_SYMBOL TOK_PATTERN pattern string %type <str> TOK_STRING TOK_IDENT TOK_POLICY_SYMBOL TOK_PATTERN pattern string
%type <val> TOK_INT TOK_TCP_STATE_SYM TOK_IP_OPTION_SYM TOK_COMP %type <val> TOK_INT TOK_TCP_STATE_SYM TOK_IP_OPTION_SYM TOK_COMP
%type <val> integer ipoption_list tcpstate_list %type <val> integer ipoption_list tcpstate_list opt_strength
%type <rule> rule %type <rule> rule
%type <bl> TOK_BOOL %type <bl> TOK_BOOL opt_negate
%type <hdr_test> hdr_expr %type <hdr_test> hdr_expr
%type <range> range rangeopt %type <range> range rangeopt
%type <vallist> value_list %type <vallist> value_list
@ -186,6 +187,9 @@ rule_attr:
| TOK_EVENT string | TOK_EVENT string
{ current_rule->AddAction(new RuleActionEvent($2)); } { current_rule->AddAction(new RuleActionEvent($2)); }
| TOK_MIME string opt_strength
{ current_rule->AddAction(new RuleActionMIME($2, $3)); }
| TOK_ENABLE TOK_STRING | TOK_ENABLE TOK_STRING
{ current_rule->AddAction(new RuleActionEnable($2)); } { current_rule->AddAction(new RuleActionEnable($2)); }
@ -359,6 +363,20 @@ integer:
{ $$ = id_to_uint($1); } { $$ = id_to_uint($1); }
; ;
/* Optional leading minus sign: yields true when a '-' token is present,
   false when it is absent. */
opt_negate:
'-'
{ $$ = true; }
|
{ $$ = false; }
;
/* Optional strength suffix of a "file-mime" attribute: a comma followed by
   an optionally negated integer.  Yields the (possibly negated) integer,
   or 0 when the suffix is omitted. */
opt_strength:
',' opt_negate TOK_INT
{ $$ = $2 ? -$3 : $3; }
|
{ $$ = 0; }
;
string: string:
TOK_STRING TOK_STRING
{ $$ = $1; } { $$ = $1; }

View file

@ -21,11 +21,13 @@ D [0-9]+
H [0-9a-fA-F]+ H [0-9a-fA-F]+
HEX {H} HEX {H}
STRING \"([^\n\"]|\\\")*\" STRING \"([^\n\"]|\\\")*\"
ID ([0-9a-zA-Z_-]+::)*[0-9a-zA-Z_-]+ IDCOMPONENT [0-9a-zA-Z_][0-9a-zA-Z_-]*
ID {IDCOMPONENT}(::{IDCOMPONENT})*
IP6 ("["({HEX}:){7}{HEX}"]")|("["0x{HEX}({HEX}|:)*"::"({HEX}|:)*"]")|("["({HEX}|:)*"::"({HEX}|:)*"]")|("["({HEX}|:)*"::"({HEX}|:)*({D}"."){3}{D}"]") IP6 ("["({HEX}:){7}{HEX}"]")|("["0x{HEX}({HEX}|:)*"::"({HEX}|:)*"]")|("["({HEX}|:)*"::"({HEX}|:)*"]")|("["({HEX}|:)*"::"({HEX}|:)*({D}"."){3}{D}"]")
RE \/(\\\/)?([^/]|[^\\]\\\/)*\/ RE \/(\\\/)?([^/]|[^\\]\\\/)*\/
META \.[^ \t]+{WS}[^\n]+ META \.[^ \t]+{WS}[^\n]+
PID ([0-9a-zA-Z_-]|"::")+ PIDCOMPONENT [A-Za-z_][A-Za-z_0-9]*
PID {PIDCOMPONENT}(::{PIDCOMPONENT})*
%option nounput nodefault %option nounput nodefault
@ -50,7 +52,7 @@ PID ([0-9a-zA-Z_-]|"::")+
return TOK_IP6; return TOK_IP6;
} }
[!\]\[{}&:,] return rules_text[0]; [!\]\[{}&:,-] return rules_text[0];
"<=" { rules_lval.val = RuleHdrTest::LE; return TOK_COMP; } "<=" { rules_lval.val = RuleHdrTest::LE; return TOK_COMP; }
">=" { rules_lval.val = RuleHdrTest::GE; return TOK_COMP; } ">=" { rules_lval.val = RuleHdrTest::GE; return TOK_COMP; }
@ -116,6 +118,7 @@ dst-port return TOK_DST_PORT;
enable return TOK_ENABLE; enable return TOK_ENABLE;
eval return TOK_EVAL; eval return TOK_EVAL;
event return TOK_EVENT; event return TOK_EVENT;
file-mime return TOK_MIME;
header return TOK_HEADER; header return TOK_HEADER;
ip-options return TOK_IP_OPTIONS; ip-options return TOK_IP_OPTIONS;
ip-proto return TOK_IP_PROTO; ip-proto return TOK_IP_PROTO;
@ -129,6 +132,7 @@ src-port return TOK_SRC_PORT;
tcp-state return TOK_TCP_STATE; tcp-state return TOK_TCP_STATE;
active return TOK_ACTIVE; active return TOK_ACTIVE;
file-magic { rules_lval.val = Rule::FILE_MAGIC; return TOK_PATTERN_TYPE; }
payload { rules_lval.val = Rule::PAYLOAD; return TOK_PATTERN_TYPE; } payload { rules_lval.val = Rule::PAYLOAD; return TOK_PATTERN_TYPE; }
http-request { rules_lval.val = Rule::HTTP_REQUEST; return TOK_PATTERN_TYPE; } http-request { rules_lval.val = Rule::HTTP_REQUEST; return TOK_PATTERN_TYPE; }
http-request-body { rules_lval.val = Rule::HTTP_REQUEST_BODY; return TOK_PATTERN_TYPE; } http-request-body { rules_lval.val = Rule::HTTP_REQUEST_BODY; return TOK_PATTERN_TYPE; }

View file

@ -1,2 +1 @@
#define BRO_SCRIPT_INSTALL_PATH "@BRO_SCRIPT_INSTALL_PATH@" #define BRO_SCRIPT_INSTALL_PATH "@BRO_SCRIPT_INSTALL_PATH@"
#define BRO_MAGIC_INSTALL_PATH "@BRO_MAGIC_INSTALL_PATH@"

View file

@ -911,16 +911,6 @@ const char* bro_path()
return path; return path;
} }
const char* bro_magic_path()
{
const char* path = getenv("BROMAGIC");
if ( ! path )
path = BRO_MAGIC_INSTALL_PATH;
return path;
}
string bro_prefixes() string bro_prefixes()
{ {
string rval; string rval;
@ -1649,45 +1639,6 @@ void operator delete[](void* v)
#endif #endif
void bro_init_magic(magic_t* cookie_ptr, int flags)
{
if ( ! cookie_ptr || *cookie_ptr )
return;
*cookie_ptr = magic_open(flags);
// Always use Bro's custom magic database.
const char* database = bro_magic_path();
if ( ! *cookie_ptr )
{
const char* err = magic_error(*cookie_ptr);
reporter->InternalError("can't init libmagic: %s",
err ? err : "unknown");
}
else if ( magic_load(*cookie_ptr, database) < 0 )
{
const char* err = magic_error(*cookie_ptr);
reporter->InternalError("can't load magic file %s: %s", database,
err ? err : "unknown");
magic_close(*cookie_ptr);
*cookie_ptr = 0;
}
}
const char* bro_magic_buffer(magic_t cookie, const void* buffer, size_t length)
{
const char* rval = magic_buffer(cookie, buffer, length);
if ( ! rval )
{
const char* err = magic_error(cookie);
reporter->Error("magic_buffer error: %s", err ? err : "unknown");
}
return rval;
}
const char* canonify_name(const char* name) const char* canonify_name(const char* name)
{ {
unsigned int len = strlen(name); unsigned int len = strlen(name);

View file

@ -22,7 +22,6 @@
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#include <stdarg.h> #include <stdarg.h>
#include <magic.h>
#include <libgen.h> #include <libgen.h>
#include "config.h" #include "config.h"
@ -483,12 +482,6 @@ struct CompareString
} }
}; };
extern magic_t magic_desc_cookie;
extern magic_t magic_mime_cookie;
void bro_init_magic(magic_t* cookie_ptr, int flags);
const char* bro_magic_buffer(magic_t cookie, const void* buffer, size_t length);
/** /**
* Canonicalizes a name by converting it to uppercase letters and replacing * Canonicalizes a name by converting it to uppercase letters and replacing
* all non-alphanumeric characters with an underscore. * all non-alphanumeric characters with an underscore.

View file

@ -1,4 +1,2 @@
ASCII text, with no line terminators
text/plain text/plain
PNG image data
image/png image/png

View file

@ -6,5 +6,5 @@
#open 2013-08-26-19-02-18 #open 2013-08-26-19-02-18
#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth method host uri referrer user_agent request_body_len response_body_len status_code status_msg info_code info_msg filename tags username password proxied orig_fuids orig_mime_types resp_fuids resp_mime_types #fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth method host uri referrer user_agent request_body_len response_body_len status_code status_msg info_code info_msg filename tags username password proxied orig_fuids orig_mime_types resp_fuids resp_mime_types
#types time string addr port addr port count string string string string string count count count string count string string table[enum] string string table[string] vector[string] vector[string] vector[string] vector[string] #types time string addr port addr port count string string string string string count count count string count string string table[enum] string string table[string] vector[string] vector[string] vector[string] vector[string]
1333458850.375568 CjhGID4nQcgTWjvg4c 10.131.47.185 1923 79.101.110.141 80 1 GET o-o.preferred.telekomrs-beg1.v2.lscache8.c.youtube.com /videoplayback?upn=MTU2MDY5NzQ5OTM0NTI3NDY4NDc&sparams=algorithm,burst,cp,factor,id,ip,ipbits,itag,source,upn,expire&fexp=912300,907210&algorithm=throttle-factor&itag=34&ip=212.0.0.0&burst=40&sver=3&signature=832FB1042E20780CFCA77A4DB5EA64AC593E8627.D1166C7E8365732E52DAFD68076DAE0146E0AE01&source=youtube&expire=1333484980&key=yt1&ipbits=8&factor=1.25&cp=U0hSSFRTUl9NSkNOMl9MTVZKOjh5eEN2SG8tZF84&id=ebf1e932d4bd1286&cm2=1 http://s.ytimg.com/yt/swfbin/watch_as3-vflqrJwOA.swf Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.11 (KHTML, like Gecko; X-SBLSP) Chrome/17.0.963.83 Safari/535.11 0 56320 206 Partial Content - - - (empty) - - - - - FNJkBA1b8FSHt5N8jl binary 1333458850.375568 CjhGID4nQcgTWjvg4c 10.131.47.185 1923 79.101.110.141 80 1 GET o-o.preferred.telekomrs-beg1.v2.lscache8.c.youtube.com /videoplayback?upn=MTU2MDY5NzQ5OTM0NTI3NDY4NDc&sparams=algorithm,burst,cp,factor,id,ip,ipbits,itag,source,upn,expire&fexp=912300,907210&algorithm=throttle-factor&itag=34&ip=212.0.0.0&burst=40&sver=3&signature=832FB1042E20780CFCA77A4DB5EA64AC593E8627.D1166C7E8365732E52DAFD68076DAE0146E0AE01&source=youtube&expire=1333484980&key=yt1&ipbits=8&factor=1.25&cp=U0hSSFRTUl9NSkNOMl9MTVZKOjh5eEN2SG8tZF84&id=ebf1e932d4bd1286&cm2=1 http://s.ytimg.com/yt/swfbin/watch_as3-vflqrJwOA.swf Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.11 (KHTML, like Gecko; X-SBLSP) Chrome/17.0.963.83 Safari/535.11 0 56320 206 Partial Content - - - (empty) - - - - - FNJkBA1b8FSHt5N8jl -
#close 2013-08-26-19-02-18 #close 2013-08-26-19-02-18

View file

@ -3,7 +3,7 @@
#empty_field (empty) #empty_field (empty)
#unset_field - #unset_field -
#path loaded_scripts #path loaded_scripts
#open 2014-03-04-06-37-10 #open 2014-03-03-20-45-31
#fields name #fields name
#types string #types string
scripts/base/init-bare.bro scripts/base/init-bare.bro
@ -96,6 +96,7 @@ scripts/base/init-bare.bro
build/scripts/base/bif/file_analysis.bif.bro build/scripts/base/bif/file_analysis.bif.bro
scripts/base/utils/site.bro scripts/base/utils/site.bro
scripts/base/utils/patterns.bro scripts/base/utils/patterns.bro
scripts/base/frameworks/files/magic/__load__.bro
build/scripts/base/bif/__load__.bro build/scripts/base/bif/__load__.bro
build/scripts/base/bif/bloom-filter.bif.bro build/scripts/base/bif/bloom-filter.bif.bro
build/scripts/base/bif/broxygen.bif.bro build/scripts/base/bif/broxygen.bif.bro

View file

@ -96,6 +96,7 @@ scripts/base/init-bare.bro
build/scripts/base/bif/file_analysis.bif.bro build/scripts/base/bif/file_analysis.bif.bro
scripts/base/utils/site.bro scripts/base/utils/site.bro
scripts/base/utils/patterns.bro scripts/base/utils/patterns.bro
scripts/base/frameworks/files/magic/__load__.bro
build/scripts/base/bif/__load__.bro build/scripts/base/bif/__load__.bro
build/scripts/base/bif/bloom-filter.bif.bro build/scripts/base/bif/bloom-filter.bif.bro
build/scripts/base/bif/broxygen.bif.bro build/scripts/base/bif/broxygen.bif.bro
@ -227,4 +228,4 @@ scripts/base/init-default.bro
scripts/base/misc/find-checksum-offloading.bro scripts/base/misc/find-checksum-offloading.bro
scripts/base/misc/find-filtered-trace.bro scripts/base/misc/find-filtered-trace.bro
scripts/policy/misc/loaded-scripts.bro scripts/policy/misc/loaded-scripts.bro
#close 2014-03-13-22-14-10 #close 2014-03-03-20-45-54

View file

@ -6,9 +6,9 @@
# bro -r http/bro.org.pcap file_extraction.bro # bro -r http/bro.org.pcap file_extraction.bro
Extracting file HTTP-FiIpIB2hRQSDBOSJRg.html Extracting file HTTP-FiIpIB2hRQSDBOSJRg.html
Extracting file HTTP-FMG4bMmVV64eOsCb.txt
Extracting file HTTP-FnaT2a3UDd093opCB9.txt Extracting file HTTP-FnaT2a3UDd093opCB9.txt
Extracting file HTTP-FsvATF146kf1Emc21j.txt Extracting file HTTP-FsvATF146kf1Emc21j.txt
Extracting file HTTP-FkMQHg2nBr44fc5h63.txt Extracting file HTTP-FkMQHg2nBr44fc5h63.txt
Extracting file HTTP-FfQGqj4Fhh3pH7nVQj.txt
[...] [...]

View file

@ -41,7 +41,7 @@ export {
event file_hash(f: fa_file, kind: string, hash: string) event file_hash(f: fa_file, kind: string, hash: string)
{ {
if ( kind=="sha1" && match_file_types in f$mime_type ) if ( kind == "sha1" && f?$mime_type && match_file_types in f$mime_type )
{ {
local hash_domain = fmt("%s.malware.hash.cymru.com", hash); local hash_domain = fmt("%s.malware.hash.cymru.com", hash);
when ( local MHR_result = lookup_hostname_txt(hash_domain) ) when ( local MHR_result = lookup_hostname_txt(hash_domain) )

View file

@ -4,7 +4,7 @@ detect-MHR.bro
event file_hash(f: fa_file, kind: string, hash: string) event file_hash(f: fa_file, kind: string, hash: string)
{ {
if ( kind=="sha1" && match_file_types in f$mime_type ) if ( kind == "sha1" && f?$mime_type && match_file_types in f$mime_type )
{ {
local hash_domain = fmt("%s.malware.hash.cymru.com", hash); local hash_domain = fmt("%s.malware.hash.cymru.com", hash);
when ( local MHR_result = lookup_hostname_txt(hash_domain) ) when ( local MHR_result = lookup_hostname_txt(hash_domain) )

View file

@ -16,16 +16,15 @@
#empty_field (empty) #empty_field (empty)
#unset_field - #unset_field -
#path mime_metrics #path mime_metrics
#open 2014-01-21-21-35-28 #open 2014-03-06-17-30-44
#fields ts ts_delta mtype uniq_hosts hits bytes #fields ts ts_delta mtype uniq_hosts hits bytes
#types time interval string count count count #types time interval string count count count
1389719059.311698 300.000000 text/html 1 4 53070 1389719059.311698 300.000000 text/html 1 4 53070
1389719059.311698 300.000000 image/jpeg 1 1 186859 1389719059.311698 300.000000 image/jpeg 1 1 186859
1389719059.311698 300.000000 text/troff 1 1 2957
1389719059.311698 300.000000 application/pgp-signature 1 1 836 1389719059.311698 300.000000 application/pgp-signature 1 1 836
1389719059.311698 300.000000 text/plain 1 12 114205 1389719059.311698 300.000000 text/plain 1 12 113982
1389719059.311698 300.000000 image/gif 1 1 172 1389719059.311698 300.000000 image/gif 1 1 172
1389719059.311698 300.000000 image/png 1 9 82176 1389719059.311698 300.000000 image/png 1 9 82176
1389719059.311698 300.000000 image/x-icon 1 2 2300 1389719059.311698 300.000000 image/x-icon 1 2 2300
#close 2014-01-21-21-35-28 #close 2014-03-06-17-30-44

View file

@ -10,8 +10,6 @@ total bytes: 1022920
source: HTTP source: HTTP
FILE_NEW FILE_NEW
file #1, 0, 0 file #1, 0, 0
MIME_TYPE
binary
FILE_OVER_NEW_CONNECTION FILE_OVER_NEW_CONNECTION
FILE_TIMEOUT FILE_TIMEOUT
FILE_TIMEOUT FILE_TIMEOUT

View file

@ -3,7 +3,7 @@ file #0, 0, 0
FILE_BOF_BUFFER FILE_BOF_BUFFER
The Nationa The Nationa
MIME_TYPE MIME_TYPE
text/x-pascal text/plain
FILE_OVER_NEW_CONNECTION FILE_OVER_NEW_CONNECTION
FILE_STATE_REMOVE FILE_STATE_REMOVE
file #0, 16557, 0 file #0, 16557, 0

View file

@ -2,8 +2,6 @@ FILE_NEW
file #0, 0, 0 file #0, 0, 0
FILE_BOF_BUFFER FILE_BOF_BUFFER
test^M^J test^M^J
MIME_TYPE
text/plain
FILE_OVER_NEW_CONNECTION FILE_OVER_NEW_CONNECTION
FILE_STATE_REMOVE FILE_STATE_REMOVE
file #0, 6, 0 file #0, 6, 0
@ -16,8 +14,6 @@ FILE_NEW
file #1, 0, 0 file #1, 0, 0
FILE_BOF_BUFFER FILE_BOF_BUFFER
test2^M^J test2^M^J
MIME_TYPE
text/plain
FILE_OVER_NEW_CONNECTION FILE_OVER_NEW_CONNECTION
FILE_STATE_REMOVE FILE_STATE_REMOVE
file #1, 7, 0 file #1, 7, 0
@ -30,8 +26,6 @@ FILE_NEW
file #2, 0, 0 file #2, 0, 0
FILE_BOF_BUFFER FILE_BOF_BUFFER
test3^M^J test3^M^J
MIME_TYPE
text/plain
FILE_OVER_NEW_CONNECTION FILE_OVER_NEW_CONNECTION
FILE_STATE_REMOVE FILE_STATE_REMOVE
file #2, 7, 0 file #2, 7, 0

View file

@ -10,8 +10,6 @@ total bytes: 1022920
source: HTTP source: HTTP
FILE_NEW FILE_NEW
file #1, 0, 0 file #1, 0, 0
MIME_TYPE
binary
FILE_OVER_NEW_CONNECTION FILE_OVER_NEW_CONNECTION
FILE_TIMEOUT FILE_TIMEOUT
FILE_STATE_REMOVE FILE_STATE_REMOVE

View file

@ -1,7 +1,5 @@
FILE_NEW FILE_NEW
file #0, 0, 0 file #0, 0, 0
MIME_TYPE
application/octet-stream
FILE_OVER_NEW_CONNECTION FILE_OVER_NEW_CONNECTION
FILE_OVER_NEW_CONNECTION FILE_OVER_NEW_CONNECTION
FILE_STATE_REMOVE FILE_STATE_REMOVE

View file

@ -9,8 +9,6 @@ FILE_NEW
file #1, 0, 0 file #1, 0, 0
FILE_BOF_BUFFER FILE_BOF_BUFFER
\0\0^Ex\0\0^J\xf0\0\0^P \0\0^Ex\0\0^J\xf0\0\0^P
MIME_TYPE
binary
FILE_OVER_NEW_CONNECTION FILE_OVER_NEW_CONNECTION
FILE_STATE_REMOVE FILE_STATE_REMOVE
file #1, 124, 0 file #1, 124, 0

View file

@ -6,5 +6,5 @@
#open 2013-08-26-18-40-16 #open 2013-08-26-18-40-16
#fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth method host uri referrer user_agent request_body_len response_body_len status_code status_msg info_code info_msg filename tags username password proxied orig_fuids orig_mime_types resp_fuids resp_mime_types #fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth method host uri referrer user_agent request_body_len response_body_len status_code status_msg info_code info_msg filename tags username password proxied orig_fuids orig_mime_types resp_fuids resp_mime_types
#types time string addr port addr port count string string string string string count count count string count string string table[enum] string string table[string] vector[string] vector[string] vector[string] vector[string] #types time string addr port addr port count string string string string string count count count string count string string table[enum] string string table[string] vector[string] vector[string] vector[string] vector[string]
1369159408.455878 CXWv6p3arKYeMETxOg 141.142.228.5 57262 54.243.88.146 80 1 POST httpbin.org /post - curl/7.30.0 370 465 200 OK - - - (empty) - - - F2yGNX2vGXLxfZeD12,Fq4rJh2kLHKa8YC1q1,F9sKY71Rb9megdy7sg text/plain,text/plain,text/plain FjeopJ2lRk9U1CNNb5 text/plain 1369159408.455878 CXWv6p3arKYeMETxOg 141.142.228.5 57262 54.243.88.146 80 1 POST httpbin.org /post - curl/7.30.0 370 465 200 OK - - - (empty) - - - F2yGNX2vGXLxfZeD12,Fq4rJh2kLHKa8YC1q1,F9sKY71Rb9megdy7sg - FjeopJ2lRk9U1CNNb5 text/plain
#close 2013-08-26-18-40-16 #close 2013-08-26-18-40-16

View file

@ -6,11 +6,9 @@ event bro_init()
{ {
# plain text # plain text
local a = "This is a test"; local a = "This is a test";
print identify_data(a, F);
print identify_data(a, T); print identify_data(a, T);
# PNG image # PNG image
local b = "\x89\x50\x4e\x47\x0d\x0a\x1a\x0a\x00"; local b = "\x89\x50\x4e\x47\x0d\x0a\x1a\x0a\x00";
print identify_data(b, F);
print identify_data(b, T); print identify_data(b, T);
} }

View file

@ -41,7 +41,7 @@ export {
event file_hash(f: fa_file, kind: string, hash: string) event file_hash(f: fa_file, kind: string, hash: string)
{ {
if ( kind=="sha1" && match_file_types in f$mime_type ) if ( kind == "sha1" && f?$mime_type && match_file_types in f$mime_type )
{ {
local hash_domain = fmt("%s.malware.hash.cymru.com", hash); local hash_domain = fmt("%s.malware.hash.cymru.com", hash);
when ( local MHR_result = lookup_hostname_txt(hash_domain) ) when ( local MHR_result = lookup_hostname_txt(hash_domain) )

View file

@ -4,7 +4,7 @@ detect-MHR.bro
event file_hash(f: fa_file, kind: string, hash: string) event file_hash(f: fa_file, kind: string, hash: string)
{ {
if ( kind=="sha1" && match_file_types in f$mime_type ) if ( kind == "sha1" && f?$mime_type && match_file_types in f$mime_type )
{ {
local hash_domain = fmt("%s.malware.hash.cymru.com", hash); local hash_domain = fmt("%s.malware.hash.cymru.com", hash);
when ( local MHR_result = lookup_hostname_txt(hash_domain) ) when ( local MHR_result = lookup_hostname_txt(hash_domain) )